From 85f907a245a8c6c4bfb94b17891c2012d2999730 Mon Sep 17 00:00:00 2001 From: ModelHub XC Date: Fri, 22 May 2026 20:51:22 +0800 Subject: [PATCH] =?UTF-8?q?=E5=88=9D=E5=A7=8B=E5=8C=96=E9=A1=B9=E7=9B=AE?= =?UTF-8?q?=EF=BC=8C=E7=94=B1ModelHub=20XC=E7=A4=BE=E5=8C=BA=E6=8F=90?= =?UTF-8?q?=E4=BE=9B=E6=A8=A1=E5=9E=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Model: W-61/llama3-hh-helpful-qt045-b0p01-20260429-085449 Source: Original Platform --- .gitattributes | 36 + README.md | 62 + all_results.json | 9 + config.json | 29 + generation_config.json | 9 + margin_logs/margins.jsonl | 681 ++ margin_logs/step_0000001.npy | 3 + margin_logs/step_0000002.npy | 3 + margin_logs/step_0000003.npy | 3 + margin_logs/step_0000004.npy | 3 + margin_logs/step_0000005.npy | 3 + margin_logs/step_0000006.npy | 3 + margin_logs/step_0000007.npy | 3 + margin_logs/step_0000008.npy | 3 + margin_logs/step_0000009.npy | 3 + margin_logs/step_0000010.npy | 3 + margin_logs/step_0000011.npy | 3 + margin_logs/step_0000012.npy | 3 + margin_logs/step_0000013.npy | 3 + margin_logs/step_0000014.npy | 3 + margin_logs/step_0000015.npy | 3 + margin_logs/step_0000016.npy | 3 + margin_logs/step_0000017.npy | 3 + margin_logs/step_0000018.npy | 3 + margin_logs/step_0000019.npy | 3 + margin_logs/step_0000020.npy | 3 + margin_logs/step_0000021.npy | 3 + margin_logs/step_0000022.npy | 3 + margin_logs/step_0000023.npy | 3 + margin_logs/step_0000024.npy | 3 + margin_logs/step_0000025.npy | 3 + margin_logs/step_0000026.npy | 3 + margin_logs/step_0000027.npy | 3 + margin_logs/step_0000028.npy | 3 + margin_logs/step_0000029.npy | 3 + margin_logs/step_0000030.npy | 3 + margin_logs/step_0000031.npy | 3 + margin_logs/step_0000032.npy | 3 + margin_logs/step_0000033.npy | 3 + margin_logs/step_0000034.npy | 3 + margin_logs/step_0000035.npy | 3 + margin_logs/step_0000036.npy | 3 + margin_logs/step_0000037.npy | 3 + margin_logs/step_0000038.npy | 3 + margin_logs/step_0000039.npy | 3 + margin_logs/step_0000040.npy | 3 + margin_logs/step_0000041.npy | 3 + margin_logs/step_0000042.npy | 3 + margin_logs/step_0000043.npy | 3 + margin_logs/step_0000044.npy | 3 + margin_logs/step_0000045.npy | 3 + margin_logs/step_0000046.npy | 3 + margin_logs/step_0000047.npy | 3 + margin_logs/step_0000048.npy | 3 + margin_logs/step_0000049.npy | 3 + margin_logs/step_0000050.npy | 3 + margin_logs/step_0000051.npy | 3 + margin_logs/step_0000052.npy | 3 + margin_logs/step_0000053.npy | 3 + margin_logs/step_0000054.npy | 3 + margin_logs/step_0000055.npy | 3 + margin_logs/step_0000056.npy | 3 + margin_logs/step_0000057.npy | 3 + margin_logs/step_0000058.npy | 3 + margin_logs/step_0000059.npy | 3 + margin_logs/step_0000060.npy | 3 + margin_logs/step_0000061.npy | 3 + margin_logs/step_0000062.npy | 3 + margin_logs/step_0000063.npy | 3 + margin_logs/step_0000064.npy | 3 + margin_logs/step_0000065.npy | 3 + margin_logs/step_0000066.npy | 3 + margin_logs/step_0000067.npy | 3 + margin_logs/step_0000068.npy | 3 + margin_logs/step_0000069.npy | 3 + margin_logs/step_0000070.npy | 3 + margin_logs/step_0000071.npy | 3 + margin_logs/step_0000072.npy | 3 + margin_logs/step_0000073.npy | 3 + margin_logs/step_0000074.npy | 3 + margin_logs/step_0000075.npy | 3 + margin_logs/step_0000076.npy | 3 + margin_logs/step_0000077.npy | 3 + margin_logs/step_0000078.npy | 3 + margin_logs/step_0000079.npy | 3 + margin_logs/step_0000080.npy | 3 + margin_logs/step_0000081.npy | 3 + margin_logs/step_0000082.npy | 3 + margin_logs/step_0000083.npy | 3 + margin_logs/step_0000084.npy | 3 + margin_logs/step_0000085.npy | 3 + margin_logs/step_0000086.npy | 3 + margin_logs/step_0000087.npy | 3 + margin_logs/step_0000088.npy | 3 + margin_logs/step_0000089.npy | 3 + margin_logs/step_0000090.npy | 3 + margin_logs/step_0000091.npy | 3 + margin_logs/step_0000092.npy | 3 + margin_logs/step_0000093.npy | 3 + margin_logs/step_0000094.npy | 3 + margin_logs/step_0000095.npy | 3 + margin_logs/step_0000096.npy | 3 + margin_logs/step_0000097.npy | 3 + margin_logs/step_0000098.npy | 3 + margin_logs/step_0000099.npy | 3 + margin_logs/step_0000100.npy | 3 + margin_logs/step_0000101.npy | 3 + margin_logs/step_0000102.npy | 3 + margin_logs/step_0000103.npy | 3 + margin_logs/step_0000104.npy | 3 + margin_logs/step_0000105.npy | 3 + margin_logs/step_0000106.npy | 3 + margin_logs/step_0000107.npy | 3 + margin_logs/step_0000108.npy | 3 + margin_logs/step_0000109.npy | 3 + margin_logs/step_0000110.npy | 3 + margin_logs/step_0000111.npy | 3 + margin_logs/step_0000112.npy | 3 + margin_logs/step_0000113.npy | 3 + margin_logs/step_0000114.npy | 3 + margin_logs/step_0000115.npy | 3 + margin_logs/step_0000116.npy | 3 + margin_logs/step_0000117.npy | 3 + margin_logs/step_0000118.npy | 3 + margin_logs/step_0000119.npy | 3 + margin_logs/step_0000120.npy | 3 + margin_logs/step_0000121.npy | 3 + margin_logs/step_0000122.npy | 3 + margin_logs/step_0000123.npy | 3 + margin_logs/step_0000124.npy | 3 + margin_logs/step_0000125.npy | 3 + margin_logs/step_0000126.npy | 3 + margin_logs/step_0000127.npy | 3 + margin_logs/step_0000128.npy | 3 + margin_logs/step_0000129.npy | 3 + margin_logs/step_0000130.npy | 3 + margin_logs/step_0000131.npy | 3 + margin_logs/step_0000132.npy | 3 + margin_logs/step_0000133.npy | 3 + margin_logs/step_0000134.npy | 3 + margin_logs/step_0000135.npy | 3 + margin_logs/step_0000136.npy | 3 + margin_logs/step_0000137.npy | 3 + margin_logs/step_0000138.npy | 3 + margin_logs/step_0000139.npy | 3 + margin_logs/step_0000140.npy | 3 + margin_logs/step_0000141.npy | 3 + margin_logs/step_0000142.npy | 3 + margin_logs/step_0000143.npy | 3 + margin_logs/step_0000144.npy | 3 + margin_logs/step_0000145.npy | 3 + margin_logs/step_0000146.npy | 3 + margin_logs/step_0000147.npy | 3 + margin_logs/step_0000148.npy | 3 + margin_logs/step_0000149.npy | 3 + margin_logs/step_0000150.npy | 3 + margin_logs/step_0000151.npy | 3 + margin_logs/step_0000152.npy | 3 + margin_logs/step_0000153.npy | 3 + margin_logs/step_0000154.npy | 3 + margin_logs/step_0000155.npy | 3 + margin_logs/step_0000156.npy | 3 + margin_logs/step_0000157.npy | 3 + margin_logs/step_0000158.npy | 3 + margin_logs/step_0000159.npy | 3 + margin_logs/step_0000160.npy | 3 + margin_logs/step_0000161.npy | 3 + margin_logs/step_0000162.npy | 3 + margin_logs/step_0000163.npy | 3 + margin_logs/step_0000164.npy | 3 + margin_logs/step_0000165.npy | 3 + margin_logs/step_0000166.npy | 3 + margin_logs/step_0000167.npy | 3 + margin_logs/step_0000168.npy | 3 + margin_logs/step_0000169.npy | 3 + margin_logs/step_0000170.npy | 3 + margin_logs/step_0000171.npy | 3 + margin_logs/step_0000172.npy | 3 + margin_logs/step_0000173.npy | 3 + margin_logs/step_0000174.npy | 3 + margin_logs/step_0000175.npy | 3 + margin_logs/step_0000176.npy | 3 + margin_logs/step_0000177.npy | 3 + margin_logs/step_0000178.npy | 3 + margin_logs/step_0000179.npy | 3 + margin_logs/step_0000180.npy | 3 + margin_logs/step_0000181.npy | 3 + margin_logs/step_0000182.npy | 3 + margin_logs/step_0000183.npy | 3 + margin_logs/step_0000184.npy | 3 + margin_logs/step_0000185.npy | 3 + margin_logs/step_0000186.npy | 3 + margin_logs/step_0000187.npy | 3 + margin_logs/step_0000188.npy | 3 + margin_logs/step_0000189.npy | 3 + margin_logs/step_0000190.npy | 3 + margin_logs/step_0000191.npy | 3 + margin_logs/step_0000192.npy | 3 + margin_logs/step_0000193.npy | 3 + margin_logs/step_0000194.npy | 3 + margin_logs/step_0000195.npy | 3 + margin_logs/step_0000196.npy | 3 + margin_logs/step_0000197.npy | 3 + margin_logs/step_0000198.npy | 3 + margin_logs/step_0000199.npy | 3 + margin_logs/step_0000200.npy | 3 + margin_logs/step_0000201.npy | 3 + margin_logs/step_0000202.npy | 3 + margin_logs/step_0000203.npy | 3 + margin_logs/step_0000204.npy | 3 + margin_logs/step_0000205.npy | 3 + margin_logs/step_0000206.npy | 3 + margin_logs/step_0000207.npy | 3 + margin_logs/step_0000208.npy | 3 + margin_logs/step_0000209.npy | 3 + margin_logs/step_0000210.npy | 3 + margin_logs/step_0000211.npy | 3 + margin_logs/step_0000212.npy | 3 + margin_logs/step_0000213.npy | 3 + margin_logs/step_0000214.npy | 3 + margin_logs/step_0000215.npy | 3 + margin_logs/step_0000216.npy | 3 + margin_logs/step_0000217.npy | 3 + margin_logs/step_0000218.npy | 3 + margin_logs/step_0000219.npy | 3 + margin_logs/step_0000220.npy | 3 + margin_logs/step_0000221.npy | 3 + margin_logs/step_0000222.npy | 3 + margin_logs/step_0000223.npy | 3 + margin_logs/step_0000224.npy | 3 + margin_logs/step_0000225.npy | 3 + margin_logs/step_0000226.npy | 3 + margin_logs/step_0000227.npy | 3 + margin_logs/step_0000228.npy | 3 + margin_logs/step_0000229.npy | 3 + margin_logs/step_0000230.npy | 3 + margin_logs/step_0000231.npy | 3 + margin_logs/step_0000232.npy | 3 + margin_logs/step_0000233.npy | 3 + margin_logs/step_0000234.npy | 3 + margin_logs/step_0000235.npy | 3 + margin_logs/step_0000236.npy | 3 + margin_logs/step_0000237.npy | 3 + margin_logs/step_0000238.npy | 3 + margin_logs/step_0000239.npy | 3 + margin_logs/step_0000240.npy | 3 + margin_logs/step_0000241.npy | 3 + margin_logs/step_0000242.npy | 3 + margin_logs/step_0000243.npy | 3 + margin_logs/step_0000244.npy | 3 + margin_logs/step_0000245.npy | 3 + margin_logs/step_0000246.npy | 3 + margin_logs/step_0000247.npy | 3 + margin_logs/step_0000248.npy | 3 + margin_logs/step_0000249.npy | 3 + margin_logs/step_0000250.npy | 3 + margin_logs/step_0000251.npy | 3 + margin_logs/step_0000252.npy | 3 + margin_logs/step_0000253.npy | 3 + margin_logs/step_0000254.npy | 3 + margin_logs/step_0000255.npy | 3 + margin_logs/step_0000256.npy | 3 + margin_logs/step_0000257.npy | 3 + margin_logs/step_0000258.npy | 3 + margin_logs/step_0000259.npy | 3 + margin_logs/step_0000260.npy | 3 + margin_logs/step_0000261.npy | 3 + margin_logs/step_0000262.npy | 3 + margin_logs/step_0000263.npy | 3 + margin_logs/step_0000264.npy | 3 + margin_logs/step_0000265.npy | 3 + margin_logs/step_0000266.npy | 3 + margin_logs/step_0000267.npy | 3 + margin_logs/step_0000268.npy | 3 + margin_logs/step_0000269.npy | 3 + margin_logs/step_0000270.npy | 3 + margin_logs/step_0000271.npy | 3 + margin_logs/step_0000272.npy | 3 + margin_logs/step_0000273.npy | 3 + margin_logs/step_0000274.npy | 3 + margin_logs/step_0000275.npy | 3 + margin_logs/step_0000276.npy | 3 + margin_logs/step_0000277.npy | 3 + margin_logs/step_0000278.npy | 3 + margin_logs/step_0000279.npy | 3 + margin_logs/step_0000280.npy | 3 + margin_logs/step_0000281.npy | 3 + margin_logs/step_0000282.npy | 3 + margin_logs/step_0000283.npy | 3 + margin_logs/step_0000284.npy | 3 + margin_logs/step_0000285.npy | 3 + margin_logs/step_0000286.npy | 3 + margin_logs/step_0000287.npy | 3 + margin_logs/step_0000288.npy | 3 + margin_logs/step_0000289.npy | 3 + margin_logs/step_0000290.npy | 3 + margin_logs/step_0000291.npy | 3 + margin_logs/step_0000292.npy | 3 + margin_logs/step_0000293.npy | 3 + margin_logs/step_0000294.npy | 3 + margin_logs/step_0000295.npy | 3 + margin_logs/step_0000296.npy | 3 + margin_logs/step_0000297.npy | 3 + margin_logs/step_0000298.npy | 3 + margin_logs/step_0000299.npy | 3 + margin_logs/step_0000300.npy | 3 + margin_logs/step_0000301.npy | 3 + margin_logs/step_0000302.npy | 3 + margin_logs/step_0000303.npy | 3 + margin_logs/step_0000304.npy | 3 + margin_logs/step_0000305.npy | 3 + margin_logs/step_0000306.npy | 3 + margin_logs/step_0000307.npy | 3 + margin_logs/step_0000308.npy | 3 + margin_logs/step_0000309.npy | 3 + margin_logs/step_0000310.npy | 3 + margin_logs/step_0000311.npy | 3 + margin_logs/step_0000312.npy | 3 + margin_logs/step_0000313.npy | 3 + margin_logs/step_0000314.npy | 3 + margin_logs/step_0000315.npy | 3 + margin_logs/step_0000316.npy | 3 + margin_logs/step_0000317.npy | 3 + margin_logs/step_0000318.npy | 3 + margin_logs/step_0000319.npy | 3 + margin_logs/step_0000320.npy | 3 + margin_logs/step_0000321.npy | 3 + margin_logs/step_0000322.npy | 3 + margin_logs/step_0000323.npy | 3 + margin_logs/step_0000324.npy | 3 + margin_logs/step_0000325.npy | 3 + margin_logs/step_0000326.npy | 3 + margin_logs/step_0000327.npy | 3 + margin_logs/step_0000328.npy | 3 + margin_logs/step_0000329.npy | 3 + margin_logs/step_0000330.npy | 3 + margin_logs/step_0000331.npy | 3 + margin_logs/step_0000332.npy | 3 + margin_logs/step_0000333.npy | 3 + margin_logs/step_0000334.npy | 3 + margin_logs/step_0000335.npy | 3 + margin_logs/step_0000336.npy | 3 + margin_logs/step_0000337.npy | 3 + margin_logs/step_0000338.npy | 3 + margin_logs/step_0000339.npy | 3 + margin_logs/step_0000340.npy | 3 + margin_logs/step_0000341.npy | 3 + margin_logs/step_0000342.npy | 3 + margin_logs/step_0000343.npy | 3 + margin_logs/step_0000344.npy | 3 + margin_logs/step_0000345.npy | 3 + margin_logs/step_0000346.npy | 3 + margin_logs/step_0000347.npy | 3 + margin_logs/step_0000348.npy | 3 + margin_logs/step_0000349.npy | 3 + margin_logs/step_0000350.npy | 3 + margin_logs/step_0000351.npy | 3 + margin_logs/step_0000352.npy | 3 + margin_logs/step_0000353.npy | 3 + margin_logs/step_0000354.npy | 3 + margin_logs/step_0000355.npy | 3 + margin_logs/step_0000356.npy | 3 + margin_logs/step_0000357.npy | 3 + margin_logs/step_0000358.npy | 3 + margin_logs/step_0000359.npy | 3 + margin_logs/step_0000360.npy | 3 + margin_logs/step_0000361.npy | 3 + margin_logs/step_0000362.npy | 3 + margin_logs/step_0000363.npy | 3 + margin_logs/step_0000364.npy | 3 + margin_logs/step_0000365.npy | 3 + margin_logs/step_0000366.npy | 3 + margin_logs/step_0000367.npy | 3 + margin_logs/step_0000368.npy | 3 + margin_logs/step_0000369.npy | 3 + margin_logs/step_0000370.npy | 3 + margin_logs/step_0000371.npy | 3 + margin_logs/step_0000372.npy | 3 + margin_logs/step_0000373.npy | 3 + margin_logs/step_0000374.npy | 3 + margin_logs/step_0000375.npy | 3 + margin_logs/step_0000376.npy | 3 + margin_logs/step_0000377.npy | 3 + margin_logs/step_0000378.npy | 3 + margin_logs/step_0000379.npy | 3 + margin_logs/step_0000380.npy | 3 + margin_logs/step_0000381.npy | 3 + margin_logs/step_0000382.npy | 3 + margin_logs/step_0000383.npy | 3 + margin_logs/step_0000384.npy | 3 + margin_logs/step_0000385.npy | 3 + margin_logs/step_0000386.npy | 3 + margin_logs/step_0000387.npy | 3 + margin_logs/step_0000388.npy | 3 + margin_logs/step_0000389.npy | 3 + margin_logs/step_0000390.npy | 3 + margin_logs/step_0000391.npy | 3 + margin_logs/step_0000392.npy | 3 + margin_logs/step_0000393.npy | 3 + margin_logs/step_0000394.npy | 3 + margin_logs/step_0000395.npy | 3 + margin_logs/step_0000396.npy | 3 + margin_logs/step_0000397.npy | 3 + margin_logs/step_0000398.npy | 3 + margin_logs/step_0000399.npy | 3 + margin_logs/step_0000400.npy | 3 + margin_logs/step_0000401.npy | 3 + margin_logs/step_0000402.npy | 3 + margin_logs/step_0000403.npy | 3 + margin_logs/step_0000404.npy | 3 + margin_logs/step_0000405.npy | 3 + margin_logs/step_0000406.npy | 3 + margin_logs/step_0000407.npy | 3 + margin_logs/step_0000408.npy | 3 + margin_logs/step_0000409.npy | 3 + margin_logs/step_0000410.npy | 3 + margin_logs/step_0000411.npy | 3 + margin_logs/step_0000412.npy | 3 + margin_logs/step_0000413.npy | 3 + margin_logs/step_0000414.npy | 3 + margin_logs/step_0000415.npy | 3 + margin_logs/step_0000416.npy | 3 + margin_logs/step_0000417.npy | 3 + margin_logs/step_0000418.npy | 3 + margin_logs/step_0000419.npy | 3 + margin_logs/step_0000420.npy | 3 + margin_logs/step_0000421.npy | 3 + margin_logs/step_0000422.npy | 3 + margin_logs/step_0000423.npy | 3 + margin_logs/step_0000424.npy | 3 + margin_logs/step_0000425.npy | 3 + margin_logs/step_0000426.npy | 3 + margin_logs/step_0000427.npy | 3 + margin_logs/step_0000428.npy | 3 + margin_logs/step_0000429.npy | 3 + margin_logs/step_0000430.npy | 3 + margin_logs/step_0000431.npy | 3 + margin_logs/step_0000432.npy | 3 + margin_logs/step_0000433.npy | 3 + margin_logs/step_0000434.npy | 3 + margin_logs/step_0000435.npy | 3 + margin_logs/step_0000436.npy | 3 + margin_logs/step_0000437.npy | 3 + margin_logs/step_0000438.npy | 3 + margin_logs/step_0000439.npy | 3 + margin_logs/step_0000440.npy | 3 + margin_logs/step_0000441.npy | 3 + margin_logs/step_0000442.npy | 3 + margin_logs/step_0000443.npy | 3 + margin_logs/step_0000444.npy | 3 + margin_logs/step_0000445.npy | 3 + margin_logs/step_0000446.npy | 3 + margin_logs/step_0000447.npy | 3 + margin_logs/step_0000448.npy | 3 + margin_logs/step_0000449.npy | 3 + margin_logs/step_0000450.npy | 3 + margin_logs/step_0000451.npy | 3 + margin_logs/step_0000452.npy | 3 + margin_logs/step_0000453.npy | 3 + margin_logs/step_0000454.npy | 3 + margin_logs/step_0000455.npy | 3 + margin_logs/step_0000456.npy | 3 + margin_logs/step_0000457.npy | 3 + margin_logs/step_0000458.npy | 3 + margin_logs/step_0000459.npy | 3 + margin_logs/step_0000460.npy | 3 + margin_logs/step_0000461.npy | 3 + margin_logs/step_0000462.npy | 3 + margin_logs/step_0000463.npy | 3 + margin_logs/step_0000464.npy | 3 + margin_logs/step_0000465.npy | 3 + margin_logs/step_0000466.npy | 3 + margin_logs/step_0000467.npy | 3 + margin_logs/step_0000468.npy | 3 + margin_logs/step_0000469.npy | 3 + margin_logs/step_0000470.npy | 3 + margin_logs/step_0000471.npy | 3 + margin_logs/step_0000472.npy | 3 + margin_logs/step_0000473.npy | 3 + margin_logs/step_0000474.npy | 3 + margin_logs/step_0000475.npy | 3 + margin_logs/step_0000476.npy | 3 + margin_logs/step_0000477.npy | 3 + margin_logs/step_0000478.npy | 3 + margin_logs/step_0000479.npy | 3 + margin_logs/step_0000480.npy | 3 + margin_logs/step_0000481.npy | 3 + margin_logs/step_0000482.npy | 3 + margin_logs/step_0000483.npy | 3 + margin_logs/step_0000484.npy | 3 + margin_logs/step_0000485.npy | 3 + margin_logs/step_0000486.npy | 3 + margin_logs/step_0000487.npy | 3 + margin_logs/step_0000488.npy | 3 + margin_logs/step_0000489.npy | 3 + margin_logs/step_0000490.npy | 3 + margin_logs/step_0000491.npy | 3 + margin_logs/step_0000492.npy | 3 + margin_logs/step_0000493.npy | 3 + margin_logs/step_0000494.npy | 3 + margin_logs/step_0000495.npy | 3 + margin_logs/step_0000496.npy | 3 + margin_logs/step_0000497.npy | 3 + margin_logs/step_0000498.npy | 3 + margin_logs/step_0000499.npy | 3 + margin_logs/step_0000500.npy | 3 + margin_logs/step_0000501.npy | 3 + margin_logs/step_0000502.npy | 3 + margin_logs/step_0000503.npy | 3 + margin_logs/step_0000504.npy | 3 + margin_logs/step_0000505.npy | 3 + margin_logs/step_0000506.npy | 3 + margin_logs/step_0000507.npy | 3 + margin_logs/step_0000508.npy | 3 + margin_logs/step_0000509.npy | 3 + margin_logs/step_0000510.npy | 3 + margin_logs/step_0000511.npy | 3 + margin_logs/step_0000512.npy | 3 + margin_logs/step_0000513.npy | 3 + margin_logs/step_0000514.npy | 3 + margin_logs/step_0000515.npy | 3 + margin_logs/step_0000516.npy | 3 + margin_logs/step_0000517.npy | 3 + margin_logs/step_0000518.npy | 3 + margin_logs/step_0000519.npy | 3 + margin_logs/step_0000520.npy | 3 + margin_logs/step_0000521.npy | 3 + margin_logs/step_0000522.npy | 3 + margin_logs/step_0000523.npy | 3 + margin_logs/step_0000524.npy | 3 + margin_logs/step_0000525.npy | 3 + margin_logs/step_0000526.npy | 3 + margin_logs/step_0000527.npy | 3 + margin_logs/step_0000528.npy | 3 + margin_logs/step_0000529.npy | 3 + margin_logs/step_0000530.npy | 3 + margin_logs/step_0000531.npy | 3 + margin_logs/step_0000532.npy | 3 + margin_logs/step_0000533.npy | 3 + margin_logs/step_0000534.npy | 3 + margin_logs/step_0000535.npy | 3 + margin_logs/step_0000536.npy | 3 + margin_logs/step_0000537.npy | 3 + margin_logs/step_0000538.npy | 3 + margin_logs/step_0000539.npy | 3 + margin_logs/step_0000540.npy | 3 + margin_logs/step_0000541.npy | 3 + margin_logs/step_0000542.npy | 3 + margin_logs/step_0000543.npy | 3 + margin_logs/step_0000544.npy | 3 + margin_logs/step_0000545.npy | 3 + margin_logs/step_0000546.npy | 3 + margin_logs/step_0000547.npy | 3 + margin_logs/step_0000548.npy | 3 + margin_logs/step_0000549.npy | 3 + margin_logs/step_0000550.npy | 3 + margin_logs/step_0000551.npy | 3 + margin_logs/step_0000552.npy | 3 + margin_logs/step_0000553.npy | 3 + margin_logs/step_0000554.npy | 3 + margin_logs/step_0000555.npy | 3 + margin_logs/step_0000556.npy | 3 + margin_logs/step_0000557.npy | 3 + margin_logs/step_0000558.npy | 3 + margin_logs/step_0000559.npy | 3 + margin_logs/step_0000560.npy | 3 + margin_logs/step_0000561.npy | 3 + margin_logs/step_0000562.npy | 3 + margin_logs/step_0000563.npy | 3 + margin_logs/step_0000564.npy | 3 + margin_logs/step_0000565.npy | 3 + margin_logs/step_0000566.npy | 3 + margin_logs/step_0000567.npy | 3 + margin_logs/step_0000568.npy | 3 + margin_logs/step_0000569.npy | 3 + margin_logs/step_0000570.npy | 3 + margin_logs/step_0000571.npy | 3 + margin_logs/step_0000572.npy | 3 + margin_logs/step_0000573.npy | 3 + margin_logs/step_0000574.npy | 3 + margin_logs/step_0000575.npy | 3 + margin_logs/step_0000576.npy | 3 + margin_logs/step_0000577.npy | 3 + margin_logs/step_0000578.npy | 3 + margin_logs/step_0000579.npy | 3 + margin_logs/step_0000580.npy | 3 + margin_logs/step_0000581.npy | 3 + margin_logs/step_0000582.npy | 3 + margin_logs/step_0000583.npy | 3 + margin_logs/step_0000584.npy | 3 + margin_logs/step_0000585.npy | 3 + margin_logs/step_0000586.npy | 3 + margin_logs/step_0000587.npy | 3 + margin_logs/step_0000588.npy | 3 + margin_logs/step_0000589.npy | 3 + margin_logs/step_0000590.npy | 3 + margin_logs/step_0000591.npy | 3 + margin_logs/step_0000592.npy | 3 + margin_logs/step_0000593.npy | 3 + margin_logs/step_0000594.npy | 3 + margin_logs/step_0000595.npy | 3 + margin_logs/step_0000596.npy | 3 + margin_logs/step_0000597.npy | 3 + margin_logs/step_0000598.npy | 3 + margin_logs/step_0000599.npy | 3 + margin_logs/step_0000600.npy | 3 + margin_logs/step_0000601.npy | 3 + margin_logs/step_0000602.npy | 3 + margin_logs/step_0000603.npy | 3 + margin_logs/step_0000604.npy | 3 + margin_logs/step_0000605.npy | 3 + margin_logs/step_0000606.npy | 3 + margin_logs/step_0000607.npy | 3 + margin_logs/step_0000608.npy | 3 + margin_logs/step_0000609.npy | 3 + margin_logs/step_0000610.npy | 3 + margin_logs/step_0000611.npy | 3 + margin_logs/step_0000612.npy | 3 + margin_logs/step_0000613.npy | 3 + margin_logs/step_0000614.npy | 3 + margin_logs/step_0000615.npy | 3 + margin_logs/step_0000616.npy | 3 + margin_logs/step_0000617.npy | 3 + margin_logs/step_0000618.npy | 3 + margin_logs/step_0000619.npy | 3 + margin_logs/step_0000620.npy | 3 + margin_logs/step_0000621.npy | 3 + margin_logs/step_0000622.npy | 3 + margin_logs/step_0000623.npy | 3 + margin_logs/step_0000624.npy | 3 + margin_logs/step_0000625.npy | 3 + margin_logs/step_0000626.npy | 3 + margin_logs/step_0000627.npy | 3 + margin_logs/step_0000628.npy | 3 + margin_logs/step_0000629.npy | 3 + margin_logs/step_0000630.npy | 3 + margin_logs/step_0000631.npy | 3 + margin_logs/step_0000632.npy | 3 + margin_logs/step_0000633.npy | 3 + margin_logs/step_0000634.npy | 3 + margin_logs/step_0000635.npy | 3 + margin_logs/step_0000636.npy | 3 + margin_logs/step_0000637.npy | 3 + margin_logs/step_0000638.npy | 3 + margin_logs/step_0000639.npy | 3 + margin_logs/step_0000640.npy | 3 + margin_logs/step_0000641.npy | 3 + margin_logs/step_0000642.npy | 3 + margin_logs/step_0000643.npy | 3 + margin_logs/step_0000644.npy | 3 + margin_logs/step_0000645.npy | 3 + margin_logs/step_0000646.npy | 3 + margin_logs/step_0000647.npy | 3 + margin_logs/step_0000648.npy | 3 + margin_logs/step_0000649.npy | 3 + margin_logs/step_0000650.npy | 3 + margin_logs/step_0000651.npy | 3 + margin_logs/step_0000652.npy | 3 + margin_logs/step_0000653.npy | 3 + margin_logs/step_0000654.npy | 3 + margin_logs/step_0000655.npy | 3 + margin_logs/step_0000656.npy | 3 + margin_logs/step_0000657.npy | 3 + margin_logs/step_0000658.npy | 3 + margin_logs/step_0000659.npy | 3 + margin_logs/step_0000660.npy | 3 + margin_logs/step_0000661.npy | 3 + margin_logs/step_0000662.npy | 3 + margin_logs/step_0000663.npy | 3 + margin_logs/step_0000664.npy | 3 + margin_logs/step_0000665.npy | 3 + margin_logs/step_0000666.npy | 3 + margin_logs/step_0000667.npy | 3 + margin_logs/step_0000668.npy | 3 + margin_logs/step_0000669.npy | 3 + margin_logs/step_0000670.npy | 3 + margin_logs/step_0000671.npy | 3 + margin_logs/step_0000672.npy | 3 + margin_logs/step_0000673.npy | 3 + margin_logs/step_0000674.npy | 3 + margin_logs/step_0000675.npy | 3 + margin_logs/step_0000676.npy | 3 + margin_logs/step_0000677.npy | 3 + margin_logs/step_0000678.npy | 3 + margin_logs/step_0000679.npy | 3 + margin_logs/step_0000680.npy | 3 + margin_logs/step_0000681.npy | 3 + model-00001-of-00007.safetensors | 3 + model-00002-of-00007.safetensors | 3 + model-00003-of-00007.safetensors | 3 + model-00004-of-00007.safetensors | 3 + model-00005-of-00007.safetensors | 3 + model-00006-of-00007.safetensors | 3 + model-00007-of-00007.safetensors | 3 + model.safetensors.index.json | 298 + special_tokens_map.json | 23 + tokenizer.json | 3 + tokenizer_config.json | 2064 ++++ train.log | 1162 +++ train_results.json | 9 + trainer_state.json | 15706 +++++++++++++++++++++++++++++ 701 files changed, 22155 insertions(+) create mode 100644 .gitattributes create mode 100644 README.md create mode 100644 all_results.json create mode 100644 config.json create mode 100644 generation_config.json create mode 100644 margin_logs/margins.jsonl create mode 100644 margin_logs/step_0000001.npy create mode 100644 margin_logs/step_0000002.npy create mode 100644 margin_logs/step_0000003.npy create mode 100644 margin_logs/step_0000004.npy create mode 100644 margin_logs/step_0000005.npy create mode 100644 margin_logs/step_0000006.npy create mode 100644 margin_logs/step_0000007.npy create mode 100644 margin_logs/step_0000008.npy create mode 100644 margin_logs/step_0000009.npy create mode 100644 margin_logs/step_0000010.npy create mode 100644 margin_logs/step_0000011.npy create mode 100644 margin_logs/step_0000012.npy create mode 100644 margin_logs/step_0000013.npy create mode 100644 margin_logs/step_0000014.npy create mode 100644 margin_logs/step_0000015.npy create mode 100644 margin_logs/step_0000016.npy create mode 100644 margin_logs/step_0000017.npy create mode 100644 margin_logs/step_0000018.npy create mode 100644 margin_logs/step_0000019.npy create mode 100644 margin_logs/step_0000020.npy create mode 100644 margin_logs/step_0000021.npy create mode 100644 margin_logs/step_0000022.npy create mode 100644 margin_logs/step_0000023.npy create mode 100644 margin_logs/step_0000024.npy create mode 100644 margin_logs/step_0000025.npy create mode 100644 margin_logs/step_0000026.npy create mode 100644 margin_logs/step_0000027.npy create mode 100644 margin_logs/step_0000028.npy create mode 100644 margin_logs/step_0000029.npy create mode 100644 margin_logs/step_0000030.npy create mode 100644 margin_logs/step_0000031.npy create mode 100644 margin_logs/step_0000032.npy create mode 100644 margin_logs/step_0000033.npy create mode 100644 margin_logs/step_0000034.npy create mode 100644 margin_logs/step_0000035.npy create mode 100644 margin_logs/step_0000036.npy create mode 100644 margin_logs/step_0000037.npy create mode 100644 margin_logs/step_0000038.npy create mode 100644 margin_logs/step_0000039.npy create mode 100644 margin_logs/step_0000040.npy create mode 100644 margin_logs/step_0000041.npy create mode 100644 margin_logs/step_0000042.npy create mode 100644 margin_logs/step_0000043.npy create mode 100644 margin_logs/step_0000044.npy create mode 100644 margin_logs/step_0000045.npy create mode 100644 margin_logs/step_0000046.npy create mode 100644 margin_logs/step_0000047.npy create mode 100644 margin_logs/step_0000048.npy create mode 100644 margin_logs/step_0000049.npy create mode 100644 margin_logs/step_0000050.npy create mode 100644 margin_logs/step_0000051.npy create mode 100644 margin_logs/step_0000052.npy create mode 100644 margin_logs/step_0000053.npy create mode 100644 margin_logs/step_0000054.npy create mode 100644 margin_logs/step_0000055.npy create mode 100644 margin_logs/step_0000056.npy create mode 100644 margin_logs/step_0000057.npy create mode 100644 margin_logs/step_0000058.npy create mode 100644 margin_logs/step_0000059.npy create mode 100644 margin_logs/step_0000060.npy create mode 100644 margin_logs/step_0000061.npy create mode 100644 margin_logs/step_0000062.npy create mode 100644 margin_logs/step_0000063.npy create mode 100644 margin_logs/step_0000064.npy create mode 100644 margin_logs/step_0000065.npy create mode 100644 margin_logs/step_0000066.npy create mode 100644 margin_logs/step_0000067.npy create mode 100644 margin_logs/step_0000068.npy create mode 100644 margin_logs/step_0000069.npy create mode 100644 margin_logs/step_0000070.npy create mode 100644 margin_logs/step_0000071.npy create mode 100644 margin_logs/step_0000072.npy create mode 100644 margin_logs/step_0000073.npy create mode 100644 margin_logs/step_0000074.npy create mode 100644 margin_logs/step_0000075.npy create mode 100644 margin_logs/step_0000076.npy create mode 100644 margin_logs/step_0000077.npy create mode 100644 margin_logs/step_0000078.npy create mode 100644 margin_logs/step_0000079.npy create mode 100644 margin_logs/step_0000080.npy create mode 100644 margin_logs/step_0000081.npy create mode 100644 margin_logs/step_0000082.npy create mode 100644 margin_logs/step_0000083.npy create mode 100644 margin_logs/step_0000084.npy create mode 100644 margin_logs/step_0000085.npy create mode 100644 margin_logs/step_0000086.npy create mode 100644 margin_logs/step_0000087.npy create mode 100644 margin_logs/step_0000088.npy create mode 100644 margin_logs/step_0000089.npy create mode 100644 margin_logs/step_0000090.npy create mode 100644 margin_logs/step_0000091.npy create mode 100644 margin_logs/step_0000092.npy create mode 100644 margin_logs/step_0000093.npy create mode 100644 margin_logs/step_0000094.npy create mode 100644 margin_logs/step_0000095.npy create mode 100644 margin_logs/step_0000096.npy create mode 100644 margin_logs/step_0000097.npy create mode 100644 margin_logs/step_0000098.npy create mode 100644 margin_logs/step_0000099.npy create mode 100644 margin_logs/step_0000100.npy create mode 100644 margin_logs/step_0000101.npy create mode 100644 margin_logs/step_0000102.npy create mode 100644 margin_logs/step_0000103.npy create mode 100644 margin_logs/step_0000104.npy create mode 100644 margin_logs/step_0000105.npy create mode 100644 margin_logs/step_0000106.npy create mode 100644 margin_logs/step_0000107.npy create mode 100644 margin_logs/step_0000108.npy create mode 100644 margin_logs/step_0000109.npy create mode 100644 margin_logs/step_0000110.npy create mode 100644 margin_logs/step_0000111.npy create mode 100644 margin_logs/step_0000112.npy create mode 100644 margin_logs/step_0000113.npy create mode 100644 margin_logs/step_0000114.npy create mode 100644 margin_logs/step_0000115.npy create mode 100644 margin_logs/step_0000116.npy create mode 100644 margin_logs/step_0000117.npy create mode 100644 margin_logs/step_0000118.npy create mode 100644 margin_logs/step_0000119.npy create mode 100644 margin_logs/step_0000120.npy create mode 100644 margin_logs/step_0000121.npy create mode 100644 margin_logs/step_0000122.npy create mode 100644 margin_logs/step_0000123.npy create mode 100644 margin_logs/step_0000124.npy create mode 100644 margin_logs/step_0000125.npy create mode 100644 margin_logs/step_0000126.npy create mode 100644 margin_logs/step_0000127.npy create mode 100644 margin_logs/step_0000128.npy create mode 100644 margin_logs/step_0000129.npy create mode 100644 margin_logs/step_0000130.npy create mode 100644 margin_logs/step_0000131.npy create mode 100644 margin_logs/step_0000132.npy create mode 100644 margin_logs/step_0000133.npy create mode 100644 margin_logs/step_0000134.npy create mode 100644 margin_logs/step_0000135.npy create mode 100644 margin_logs/step_0000136.npy create mode 100644 margin_logs/step_0000137.npy create mode 100644 margin_logs/step_0000138.npy create mode 100644 margin_logs/step_0000139.npy create mode 100644 margin_logs/step_0000140.npy create mode 100644 margin_logs/step_0000141.npy create mode 100644 margin_logs/step_0000142.npy create mode 100644 margin_logs/step_0000143.npy create mode 100644 margin_logs/step_0000144.npy create mode 100644 margin_logs/step_0000145.npy create mode 100644 margin_logs/step_0000146.npy create mode 100644 margin_logs/step_0000147.npy create mode 100644 margin_logs/step_0000148.npy create mode 100644 margin_logs/step_0000149.npy create mode 100644 margin_logs/step_0000150.npy create mode 100644 margin_logs/step_0000151.npy create mode 100644 margin_logs/step_0000152.npy create mode 100644 margin_logs/step_0000153.npy create mode 100644 margin_logs/step_0000154.npy create mode 100644 margin_logs/step_0000155.npy create mode 100644 margin_logs/step_0000156.npy create mode 100644 margin_logs/step_0000157.npy create mode 100644 margin_logs/step_0000158.npy create mode 100644 margin_logs/step_0000159.npy create mode 100644 margin_logs/step_0000160.npy create mode 100644 margin_logs/step_0000161.npy create mode 100644 margin_logs/step_0000162.npy create mode 100644 margin_logs/step_0000163.npy create mode 100644 margin_logs/step_0000164.npy create mode 100644 margin_logs/step_0000165.npy create mode 100644 margin_logs/step_0000166.npy create mode 100644 margin_logs/step_0000167.npy create mode 100644 margin_logs/step_0000168.npy create mode 100644 margin_logs/step_0000169.npy create mode 100644 margin_logs/step_0000170.npy create mode 100644 margin_logs/step_0000171.npy create mode 100644 margin_logs/step_0000172.npy create mode 100644 margin_logs/step_0000173.npy create mode 100644 margin_logs/step_0000174.npy create mode 100644 margin_logs/step_0000175.npy create mode 100644 margin_logs/step_0000176.npy create mode 100644 margin_logs/step_0000177.npy create mode 100644 margin_logs/step_0000178.npy create mode 100644 margin_logs/step_0000179.npy create mode 100644 margin_logs/step_0000180.npy create mode 100644 margin_logs/step_0000181.npy create mode 100644 margin_logs/step_0000182.npy create mode 100644 margin_logs/step_0000183.npy create mode 100644 margin_logs/step_0000184.npy create mode 100644 margin_logs/step_0000185.npy create mode 100644 margin_logs/step_0000186.npy create mode 100644 margin_logs/step_0000187.npy create mode 100644 margin_logs/step_0000188.npy create mode 100644 margin_logs/step_0000189.npy create mode 100644 margin_logs/step_0000190.npy create mode 100644 margin_logs/step_0000191.npy create mode 100644 margin_logs/step_0000192.npy create mode 100644 margin_logs/step_0000193.npy create mode 100644 margin_logs/step_0000194.npy create mode 100644 margin_logs/step_0000195.npy create mode 100644 margin_logs/step_0000196.npy create mode 100644 margin_logs/step_0000197.npy create mode 100644 margin_logs/step_0000198.npy create mode 100644 margin_logs/step_0000199.npy create mode 100644 margin_logs/step_0000200.npy create mode 100644 margin_logs/step_0000201.npy create mode 100644 margin_logs/step_0000202.npy create mode 100644 margin_logs/step_0000203.npy create mode 100644 margin_logs/step_0000204.npy create mode 100644 margin_logs/step_0000205.npy create mode 100644 margin_logs/step_0000206.npy create mode 100644 margin_logs/step_0000207.npy create mode 100644 margin_logs/step_0000208.npy create mode 100644 margin_logs/step_0000209.npy create mode 100644 margin_logs/step_0000210.npy create mode 100644 margin_logs/step_0000211.npy create mode 100644 margin_logs/step_0000212.npy create mode 100644 margin_logs/step_0000213.npy create mode 100644 margin_logs/step_0000214.npy create mode 100644 margin_logs/step_0000215.npy create mode 100644 margin_logs/step_0000216.npy create mode 100644 margin_logs/step_0000217.npy create mode 100644 margin_logs/step_0000218.npy create mode 100644 margin_logs/step_0000219.npy create mode 100644 margin_logs/step_0000220.npy create mode 100644 margin_logs/step_0000221.npy create mode 100644 margin_logs/step_0000222.npy create mode 100644 margin_logs/step_0000223.npy create mode 100644 margin_logs/step_0000224.npy create mode 100644 margin_logs/step_0000225.npy create mode 100644 margin_logs/step_0000226.npy create mode 100644 margin_logs/step_0000227.npy create mode 100644 margin_logs/step_0000228.npy create mode 100644 margin_logs/step_0000229.npy create mode 100644 margin_logs/step_0000230.npy create mode 100644 margin_logs/step_0000231.npy create mode 100644 margin_logs/step_0000232.npy create mode 100644 margin_logs/step_0000233.npy create mode 100644 margin_logs/step_0000234.npy create mode 100644 margin_logs/step_0000235.npy create mode 100644 margin_logs/step_0000236.npy create mode 100644 margin_logs/step_0000237.npy create mode 100644 margin_logs/step_0000238.npy create mode 100644 margin_logs/step_0000239.npy create mode 100644 margin_logs/step_0000240.npy create mode 100644 margin_logs/step_0000241.npy create mode 100644 margin_logs/step_0000242.npy create mode 100644 margin_logs/step_0000243.npy create mode 100644 margin_logs/step_0000244.npy create mode 100644 margin_logs/step_0000245.npy create mode 100644 margin_logs/step_0000246.npy create mode 100644 margin_logs/step_0000247.npy create mode 100644 margin_logs/step_0000248.npy create mode 100644 margin_logs/step_0000249.npy create mode 100644 margin_logs/step_0000250.npy create mode 100644 margin_logs/step_0000251.npy create mode 100644 margin_logs/step_0000252.npy create mode 100644 margin_logs/step_0000253.npy create mode 100644 margin_logs/step_0000254.npy create mode 100644 margin_logs/step_0000255.npy create mode 100644 margin_logs/step_0000256.npy create mode 100644 margin_logs/step_0000257.npy create mode 100644 margin_logs/step_0000258.npy create mode 100644 margin_logs/step_0000259.npy create mode 100644 margin_logs/step_0000260.npy create mode 100644 margin_logs/step_0000261.npy create mode 100644 margin_logs/step_0000262.npy create mode 100644 margin_logs/step_0000263.npy create mode 100644 margin_logs/step_0000264.npy create mode 100644 margin_logs/step_0000265.npy create mode 100644 margin_logs/step_0000266.npy create mode 100644 margin_logs/step_0000267.npy create mode 100644 margin_logs/step_0000268.npy create mode 100644 margin_logs/step_0000269.npy create mode 100644 margin_logs/step_0000270.npy create mode 100644 margin_logs/step_0000271.npy create mode 100644 margin_logs/step_0000272.npy create mode 100644 margin_logs/step_0000273.npy create mode 100644 margin_logs/step_0000274.npy create mode 100644 margin_logs/step_0000275.npy create mode 100644 margin_logs/step_0000276.npy create mode 100644 margin_logs/step_0000277.npy create mode 100644 margin_logs/step_0000278.npy create mode 100644 margin_logs/step_0000279.npy create mode 100644 margin_logs/step_0000280.npy create mode 100644 margin_logs/step_0000281.npy create mode 100644 margin_logs/step_0000282.npy create mode 100644 margin_logs/step_0000283.npy create mode 100644 margin_logs/step_0000284.npy create mode 100644 margin_logs/step_0000285.npy create mode 100644 margin_logs/step_0000286.npy create mode 100644 margin_logs/step_0000287.npy create mode 100644 margin_logs/step_0000288.npy create mode 100644 margin_logs/step_0000289.npy create mode 100644 margin_logs/step_0000290.npy create mode 100644 margin_logs/step_0000291.npy create mode 100644 margin_logs/step_0000292.npy create mode 100644 margin_logs/step_0000293.npy create mode 100644 margin_logs/step_0000294.npy create mode 100644 margin_logs/step_0000295.npy create mode 100644 margin_logs/step_0000296.npy create mode 100644 margin_logs/step_0000297.npy create mode 100644 margin_logs/step_0000298.npy create mode 100644 margin_logs/step_0000299.npy create mode 100644 margin_logs/step_0000300.npy create mode 100644 margin_logs/step_0000301.npy create mode 100644 margin_logs/step_0000302.npy create mode 100644 margin_logs/step_0000303.npy create mode 100644 margin_logs/step_0000304.npy create mode 100644 margin_logs/step_0000305.npy create mode 100644 margin_logs/step_0000306.npy create mode 100644 margin_logs/step_0000307.npy create mode 100644 margin_logs/step_0000308.npy create mode 100644 margin_logs/step_0000309.npy create mode 100644 margin_logs/step_0000310.npy create mode 100644 margin_logs/step_0000311.npy create mode 100644 margin_logs/step_0000312.npy create mode 100644 margin_logs/step_0000313.npy create mode 100644 margin_logs/step_0000314.npy create mode 100644 margin_logs/step_0000315.npy create mode 100644 margin_logs/step_0000316.npy create mode 100644 margin_logs/step_0000317.npy create mode 100644 margin_logs/step_0000318.npy create mode 100644 margin_logs/step_0000319.npy create mode 100644 margin_logs/step_0000320.npy create mode 100644 margin_logs/step_0000321.npy create mode 100644 margin_logs/step_0000322.npy create mode 100644 margin_logs/step_0000323.npy create mode 100644 margin_logs/step_0000324.npy create mode 100644 margin_logs/step_0000325.npy create mode 100644 margin_logs/step_0000326.npy create mode 100644 margin_logs/step_0000327.npy create mode 100644 margin_logs/step_0000328.npy create mode 100644 margin_logs/step_0000329.npy create mode 100644 margin_logs/step_0000330.npy create mode 100644 margin_logs/step_0000331.npy create mode 100644 margin_logs/step_0000332.npy create mode 100644 margin_logs/step_0000333.npy create mode 100644 margin_logs/step_0000334.npy create mode 100644 margin_logs/step_0000335.npy create mode 100644 margin_logs/step_0000336.npy create mode 100644 margin_logs/step_0000337.npy create mode 100644 margin_logs/step_0000338.npy create mode 100644 margin_logs/step_0000339.npy create mode 100644 margin_logs/step_0000340.npy create mode 100644 margin_logs/step_0000341.npy create mode 100644 margin_logs/step_0000342.npy create mode 100644 margin_logs/step_0000343.npy create mode 100644 margin_logs/step_0000344.npy create mode 100644 margin_logs/step_0000345.npy create mode 100644 margin_logs/step_0000346.npy create mode 100644 margin_logs/step_0000347.npy create mode 100644 margin_logs/step_0000348.npy create mode 100644 margin_logs/step_0000349.npy create mode 100644 margin_logs/step_0000350.npy create mode 100644 margin_logs/step_0000351.npy create mode 100644 margin_logs/step_0000352.npy create mode 100644 margin_logs/step_0000353.npy create mode 100644 margin_logs/step_0000354.npy create mode 100644 margin_logs/step_0000355.npy create mode 100644 margin_logs/step_0000356.npy create mode 100644 margin_logs/step_0000357.npy create mode 100644 margin_logs/step_0000358.npy create mode 100644 margin_logs/step_0000359.npy create mode 100644 margin_logs/step_0000360.npy create mode 100644 margin_logs/step_0000361.npy create mode 100644 margin_logs/step_0000362.npy create mode 100644 margin_logs/step_0000363.npy create mode 100644 margin_logs/step_0000364.npy create mode 100644 margin_logs/step_0000365.npy create mode 100644 margin_logs/step_0000366.npy create mode 100644 margin_logs/step_0000367.npy create mode 100644 margin_logs/step_0000368.npy create mode 100644 margin_logs/step_0000369.npy create mode 100644 margin_logs/step_0000370.npy create mode 100644 margin_logs/step_0000371.npy create mode 100644 margin_logs/step_0000372.npy create mode 100644 margin_logs/step_0000373.npy create mode 100644 margin_logs/step_0000374.npy create mode 100644 margin_logs/step_0000375.npy create mode 100644 margin_logs/step_0000376.npy create mode 100644 margin_logs/step_0000377.npy create mode 100644 margin_logs/step_0000378.npy create mode 100644 margin_logs/step_0000379.npy create mode 100644 margin_logs/step_0000380.npy create mode 100644 margin_logs/step_0000381.npy create mode 100644 margin_logs/step_0000382.npy create mode 100644 margin_logs/step_0000383.npy create mode 100644 margin_logs/step_0000384.npy create mode 100644 margin_logs/step_0000385.npy create mode 100644 margin_logs/step_0000386.npy create mode 100644 margin_logs/step_0000387.npy create mode 100644 margin_logs/step_0000388.npy create mode 100644 margin_logs/step_0000389.npy create mode 100644 margin_logs/step_0000390.npy create mode 100644 margin_logs/step_0000391.npy create mode 100644 margin_logs/step_0000392.npy create mode 100644 margin_logs/step_0000393.npy create mode 100644 margin_logs/step_0000394.npy create mode 100644 margin_logs/step_0000395.npy create mode 100644 margin_logs/step_0000396.npy create mode 100644 margin_logs/step_0000397.npy create mode 100644 margin_logs/step_0000398.npy create mode 100644 margin_logs/step_0000399.npy create mode 100644 margin_logs/step_0000400.npy create mode 100644 margin_logs/step_0000401.npy create mode 100644 margin_logs/step_0000402.npy create mode 100644 margin_logs/step_0000403.npy create mode 100644 margin_logs/step_0000404.npy create mode 100644 margin_logs/step_0000405.npy create mode 100644 margin_logs/step_0000406.npy create mode 100644 margin_logs/step_0000407.npy create mode 100644 margin_logs/step_0000408.npy create mode 100644 margin_logs/step_0000409.npy create mode 100644 margin_logs/step_0000410.npy create mode 100644 margin_logs/step_0000411.npy create mode 100644 margin_logs/step_0000412.npy create mode 100644 margin_logs/step_0000413.npy create mode 100644 margin_logs/step_0000414.npy create mode 100644 margin_logs/step_0000415.npy create mode 100644 margin_logs/step_0000416.npy create mode 100644 margin_logs/step_0000417.npy create mode 100644 margin_logs/step_0000418.npy create mode 100644 margin_logs/step_0000419.npy create mode 100644 margin_logs/step_0000420.npy create mode 100644 margin_logs/step_0000421.npy create mode 100644 margin_logs/step_0000422.npy create mode 100644 margin_logs/step_0000423.npy create mode 100644 margin_logs/step_0000424.npy create mode 100644 margin_logs/step_0000425.npy create mode 100644 margin_logs/step_0000426.npy create mode 100644 margin_logs/step_0000427.npy create mode 100644 margin_logs/step_0000428.npy create mode 100644 margin_logs/step_0000429.npy create mode 100644 margin_logs/step_0000430.npy create mode 100644 margin_logs/step_0000431.npy create mode 100644 margin_logs/step_0000432.npy create mode 100644 margin_logs/step_0000433.npy create mode 100644 margin_logs/step_0000434.npy create mode 100644 margin_logs/step_0000435.npy create mode 100644 margin_logs/step_0000436.npy create mode 100644 margin_logs/step_0000437.npy create mode 100644 margin_logs/step_0000438.npy create mode 100644 margin_logs/step_0000439.npy create mode 100644 margin_logs/step_0000440.npy create mode 100644 margin_logs/step_0000441.npy create mode 100644 margin_logs/step_0000442.npy create mode 100644 margin_logs/step_0000443.npy create mode 100644 margin_logs/step_0000444.npy create mode 100644 margin_logs/step_0000445.npy create mode 100644 margin_logs/step_0000446.npy create mode 100644 margin_logs/step_0000447.npy create mode 100644 margin_logs/step_0000448.npy create mode 100644 margin_logs/step_0000449.npy create mode 100644 margin_logs/step_0000450.npy create mode 100644 margin_logs/step_0000451.npy create mode 100644 margin_logs/step_0000452.npy create mode 100644 margin_logs/step_0000453.npy create mode 100644 margin_logs/step_0000454.npy create mode 100644 margin_logs/step_0000455.npy create mode 100644 margin_logs/step_0000456.npy create mode 100644 margin_logs/step_0000457.npy create mode 100644 margin_logs/step_0000458.npy create mode 100644 margin_logs/step_0000459.npy create mode 100644 margin_logs/step_0000460.npy create mode 100644 margin_logs/step_0000461.npy create mode 100644 margin_logs/step_0000462.npy create mode 100644 margin_logs/step_0000463.npy create mode 100644 margin_logs/step_0000464.npy create mode 100644 margin_logs/step_0000465.npy create mode 100644 margin_logs/step_0000466.npy create mode 100644 margin_logs/step_0000467.npy create mode 100644 margin_logs/step_0000468.npy create mode 100644 margin_logs/step_0000469.npy create mode 100644 margin_logs/step_0000470.npy create mode 100644 margin_logs/step_0000471.npy create mode 100644 margin_logs/step_0000472.npy create mode 100644 margin_logs/step_0000473.npy create mode 100644 margin_logs/step_0000474.npy create mode 100644 margin_logs/step_0000475.npy create mode 100644 margin_logs/step_0000476.npy create mode 100644 margin_logs/step_0000477.npy create mode 100644 margin_logs/step_0000478.npy create mode 100644 margin_logs/step_0000479.npy create mode 100644 margin_logs/step_0000480.npy create mode 100644 margin_logs/step_0000481.npy create mode 100644 margin_logs/step_0000482.npy create mode 100644 margin_logs/step_0000483.npy create mode 100644 margin_logs/step_0000484.npy create mode 100644 margin_logs/step_0000485.npy create mode 100644 margin_logs/step_0000486.npy create mode 100644 margin_logs/step_0000487.npy create mode 100644 margin_logs/step_0000488.npy create mode 100644 margin_logs/step_0000489.npy create mode 100644 margin_logs/step_0000490.npy create mode 100644 margin_logs/step_0000491.npy create mode 100644 margin_logs/step_0000492.npy create mode 100644 margin_logs/step_0000493.npy create mode 100644 margin_logs/step_0000494.npy create mode 100644 margin_logs/step_0000495.npy create mode 100644 margin_logs/step_0000496.npy create mode 100644 margin_logs/step_0000497.npy create mode 100644 margin_logs/step_0000498.npy create mode 100644 margin_logs/step_0000499.npy create mode 100644 margin_logs/step_0000500.npy create mode 100644 margin_logs/step_0000501.npy create mode 100644 margin_logs/step_0000502.npy create mode 100644 margin_logs/step_0000503.npy create mode 100644 margin_logs/step_0000504.npy create mode 100644 margin_logs/step_0000505.npy create mode 100644 margin_logs/step_0000506.npy create mode 100644 margin_logs/step_0000507.npy create mode 100644 margin_logs/step_0000508.npy create mode 100644 margin_logs/step_0000509.npy create mode 100644 margin_logs/step_0000510.npy create mode 100644 margin_logs/step_0000511.npy create mode 100644 margin_logs/step_0000512.npy create mode 100644 margin_logs/step_0000513.npy create mode 100644 margin_logs/step_0000514.npy create mode 100644 margin_logs/step_0000515.npy create mode 100644 margin_logs/step_0000516.npy create mode 100644 margin_logs/step_0000517.npy create mode 100644 margin_logs/step_0000518.npy create mode 100644 margin_logs/step_0000519.npy create mode 100644 margin_logs/step_0000520.npy create mode 100644 margin_logs/step_0000521.npy create mode 100644 margin_logs/step_0000522.npy create mode 100644 margin_logs/step_0000523.npy create mode 100644 margin_logs/step_0000524.npy create mode 100644 margin_logs/step_0000525.npy create mode 100644 margin_logs/step_0000526.npy create mode 100644 margin_logs/step_0000527.npy create mode 100644 margin_logs/step_0000528.npy create mode 100644 margin_logs/step_0000529.npy create mode 100644 margin_logs/step_0000530.npy create mode 100644 margin_logs/step_0000531.npy create mode 100644 margin_logs/step_0000532.npy create mode 100644 margin_logs/step_0000533.npy create mode 100644 margin_logs/step_0000534.npy create mode 100644 margin_logs/step_0000535.npy create mode 100644 margin_logs/step_0000536.npy create mode 100644 margin_logs/step_0000537.npy create mode 100644 margin_logs/step_0000538.npy create mode 100644 margin_logs/step_0000539.npy create mode 100644 margin_logs/step_0000540.npy create mode 100644 margin_logs/step_0000541.npy create mode 100644 margin_logs/step_0000542.npy create mode 100644 margin_logs/step_0000543.npy create mode 100644 margin_logs/step_0000544.npy create mode 100644 margin_logs/step_0000545.npy create mode 100644 margin_logs/step_0000546.npy create mode 100644 margin_logs/step_0000547.npy create mode 100644 margin_logs/step_0000548.npy create mode 100644 margin_logs/step_0000549.npy create mode 100644 margin_logs/step_0000550.npy create mode 100644 margin_logs/step_0000551.npy create mode 100644 margin_logs/step_0000552.npy create mode 100644 margin_logs/step_0000553.npy create mode 100644 margin_logs/step_0000554.npy create mode 100644 margin_logs/step_0000555.npy create mode 100644 margin_logs/step_0000556.npy create mode 100644 margin_logs/step_0000557.npy create mode 100644 margin_logs/step_0000558.npy create mode 100644 margin_logs/step_0000559.npy create mode 100644 margin_logs/step_0000560.npy create mode 100644 margin_logs/step_0000561.npy create mode 100644 margin_logs/step_0000562.npy create mode 100644 margin_logs/step_0000563.npy create mode 100644 margin_logs/step_0000564.npy create mode 100644 margin_logs/step_0000565.npy create mode 100644 margin_logs/step_0000566.npy create mode 100644 margin_logs/step_0000567.npy create mode 100644 margin_logs/step_0000568.npy create mode 100644 margin_logs/step_0000569.npy create mode 100644 margin_logs/step_0000570.npy create mode 100644 margin_logs/step_0000571.npy create mode 100644 margin_logs/step_0000572.npy create mode 100644 margin_logs/step_0000573.npy create mode 100644 margin_logs/step_0000574.npy create mode 100644 margin_logs/step_0000575.npy create mode 100644 margin_logs/step_0000576.npy create mode 100644 margin_logs/step_0000577.npy create mode 100644 margin_logs/step_0000578.npy create mode 100644 margin_logs/step_0000579.npy create mode 100644 margin_logs/step_0000580.npy create mode 100644 margin_logs/step_0000581.npy create mode 100644 margin_logs/step_0000582.npy create mode 100644 margin_logs/step_0000583.npy create mode 100644 margin_logs/step_0000584.npy create mode 100644 margin_logs/step_0000585.npy create mode 100644 margin_logs/step_0000586.npy create mode 100644 margin_logs/step_0000587.npy create mode 100644 margin_logs/step_0000588.npy create mode 100644 margin_logs/step_0000589.npy create mode 100644 margin_logs/step_0000590.npy create mode 100644 margin_logs/step_0000591.npy create mode 100644 margin_logs/step_0000592.npy create mode 100644 margin_logs/step_0000593.npy create mode 100644 margin_logs/step_0000594.npy create mode 100644 margin_logs/step_0000595.npy create mode 100644 margin_logs/step_0000596.npy create mode 100644 margin_logs/step_0000597.npy create mode 100644 margin_logs/step_0000598.npy create mode 100644 margin_logs/step_0000599.npy create mode 100644 margin_logs/step_0000600.npy create mode 100644 margin_logs/step_0000601.npy create mode 100644 margin_logs/step_0000602.npy create mode 100644 margin_logs/step_0000603.npy create mode 100644 margin_logs/step_0000604.npy create mode 100644 margin_logs/step_0000605.npy create mode 100644 margin_logs/step_0000606.npy create mode 100644 margin_logs/step_0000607.npy create mode 100644 margin_logs/step_0000608.npy create mode 100644 margin_logs/step_0000609.npy create mode 100644 margin_logs/step_0000610.npy create mode 100644 margin_logs/step_0000611.npy create mode 100644 margin_logs/step_0000612.npy create mode 100644 margin_logs/step_0000613.npy create mode 100644 margin_logs/step_0000614.npy create mode 100644 margin_logs/step_0000615.npy create mode 100644 margin_logs/step_0000616.npy create mode 100644 margin_logs/step_0000617.npy create mode 100644 margin_logs/step_0000618.npy create mode 100644 margin_logs/step_0000619.npy create mode 100644 margin_logs/step_0000620.npy create mode 100644 margin_logs/step_0000621.npy create mode 100644 margin_logs/step_0000622.npy create mode 100644 margin_logs/step_0000623.npy create mode 100644 margin_logs/step_0000624.npy create mode 100644 margin_logs/step_0000625.npy create mode 100644 margin_logs/step_0000626.npy create mode 100644 margin_logs/step_0000627.npy create mode 100644 margin_logs/step_0000628.npy create mode 100644 margin_logs/step_0000629.npy create mode 100644 margin_logs/step_0000630.npy create mode 100644 margin_logs/step_0000631.npy create mode 100644 margin_logs/step_0000632.npy create mode 100644 margin_logs/step_0000633.npy create mode 100644 margin_logs/step_0000634.npy create mode 100644 margin_logs/step_0000635.npy create mode 100644 margin_logs/step_0000636.npy create mode 100644 margin_logs/step_0000637.npy create mode 100644 margin_logs/step_0000638.npy create mode 100644 margin_logs/step_0000639.npy create mode 100644 margin_logs/step_0000640.npy create mode 100644 margin_logs/step_0000641.npy create mode 100644 margin_logs/step_0000642.npy create mode 100644 margin_logs/step_0000643.npy create mode 100644 margin_logs/step_0000644.npy create mode 100644 margin_logs/step_0000645.npy create mode 100644 margin_logs/step_0000646.npy create mode 100644 margin_logs/step_0000647.npy create mode 100644 margin_logs/step_0000648.npy create mode 100644 margin_logs/step_0000649.npy create mode 100644 margin_logs/step_0000650.npy create mode 100644 margin_logs/step_0000651.npy create mode 100644 margin_logs/step_0000652.npy create mode 100644 margin_logs/step_0000653.npy create mode 100644 margin_logs/step_0000654.npy create mode 100644 margin_logs/step_0000655.npy create mode 100644 margin_logs/step_0000656.npy create mode 100644 margin_logs/step_0000657.npy create mode 100644 margin_logs/step_0000658.npy create mode 100644 margin_logs/step_0000659.npy create mode 100644 margin_logs/step_0000660.npy create mode 100644 margin_logs/step_0000661.npy create mode 100644 margin_logs/step_0000662.npy create mode 100644 margin_logs/step_0000663.npy create mode 100644 margin_logs/step_0000664.npy create mode 100644 margin_logs/step_0000665.npy create mode 100644 margin_logs/step_0000666.npy create mode 100644 margin_logs/step_0000667.npy create mode 100644 margin_logs/step_0000668.npy create mode 100644 margin_logs/step_0000669.npy create mode 100644 margin_logs/step_0000670.npy create mode 100644 margin_logs/step_0000671.npy create mode 100644 margin_logs/step_0000672.npy create mode 100644 margin_logs/step_0000673.npy create mode 100644 margin_logs/step_0000674.npy create mode 100644 margin_logs/step_0000675.npy create mode 100644 margin_logs/step_0000676.npy create mode 100644 margin_logs/step_0000677.npy create mode 100644 margin_logs/step_0000678.npy create mode 100644 margin_logs/step_0000679.npy create mode 100644 margin_logs/step_0000680.npy create mode 100644 margin_logs/step_0000681.npy create mode 100644 model-00001-of-00007.safetensors create mode 100644 model-00002-of-00007.safetensors create mode 100644 model-00003-of-00007.safetensors create mode 100644 model-00004-of-00007.safetensors create mode 100644 model-00005-of-00007.safetensors create mode 100644 model-00006-of-00007.safetensors create mode 100644 model-00007-of-00007.safetensors create mode 100644 model.safetensors.index.json create mode 100644 special_tokens_map.json create mode 100644 tokenizer.json create mode 100644 tokenizer_config.json create mode 100644 train.log create mode 100644 train_results.json create mode 100644 trainer_state.json diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..52373fe --- /dev/null +++ b/.gitattributes @@ -0,0 +1,36 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text +tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md new file mode 100644 index 0000000..b3362a9 --- /dev/null +++ b/README.md @@ -0,0 +1,62 @@ +--- +library_name: transformers +base_model: W-61/llama-3-8b-base-sft-hh-helpful-4xh200 +tags: +- alignment-handbook +- new-dpo +- generated_from_trainer +datasets: +- Anthropic/hh-rlhf +model-index: +- name: llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449 + results: [] +--- + + + +# llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449 + +This model is a fine-tuned version of [W-61/llama-3-8b-base-sft-hh-helpful-4xh200](https://huggingface.co/W-61/llama-3-8b-base-sft-hh-helpful-4xh200) on the Anthropic/hh-rlhf dataset. + +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 5e-07 +- train_batch_size: 8 +- eval_batch_size: 8 +- seed: 42 +- distributed_type: multi-GPU +- num_devices: 4 +- gradient_accumulation_steps: 2 +- total_train_batch_size: 64 +- total_eval_batch_size: 32 +- optimizer: Use OptimizerNames.ADAMW_TORCH with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments +- lr_scheduler_type: cosine +- lr_scheduler_warmup_ratio: 0.1 +- num_epochs: 1 + +### Training results + + + +### Framework versions + +- Transformers 4.51.0 +- Pytorch 2.3.1+cu121 +- Datasets 2.21.0 +- Tokenizers 0.21.4 diff --git a/all_results.json b/all_results.json new file mode 100644 index 0000000..ac66d6e --- /dev/null +++ b/all_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 1.0, + "total_flos": 0.0, + "train_loss": 1.126299982641587, + "train_runtime": 1736.7793, + "train_samples": 43598, + "train_samples_per_second": 25.103, + "train_steps_per_second": 0.392 +} \ No newline at end of file diff --git a/config.json b/config.json new file mode 100644 index 0000000..5092b09 --- /dev/null +++ b/config.json @@ -0,0 +1,29 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": 128001, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 8192, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "torch_dtype": "float32", + "transformers_version": "4.51.0", + "use_cache": true, + "vocab_size": 128256 +} diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000..76247c9 --- /dev/null +++ b/generation_config.json @@ -0,0 +1,9 @@ +{ + "bos_token_id": 128000, + "do_sample": true, + "eos_token_id": 128001, + "max_length": 4096, + "temperature": 0.6, + "top_p": 0.9, + "transformers_version": "4.51.0" +} diff --git a/margin_logs/margins.jsonl b/margin_logs/margins.jsonl new file mode 100644 index 0000000..335e812 --- /dev/null +++ b/margin_logs/margins.jsonl @@ -0,0 +1,681 @@ +{"epoch": 0.0, "step": 1, "batch_size": 64, "mean": -0.02287048101425171, "std": 0.42023447155952454, "min": -1.4034271240234375, "p10": -0.46674575805664065, "median": 0.04234886169433594, "p90": 0.4323463439941407, "max": 0.89263916015625, "pos_frac": 0.53125, "sample": [-0.06523895263671875, 0.436798095703125, 0.27811431884765625, -0.9194221496582031, 0.018890380859375, 0.20587158203125, 0.18878173828125, -0.3968696594238281, 0.26206207275390625, 0.2470550537109375, -0.040912628173828125, 0.4394989013671875, -0.44133758544921875, -0.39148712158203125, 0.2764854431152344, 0.89263916015625, -0.42584991455078125, -0.46125030517578125, -0.8638992309570312, -0.3508758544921875, 0.371368408203125, 0.887847900390625, -0.382904052734375, 0.36145782470703125, -0.4890003204345703, 0.052455902099609375, -0.036136627197265625, 0.23079299926757812, 0.2469482421875, 0.1643218994140625, -0.07129669189453125, 0.2790794372558594, 0.3637123107910156, -0.8916168212890625, 0.03298759460449219, -0.2790107727050781, -0.17860984802246094, 0.23892593383789062, 0.05171012878417969, -0.2564239501953125, -0.14655303955078125, 0.27777862548828125, 0.0810394287109375, -1.4034271240234375, -0.28739166259765625, -0.1489429473876953, 0.44918060302734375, 0.1693286895751953, 0.10933303833007812, -0.14766693115234375, -0.40944671630859375, -0.18532562255859375, 0.6261310577392578, -0.20856857299804688, 0.602569580078125, 0.05538177490234375, 0.1505279541015625, 0.1313800811767578, -0.006317138671875, 0.42195892333984375, -0.29936981201171875, -0.4691009521484375, 0.16705322265625, -0.5789260864257812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000001.npy"} +{"epoch": 0.0014684287812041115, "step": 2, "batch_size": 64, "mean": -0.06572240591049194, "std": 0.3523969054222107, "min": -0.9291305541992188, "p10": -0.46334152221679686, "median": -0.05502510070800781, "p90": 0.3672500610351563, "max": 1.0444793701171875, "pos_frac": 0.4375, "sample": [-0.2829437255859375, 0.3027191162109375, -0.19867706298828125, -0.3062286376953125, 0.10318756103515625, 0.20131683349609375, -0.34906005859375, 0.2802886962890625, 0.1914520263671875, -0.31072998046875, 0.08922195434570312, 0.10284614562988281, -0.03655242919921875, -0.0604095458984375, -0.06208038330078125, 0.32562255859375, -0.37982177734375, 0.2746162414550781, -0.049640655517578125, 0.3752174377441406, -0.103973388671875, 0.0699462890625, 0.36417388916015625, -0.033428192138671875, 0.37265777587890625, -0.3787078857421875, -0.6610565185546875, 0.4720420837402344, 0.47701263427734375, -0.27928924560546875, -0.44719696044921875, -0.0965118408203125, -0.7628555297851562, 0.046764373779296875, 0.06670379638671875, -0.9291305541992188, -0.7122802734375, -0.16554832458496094, 0.1485595703125, -0.07539939880371094, 0.2588920593261719, 0.039890289306640625, 0.201690673828125, 0.0623016357421875, 1.0444793701171875, -0.37696075439453125, -0.02794647216796875, -0.223297119140625, -0.35730743408203125, -0.1309051513671875, -0.3106689453125, -0.11409187316894531, -0.1669769287109375, 0.131317138671875, -0.2361297607421875, 0.4093780517578125, -0.6485977172851562, 0.36856842041015625, -0.1951904296875, -0.4702606201171875, -0.7624168395996094, 0.008928298950195312, -0.31630706787109375, 0.022550582885742188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000002.npy"} +{"epoch": 0.002936857562408223, "step": 3, "batch_size": 64, "mean": 0.03308090567588806, "std": 0.35418060421943665, "min": -0.5293960571289062, "p10": -0.3359836578369141, "median": -0.042331695556640625, "p90": 0.5290863037109377, "max": 1.3264389038085938, "pos_frac": 0.421875, "sample": [0.056041717529296875, -0.1404876708984375, -0.51763916015625, 0.856475830078125, -0.376953125, 0.0019683837890625, 0.17377281188964844, 0.25421714782714844, 0.07147216796875, -0.06154632568359375, -0.09619903564453125, -0.17862510681152344, -0.07421875, -0.0475616455078125, -0.07773780822753906, -0.12851715087890625, 0.5426177978515625, -0.15134048461914062, 0.23279190063476562, -0.3286571502685547, -0.33716583251953125, -0.04317474365234375, -0.03061676025390625, 0.002685546875, -0.0414886474609375, -0.29346656799316406, 0.03073883056640625, 0.43576622009277344, 0.4975128173828125, -0.09253692626953125, 0.71759033203125, 0.6981201171875, -0.2122974395751953, 0.822601318359375, -0.011625289916992188, -0.3606109619140625, -0.5293960571289062, 0.11052894592285156, 0.240203857421875, 0.3279857635498047, -0.3332252502441406, -0.197723388671875, -0.12407684326171875, -0.04111480712890625, 0.14806365966796875, -0.27658653259277344, -0.17269134521484375, 0.2557258605957031, -0.4029502868652344, -0.35577392578125, 0.18115615844726562, -0.3004341125488281, 0.03314399719238281, 0.1793670654296875, 0.19315719604492188, 1.3264389038085938, -0.025981903076171875, -0.2816925048828125, -0.109710693359375, 0.714385986328125, -0.091827392578125, 0.17095947265625, -0.26153564453125, -0.05112457275390625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000003.npy"} +{"epoch": 0.004405286343612335, "step": 4, "batch_size": 64, "mean": -0.026903808116912842, "std": 0.39596980810165405, "min": -1.694427490234375, "p10": -0.4559467315673828, "median": -0.03838539123535156, "p90": 0.47890777587890637, "max": 1.240478515625, "pos_frac": 0.453125, "sample": [-0.17768096923828125, -0.20566558837890625, 0.3115520477294922, -0.06421661376953125, -0.233306884765625, 0.17946624755859375, 0.510223388671875, -1.694427490234375, -0.4580421447753906, -0.0389251708984375, -0.10910987854003906, -0.19139480590820312, 0.49413299560546875, 0.0337982177734375, 0.165191650390625, -0.1699962615966797, -0.5142974853515625, 0.16002273559570312, 0.026905059814453125, -0.030286788940429688, -0.45105743408203125, 0.09743499755859375, -0.2850189208984375, -0.21456146240234375, -0.0045166015625, -0.24554443359375, -0.183624267578125, -0.037845611572265625, -0.15449905395507812, -0.188385009765625, -0.1370258331298828, 0.0039844512939453125, 0.6416778564453125, -0.0646514892578125, 0.21550559997558594, 0.5231075286865234, 0.10413742065429688, 0.5456085205078125, -0.5436363220214844, -0.0606842041015625, 0.44338226318359375, -0.12090873718261719, 0.40997314453125, 0.50921630859375, -0.09272003173828125, 0.145294189453125, -0.5738372802734375, 0.4407196044921875, 0.025438308715820312, -0.31734466552734375, 0.15048789978027344, 0.24699783325195312, 0.06943511962890625, -0.542510986328125, 0.0018978118896484375, 0.06427001953125, 1.240478515625, -0.12866973876953125, -0.37815093994140625, -0.10295867919921875, 0.1153411865234375, -0.7413253784179688, 0.01739501953125, -0.15809249877929688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000004.npy"} +{"epoch": 0.005873715124816446, "step": 5, "batch_size": 64, "mean": 0.033094823360443115, "std": 0.39475277066230774, "min": -1.0765533447265625, "p10": -0.4166366577148437, "median": 0.048415184020996094, "p90": 0.4538898468017578, "max": 1.1348876953125, "pos_frac": 0.59375, "sample": [0.2266082763671875, 0.16357421875, 0.12400054931640625, 0.541107177734375, 0.6762542724609375, -0.0589599609375, -0.16263961791992188, 0.046863555908203125, 0.07295799255371094, -0.3853302001953125, 0.11759185791015625, 0.793182373046875, 0.04996681213378906, 0.2749481201171875, -0.003170013427734375, -0.0526580810546875, 0.0006256103515625, -0.6391029357910156, 0.13449859619140625, -0.090576171875, 0.021135330200195312, 0.7157440185546875, 0.4538764953613281, 0.360565185546875, 0.10443878173828125, -0.6559066772460938, -0.0202789306640625, -0.15867233276367188, -0.3355560302734375, 0.15367889404296875, 0.2683544158935547, -0.0052547454833984375, 1.1348876953125, 0.31311607360839844, -0.5173988342285156, 0.259613037109375, 0.45389556884765625, 0.021183013916015625, -0.196502685546875, 0.30934906005859375, 0.40610504150390625, 0.41439056396484375, 0.13161468505859375, -1.0765533447265625, 0.6994857788085938, -1.0194244384765625, 0.002758026123046875, -0.10941696166992188, -0.59234619140625, -0.3362884521484375, -0.09119415283203125, -0.15970230102539062, 0.22991180419921875, 0.33785247802734375, 0.06012725830078125, 0.08795166015625, 0.057952880859375, -0.3737640380859375, -0.4300537109375, 0.00103759765625, -0.07614898681640625, -0.265472412109375, -0.3614501953125, 0.07068634033203125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000005.npy"} +{"epoch": 0.007342143906020558, "step": 6, "batch_size": 64, "mean": 0.007514625787734985, "std": 0.38216954469680786, "min": -0.9972686767578125, "p10": -0.46891937255859373, "median": 0.0034809112548828125, "p90": 0.46547698974609386, "max": 0.886260986328125, "pos_frac": 0.5, "sample": [0.321380615234375, -0.1691761016845703, 0.15880393981933594, -0.4688873291015625, 0.08837127685546875, 0.886260986328125, -0.4112663269042969, -0.4348907470703125, 0.4066619873046875, -0.2081146240234375, 0.3820304870605469, 0.4936943054199219, -0.7308502197265625, 0.4318733215332031, -0.001789093017578125, -0.4166259765625, -0.22088623046875, -0.5577583312988281, 0.4770355224609375, 0.3048057556152344, -0.3462028503417969, -0.5083293914794922, 0.37225341796875, -0.026311874389648438, -0.12540245056152344, 0.1793975830078125, 0.438507080078125, 0.00875091552734375, -0.18023681640625, -0.12633895874023438, 0.34912109375, -0.25424766540527344, 0.11420440673828125, 0.10140609741210938, -0.168182373046875, 0.62139892578125, -0.730621337890625, -0.2751731872558594, 0.1958789825439453, 0.02364349365234375, -0.46893310546875, -0.089385986328125, -0.5550537109375, 0.12517356872558594, 0.5292129516601562, 0.1530780792236328, 0.0420684814453125, 0.3308429718017578, 0.36346435546875, -0.31366729736328125, -0.08727264404296875, -0.18389129638671875, -0.14939117431640625, 0.015333175659179688, 0.8744354248046875, -0.005680084228515625, 0.18592453002929688, -0.9972686767578125, -0.033172607421875, 0.19985198974609375, -0.0723114013671875, -0.09508514404296875, 0.6774749755859375, 0.0410003662109375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000006.npy"} +{"epoch": 0.00881057268722467, "step": 7, "batch_size": 64, "mean": 0.055707335472106934, "std": 0.3949728012084961, "min": -1.1397933959960938, "p10": -0.46426429748535153, "median": 0.05512046813964844, "p90": 0.5859375, "max": 0.9288787841796875, "pos_frac": 0.5625, "sample": [-0.10405731201171875, -0.2880096435546875, 0.014972686767578125, 0.05789947509765625, -1.1397933959960938, 0.32428741455078125, -0.5356674194335938, 0.147125244140625, -0.42238616943359375, 0.22858428955078125, -0.315338134765625, 0.4044647216796875, 0.4839324951171875, -0.04021453857421875, -0.19316864013671875, 0.053436279296875, -0.18062210083007812, -0.4822120666503906, -0.07632064819335938, -0.6546134948730469, -0.16966629028320312, 0.60772705078125, 0.5777130126953125, -0.029773712158203125, 0.41583251953125, 0.14766693115234375, -0.09889411926269531, -0.09553909301757812, 0.17668914794921875, -0.16213607788085938, 0.12108993530273438, 0.8680038452148438, 0.056804656982421875, 0.47393798828125, 0.9288787841796875, 0.06838226318359375, 0.0344085693359375, 0.15340614318847656, -0.0149688720703125, 0.126739501953125, -0.4880218505859375, -0.665924072265625, 0.17974090576171875, 0.0747222900390625, 0.05947113037109375, -0.2742156982421875, 0.6384429931640625, -0.1526641845703125, -0.082672119140625, 0.03760528564453125, 0.9262847900390625, -0.6263351440429688, -0.2482147216796875, 0.7765731811523438, -0.06246185302734375, 0.1737518310546875, 0.19930267333984375, 0.49541473388671875, 0.16585159301757812, -0.07513427734375, 0.3542938232421875, 0.1287994384765625, -0.02740478515625, 0.5894622802734375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000007.npy"} +{"epoch": 0.010279001468428781, "step": 8, "batch_size": 64, "mean": -0.010594218969345093, "std": 0.4293593764305115, "min": -1.030303955078125, "p10": -0.5618034362792969, "median": 0.03712272644042969, "p90": 0.4851425170898439, "max": 0.9192104339599609, "pos_frac": 0.546875, "sample": [0.0600433349609375, 0.50445556640625, -0.16412353515625, 0.06301116943359375, -0.8667755126953125, 0.039764404296875, 0.5054054260253906, 0.4400787353515625, 0.259796142578125, -0.10739898681640625, -0.15497589111328125, 0.37149810791015625, -0.0269775390625, -0.003101348876953125, 0.2376708984375, 0.32817840576171875, 0.01511383056640625, 0.3520698547363281, 0.18402099609375, -0.25514984130859375, -0.8494873046875, -0.98828125, 0.272186279296875, 0.3139801025390625, -0.06984901428222656, -0.530609130859375, -0.12061309814453125, 0.07479095458984375, 0.2733917236328125, -0.17912673950195312, 0.2225341796875, -0.5751724243164062, 0.034481048583984375, 0.632476806640625, -0.5283889770507812, -0.37873077392578125, -0.154449462890625, 0.08188438415527344, 0.048065185546875, 0.18080902099609375, 0.637176513671875, -0.19140625, 0.1638336181640625, 0.2452545166015625, -0.30585670471191406, -0.992645263671875, -1.030303955078125, 0.132720947265625, 0.10613441467285156, 0.8413314819335938, -0.48166656494140625, -0.01486968994140625, 0.8278350830078125, -0.02880096435546875, 0.17242431640625, -0.0192108154296875, -0.28781890869140625, -0.668487548828125, -0.1997833251953125, -0.5011138916015625, 0.2870197296142578, 0.9192104339599609, 0.0011539459228515625, 0.1673431396484375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000008.npy"} +{"epoch": 0.011747430249632892, "step": 9, "batch_size": 64, "mean": 0.04815271496772766, "std": 0.3814266324043274, "min": -0.956146240234375, "p10": -0.42430419921875, "median": 0.017974853515625, "p90": 0.6137950897216797, "max": 0.9036712646484375, "pos_frac": 0.53125, "sample": [-0.0808258056640625, -0.2951812744140625, 0.6848831176757812, 0.405242919921875, 0.04438018798828125, -0.1177520751953125, 0.06735992431640625, -0.22348785400390625, 0.0695648193359375, 0.15133285522460938, -0.4777259826660156, -0.956146240234375, -0.4019927978515625, -0.0499267578125, -0.1571197509765625, 0.29833984375, 0.25363922119140625, -0.4219169616699219, -0.521697998046875, -0.4253273010253906, -0.12277984619140625, 0.0101165771484375, 0.261322021484375, -0.1541290283203125, 0.5172042846679688, -0.15370941162109375, 0.15102386474609375, 0.527435302734375, 0.03491783142089844, 0.7014122009277344, 0.14659881591796875, -0.1999797821044922, -0.5486373901367188, 0.1100616455078125, -0.10959625244140625, -0.28089141845703125, -0.033329010009765625, 0.0003108978271484375, 0.8986358642578125, 0.2127532958984375, -0.24965667724609375, -0.48302459716796875, -0.58905029296875, -0.21209335327148438, -0.0605316162109375, 0.15797805786132812, 0.0258331298828125, 0.0849761962890625, 0.10529327392578125, 0.11152076721191406, 0.10943603515625, -0.09698486328125, 0.8112640380859375, 0.5908546447753906, 0.5723457336425781, 0.623626708984375, -0.0936279296875, 0.9036712646484375, -0.09481620788574219, 0.646881103515625, -0.02295684814453125, 0.3775482177734375, -0.217437744140625, 0.2663421630859375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000009.npy"} +{"epoch": 0.013215859030837005, "step": 10, "batch_size": 64, "mean": -0.001463174819946289, "std": 0.3902728259563446, "min": -0.8311614990234375, "p10": -0.44131011962890626, "median": -0.038887977600097656, "p90": 0.6196060180664064, "max": 0.9255523681640625, "pos_frac": 0.46875, "sample": [-0.07355499267578125, 0.04744720458984375, -0.07586860656738281, -0.2613334655761719, -0.39136695861816406, 0.6706161499023438, 0.019376754760742188, 0.5973892211914062, 0.6291275024414062, -0.31211090087890625, -0.09336090087890625, 0.06523513793945312, -0.11175537109375, -0.05181884765625, -0.443695068359375, 0.9106292724609375, -0.3892326354980469, -0.2104034423828125, 0.03885459899902344, 0.01834869384765625, 0.045177459716796875, -0.4983024597167969, 0.118316650390625, 0.05778694152832031, -0.22638702392578125, -0.11710739135742188, -0.49324798583984375, -0.3094635009765625, -0.025957107543945312, 0.18902969360351562, 0.124786376953125, -0.1232452392578125, 0.1505718231201172, -0.6654586791992188, -0.32532501220703125, 0.120361328125, -0.458282470703125, 0.7146835327148438, 0.2568836212158203, -0.08184051513671875, -0.3095664978027344, -0.6650238037109375, -0.14169692993164062, 0.722747802734375, 0.09381103515625, 0.5955581665039062, 0.4874153137207031, 0.084930419921875, -0.13419342041015625, -0.2111968994140625, -0.24085235595703125, 0.058338165283203125, 0.40396881103515625, 0.07459259033203125, -0.8311614990234375, -0.0227203369140625, 0.288970947265625, -0.16632843017578125, -0.4217071533203125, 0.9255523681640625, 0.8909072875976562, 0.024831771850585938, -0.2005786895751953, -0.4357452392578125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000010.npy"} +{"epoch": 0.014684287812041116, "step": 11, "batch_size": 64, "mean": 0.0049735307693481445, "std": 0.29339078068733215, "min": -0.689208984375, "p10": -0.3091766357421875, "median": -0.026834487915039062, "p90": 0.431584930419922, "max": 0.73687744140625, "pos_frac": 0.453125, "sample": [-0.301422119140625, 0.0435943603515625, 0.5315799713134766, 0.2107257843017578, -0.5142059326171875, -0.11260986328125, -0.079803466796875, 0.14833641052246094, -0.12496757507324219, -0.211029052734375, -0.12408638000488281, -0.4106903076171875, -0.012725830078125, 0.05501556396484375, 0.2336578369140625, -0.1355438232421875, 0.12885284423828125, -0.15060043334960938, -0.15081787109375, 0.3881568908691406, 0.18314170837402344, -0.4676513671875, -0.1800537109375, 0.03379249572753906, -0.13866424560546875, 0.4705772399902344, -0.3125, -0.0301361083984375, -0.1878662109375, 0.6031646728515625, 0.65997314453125, -0.1624298095703125, 0.49326133728027344, -0.4498291015625, -0.030956268310546875, 0.08673095703125, 0.1368560791015625, 0.73687744140625, -0.03740692138671875, -0.49066162109375, -0.023532867431640625, -0.08552932739257812, 0.012420654296875, -0.27166748046875, 0.0233917236328125, 0.16878890991210938, 0.3726348876953125, -0.22854995727539062, -0.240234375, 0.39697265625, -0.07165145874023438, -0.2846336364746094, 0.24274444580078125, -0.689208984375, -0.051563262939453125, -0.012632369995117188, 0.0186920166015625, -0.19589996337890625, 0.18935394287109375, 0.44641876220703125, 0.30106353759765625, 0.0044765472412109375, -0.054714202880859375, 0.023529052734375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000011.npy"} +{"epoch": 0.016152716593245228, "step": 12, "batch_size": 64, "mean": 0.0603850781917572, "std": 0.4376567006111145, "min": -1.2303466796875, "p10": -0.46402053833007806, "median": 0.03849983215332031, "p90": 0.5856338500976563, "max": 1.44720458984375, "pos_frac": 0.578125, "sample": [0.536956787109375, -0.42437744140625, 0.5506744384765625, -1.035797119140625, 0.3840179443359375, 0.29521942138671875, -0.14783096313476562, 0.5201797485351562, 0.05979156494140625, 0.05229949951171875, -0.1231689453125, -0.575103759765625, 0.00353240966796875, -0.1818084716796875, -0.24041748046875, 0.22756195068359375, 0.042453765869140625, 0.949371337890625, 0.20778465270996094, -0.06808090209960938, -1.2303466796875, -0.26215362548828125, -0.547271728515625, -0.6264266967773438, -0.008697509765625, 0.23242950439453125, 0.2928466796875, 1.44720458984375, 0.08284378051757812, 0.07696533203125, -0.643646240234375, 0.7810516357421875, 0.2099609375, 0.028715133666992188, -0.48101043701171875, 0.1659412384033203, -0.09874343872070312, -0.3618316650390625, 0.6596603393554688, -0.08398056030273438, -0.27744293212890625, -0.0102996826171875, -0.08559226989746094, -0.08926773071289062, 0.1047515869140625, 0.6038360595703125, 0.600616455078125, 0.3017120361328125, 0.19052505493164062, 0.13098907470703125, 0.02175140380859375, 0.1693115234375, 0.0345458984375, 0.34342193603515625, 0.9684906005859375, -0.133636474609375, 0.086151123046875, -0.04223823547363281, -0.05173301696777344, -0.03637886047363281, 0.3580322265625, -0.1162109375, 0.03094482421875, 0.0955963134765625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000012.npy"} +{"epoch": 0.01762114537444934, "step": 13, "batch_size": 64, "mean": -0.012606710195541382, "std": 0.3879392147064209, "min": -0.8348846435546875, "p10": -0.41876373291015623, "median": -0.027482986450195312, "p90": 0.3959533691406251, "max": 1.19598388671875, "pos_frac": 0.453125, "sample": [-0.2348785400390625, -0.2889060974121094, -0.1236724853515625, -0.18304443359375, 0.18569183349609375, -0.17574310302734375, 0.2083740234375, 0.039852142333984375, 0.40708160400390625, 0.09680557250976562, -0.0475616455078125, -0.0446624755859375, -0.5437164306640625, -0.4591636657714844, -0.039760589599609375, 0.6646957397460938, -0.17181396484375, -0.005645751953125, -0.3496570587158203, 0.00408172607421875, -0.05970573425292969, 0.15435791015625, 0.8725433349609375, 0.2503948211669922, -0.22856712341308594, -0.8348846435546875, -0.21506881713867188, -0.0393524169921875, -0.382781982421875, -0.79168701171875, -0.4132843017578125, -0.34661865234375, 0.5568046569824219, 0.08660507202148438, 0.0198516845703125, 0.10404205322265625, -0.421112060546875, -0.26980018615722656, -0.1978321075439453, 0.03546142578125, 0.34442138671875, 0.14892578125, 0.752105712890625, 0.17776107788085938, 0.273193359375, 0.3300304412841797, -0.345977783203125, -0.00652313232421875, -0.015613555908203125, -0.150482177734375, -0.7963180541992188, 0.36998748779296875, -0.14810943603515625, -0.221832275390625, -0.35643768310546875, 0.1851959228515625, 0.05673980712890625, 0.90283203125, 1.19598388671875, -0.19255828857421875, 0.002532958984375, 0.1920013427734375, 0.21875, -0.5411605834960938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000013.npy"} +{"epoch": 0.01908957415565345, "step": 14, "batch_size": 64, "mean": -0.03501787781715393, "std": 0.42796674370765686, "min": -1.57928466796875, "p10": -0.4372806549072265, "median": -0.023003578186035156, "p90": 0.3856952667236328, "max": 1.204925537109375, "pos_frac": 0.46875, "sample": [-0.94793701171875, -0.098419189453125, -0.019533157348632812, -0.07656478881835938, 1.204925537109375, -0.15840911865234375, -0.48681640625, -0.0546112060546875, -0.30016326904296875, 0.3819541931152344, 0.24332427978515625, -0.483642578125, -0.2224578857421875, 0.9245376586914062, 0.5041923522949219, 0.0709686279296875, -0.18712615966796875, -0.08530426025390625, 0.7054367065429688, -0.14690017700195312, 0.246734619140625, -0.21512985229492188, 0.06009674072265625, 0.27458953857421875, 0.4475555419921875, -0.038219451904296875, -1.57928466796875, 0.0550537109375, 0.0803680419921875, 0.0171661376953125, -0.0264739990234375, -0.03621864318847656, -0.147247314453125, 0.22559356689453125, 0.04479026794433594, 0.004367828369140625, 0.12919998168945312, -1.0123291015625, 0.06010627746582031, -0.14783287048339844, 0.387298583984375, -0.07262039184570312, -0.1439361572265625, -0.35406494140625, 0.1602325439453125, 0.06398391723632812, 0.09175872802734375, -0.2882652282714844, -0.36578369140625, -0.014789581298828125, -1.2387771606445312, -0.05811309814453125, 0.0555877685546875, -0.17207717895507812, -0.06554794311523438, 0.5714874267578125, 0.17087554931640625, 0.3126220703125, -0.08025360107421875, 0.07622146606445312, -0.4679222106933594, 0.08835601806640625, 0.10344696044921875, -0.21120452880859375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000014.npy"} +{"epoch": 0.020558002936857563, "step": 15, "batch_size": 64, "mean": 0.04670119285583496, "std": 0.32564663887023926, "min": -1.0027084350585938, "p10": -0.3901145935058593, "median": 0.09145069122314453, "p90": 0.4201461791992189, "max": 0.85986328125, "pos_frac": 0.59375, "sample": [0.09296607971191406, 0.0611572265625, 0.23439598083496094, -0.00786590576171875, 0.615325927734375, 0.12107467651367188, -0.11824417114257812, -0.452392578125, -0.0893096923828125, 0.6761856079101562, 0.1387481689453125, 0.07974815368652344, 0.15484619140625, -0.27325439453125, 0.6312294006347656, 0.4415283203125, 0.16476821899414062, -0.06856536865234375, 0.4557952880859375, 0.12996673583984375, 0.15897369384765625, -1.0027084350585938, 0.17473602294921875, 0.256683349609375, -0.5882129669189453, 0.2909698486328125, 0.09870147705078125, -0.056549072265625, -0.09270477294921875, 0.0880279541015625, -0.1955108642578125, -0.406646728515625, -0.45203399658203125, -0.08617973327636719, 0.17176055908203125, 0.3389892578125, -0.06395721435546875, 0.85986328125, 0.32883453369140625, -0.21018218994140625, -0.07198905944824219, -0.35153961181640625, 0.11896896362304688, 0.089935302734375, 0.21096038818359375, -0.015285491943359375, 0.05908012390136719, -0.1479473114013672, 0.18426513671875, -0.172271728515625, 0.09583282470703125, 0.14064788818359375, 0.073333740234375, 0.1528167724609375, -0.01508331298828125, 0.575408935546875, -0.41314697265625, 0.3702545166015625, 0.23136138916015625, -0.07360076904296875, -0.1052093505859375, -0.8221092224121094, 0.15258407592773438, 0.1206512451171875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000015.npy"} +{"epoch": 0.022026431718061675, "step": 16, "batch_size": 64, "mean": -0.005795121192932129, "std": 0.39462992548942566, "min": -1.1673355102539062, "p10": -0.4273609161376953, "median": -0.01195526123046875, "p90": 0.4184675216674806, "max": 1.029998779296875, "pos_frac": 0.5, "sample": [0.2674427032470703, -0.1385498046875, -0.696929931640625, -0.1425018310546875, 0.32959747314453125, 0.25177764892578125, 0.1607818603515625, 0.10777664184570312, 0.1980133056640625, 0.6677398681640625, -0.1672210693359375, 0.2914276123046875, -0.033721923828125, -0.190765380859375, 0.09005355834960938, -0.23703765869140625, -0.10472869873046875, -0.590118408203125, -0.409515380859375, 0.01983642578125, 0.00717926025390625, 0.42901039123535156, 0.3320045471191406, 0.31528472900390625, 0.2780494689941406, -0.4820709228515625, -0.5661849975585938, -0.13570213317871094, -0.3630218505859375, -0.24225425720214844, 0.18914794921875, 0.242950439453125, 0.0357818603515625, -0.13787078857421875, -0.03108978271484375, 0.2810211181640625, -0.19712066650390625, -0.1622161865234375, 0.16001129150390625, 0.16548538208007812, 0.557373046875, -0.13158416748046875, 0.0076580047607421875, -0.259246826171875, 0.39386749267578125, 0.6802787780761719, 0.010366439819335938, -0.4350090026855469, 0.149658203125, 0.9688873291015625, -0.3290977478027344, -0.2293243408203125, -0.0848846435546875, -0.33362388610839844, 0.03266143798828125, -0.39998626708984375, -0.2189311981201172, 0.3794403076171875, 0.5866355895996094, 1.029998779296875, -0.2976226806640625, -0.820892333984375, -0.2519264221191406, -1.1673355102539062], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000016.npy"} +{"epoch": 0.023494860499265784, "step": 17, "batch_size": 64, "mean": 0.11452355980873108, "std": 0.38878893852233887, "min": -1.0838775634765625, "p10": -0.30015087127685547, "median": 0.11728954315185547, "p90": 0.5046363830566406, "max": 1.3037567138671875, "pos_frac": 0.65625, "sample": [-0.4383735656738281, 0.11283302307128906, 0.21633148193359375, 0.11029815673828125, -0.3097724914550781, -0.11524391174316406, 0.6353607177734375, -1.0540313720703125, 0.00835418701171875, -0.21498870849609375, 0.640289306640625, 0.099578857421875, 0.4999847412109375, 0.69036865234375, 0.04277801513671875, -0.2502288818359375, -0.3105583190917969, 0.13266563415527344, 0.36457061767578125, 0.37050628662109375, 0.06926727294921875, -0.004795074462890625, 0.002773284912109375, 0.2209930419921875, 0.33106231689453125, 0.42730712890625, 0.26168251037597656, -0.046600341796875, 0.5358428955078125, 0.1761474609375, 0.24959564208984375, 1.3037567138671875, 0.2970123291015625, 0.5066299438476562, -0.09285926818847656, 0.3921966552734375, -0.20054244995117188, 0.12174606323242188, -0.04190254211425781, 0.125, 1.1280746459960938, 0.1871795654296875, -0.1412372589111328, 0.2965583801269531, -1.0838775634765625, 0.27911376953125, 0.4964103698730469, -0.27770042419433594, 0.03052520751953125, -0.05928802490234375, -0.1682281494140625, -0.08608627319335938, -0.4320220947265625, -0.3510322570800781, 0.36605072021484375, 0.30426025390625, 0.143218994140625, 0.124542236328125, 0.0160675048828125, 0.01049041748046875, -0.06580543518066406, -0.02423095703125, 0.369873046875, 0.4016151428222656], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000017.npy"} +{"epoch": 0.024963289280469897, "step": 18, "batch_size": 64, "mean": 0.01890420913696289, "std": 0.3216187655925751, "min": -1.153961181640625, "p10": -0.3086668014526367, "median": 0.01666545867919922, "p90": 0.37118492126464847, "max": 0.9289016723632812, "pos_frac": 0.53125, "sample": [-0.00777435302734375, -0.742523193359375, 0.5453109741210938, 0.48625946044921875, -0.2850494384765625, -0.02375030517578125, -0.37720489501953125, -0.4535980224609375, 0.1935272216796875, 0.20523643493652344, 0.030120849609375, -0.0577239990234375, -0.25913238525390625, 0.46079254150390625, -0.066650390625, 0.5001907348632812, -0.21516799926757812, 0.12784576416015625, 0.15195083618164062, -0.0463104248046875, 0.21072959899902344, 0.07101058959960938, 0.272735595703125, -0.28155517578125, 0.16697311401367188, 0.3041667938232422, 0.9289016723632812, -0.47113800048828125, -0.3187885284423828, 0.010091781616210938, 0.3612861633300781, 0.23273658752441406, 0.37542724609375, -0.233978271484375, -0.0324859619140625, -0.5388641357421875, 0.19955825805664062, 0.2884044647216797, -0.17193603515625, 0.11644744873046875, 0.1710205078125, -0.05866241455078125, -0.00826263427734375, -0.076751708984375, 0.4758453369140625, 0.291107177734375, 0.12521743774414062, -0.1333465576171875, -0.243377685546875, 0.11473464965820312, -0.077606201171875, -0.09730339050292969, 0.06638336181640625, -0.044414520263671875, 0.21933746337890625, 0.045440673828125, 0.013824462890625, 0.019506454467773438, -1.153961181640625, 0.3231029510498047, -0.0161590576171875, -0.2720489501953125, -0.18123626708984375, 0.0514068603515625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000018.npy"} +{"epoch": 0.02643171806167401, "step": 19, "batch_size": 64, "mean": 0.07813867926597595, "std": 0.37635213136672974, "min": -0.908203125, "p10": -0.3678413391113281, "median": 0.059157371520996094, "p90": 0.5944869995117188, "max": 1.0789833068847656, "pos_frac": 0.59375, "sample": [0.31450843811035156, -0.03349113464355469, -0.2376861572265625, -0.3932228088378906, 0.10393524169921875, -0.225433349609375, 0.16641616821289062, 0.2717018127441406, -0.12423896789550781, 0.84600830078125, 0.47408294677734375, -0.36837005615234375, 1.0789833068847656, -0.05670928955078125, 0.021148681640625, -0.908203125, 0.2044525146484375, -0.112579345703125, -0.5384292602539062, 0.39752197265625, -0.315032958984375, 0.039775848388671875, 0.037570953369140625, -0.363677978515625, 0.11117744445800781, 0.17821121215820312, 0.31006622314453125, 0.0448455810546875, -0.4073371887207031, -0.36989593505859375, -0.21419525146484375, 0.29718971252441406, 0.036952972412109375, 0.064697265625, 0.7831954956054688, 0.13206863403320312, -0.31635284423828125, -0.06959724426269531, 0.587127685546875, -0.366607666015625, -0.04837799072265625, 0.5976409912109375, 0.11688995361328125, 0.1429595947265625, 0.06494712829589844, 0.3894462585449219, 0.5069503784179688, -0.0399017333984375, 0.19757080078125, 0.1195220947265625, 0.7054595947265625, 0.07476425170898438, 0.05361747741699219, -0.08666610717773438, 0.927703857421875, 0.07272911071777344, -0.4679718017578125, -0.17482376098632812, 0.2041168212890625, -0.33953857421875, 0.7154998779296875, -0.18437576293945312, -0.035221099853515625, 0.40735626220703125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000019.npy"} +{"epoch": 0.027900146842878122, "step": 20, "batch_size": 64, "mean": -0.007474362850189209, "std": 0.37891167402267456, "min": -1.423004150390625, "p10": -0.42857627868652337, "median": 0.024425506591796875, "p90": 0.3195617675781252, "max": 1.0319137573242188, "pos_frac": 0.515625, "sample": [0.127410888671875, 0.15740394592285156, 0.08493995666503906, -0.26739501953125, -0.4540863037109375, 0.2276744842529297, -0.0469970703125, 0.1801166534423828, -0.14490509033203125, 0.224853515625, -0.8501129150390625, -0.0836944580078125, -0.2990226745605469, 0.1475238800048828, -0.031005859375, 0.04676055908203125, -0.4557991027832031, 0.11981582641601562, -0.24111175537109375, 0.262481689453125, 0.36861419677734375, -0.28652191162109375, 0.197998046875, -0.18596458435058594, 0.0020904541015625, -0.08450889587402344, -0.7392654418945312, 0.3557262420654297, -0.33115386962890625, -0.3690528869628906, -0.19121742248535156, -0.5263442993164062, -0.17290878295898438, 0.5029621124267578, 0.18671607971191406, -0.062042236328125, -0.53265380859375, -1.423004150390625, -0.049571990966796875, -0.09369659423828125, 0.2062835693359375, -0.09032440185546875, 0.1373291015625, -0.11797332763671875, 0.25336456298828125, 0.14310264587402344, 0.2387237548828125, 0.99017333984375, 0.20870208740234375, -0.0541534423828125, -0.3358612060546875, 0.344024658203125, 0.11568832397460938, 0.176483154296875, -0.2957611083984375, -0.12798118591308594, 0.73028564453125, 0.07650375366210938, 0.12550926208496094, -0.022989273071289062, 0.19858169555664062, 1.0319137573242188, 0.16527557373046875, 0.1536884307861328], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000020.npy"} +{"epoch": 0.02936857562408223, "step": 21, "batch_size": 64, "mean": 0.07176269590854645, "std": 0.44084295630455017, "min": -0.9267120361328125, "p10": -0.3498649597167969, "median": 0.048831939697265625, "p90": 0.6704814910888677, "max": 1.50592041015625, "pos_frac": 0.53125, "sample": [0.2385101318359375, -0.09771347045898438, 0.7795181274414062, 0.73162841796875, 1.50592041015625, 0.0799856185913086, -0.04314994812011719, 0.014089584350585938, -0.3404388427734375, -0.2336273193359375, 0.723663330078125, -0.35298919677734375, 0.2567291259765625, 0.08767318725585938, 0.20697784423828125, 0.7407302856445312, 0.30792236328125, -0.17719078063964844, -0.2393207550048828, 0.24701690673828125, 0.093170166015625, -0.22674560546875, -0.241668701171875, -0.09837722778320312, -0.37800025939941406, 1.193359375, -0.656890869140625, -0.3876380920410156, -0.2186279296875, 0.10120391845703125, -0.9267120361328125, -0.03969573974609375, -0.18572616577148438, 0.15975189208984375, 0.10814666748046875, -0.05499839782714844, -0.3415260314941406, 0.0255584716796875, 0.363739013671875, -0.7846298217773438, -0.2891998291015625, -0.4645423889160156, -0.00348663330078125, -0.12889862060546875, 0.22324371337890625, -0.24961090087890625, 0.12803268432617188, 0.23174285888671875, 0.4930095672607422, -0.1624908447265625, 0.1545734405517578, 0.4149055480957031, 0.4764518737792969, 0.44012451171875, 0.1652374267578125, -0.3425750732421875, -0.088623046875, -0.12532806396484375, 1.1958160400390625, 0.5463905334472656, 0.07210540771484375, 0.0896759033203125, 0.1792144775390625, -0.3025836944580078], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000021.npy"} +{"epoch": 0.030837004405286344, "step": 22, "batch_size": 64, "mean": 0.12175118923187256, "std": 0.3927026391029358, "min": -0.965057373046875, "p10": -0.32214508056640623, "median": 0.12843990325927734, "p90": 0.5325328826904298, "max": 1.146759033203125, "pos_frac": 0.671875, "sample": [-0.174652099609375, 0.947906494140625, -0.6212539672851562, 0.08773040771484375, 0.3201446533203125, -0.21434783935546875, -0.4738922119140625, 0.119842529296875, 0.12073516845703125, 0.5648956298828125, 0.4872550964355469, 0.08453369140625, -0.15692138671875, 0.5035362243652344, -0.18318748474121094, 0.04465484619140625, 0.3371124267578125, 0.24120712280273438, 0.13329315185546875, -0.9165802001953125, -0.05419921875, 1.0814971923828125, -0.305023193359375, -0.3294830322265625, 0.25972557067871094, -0.03719520568847656, 0.399383544921875, 0.08949661254882812, 0.183868408203125, 0.025421142578125, 0.42928123474121094, 0.05977630615234375, 0.5737762451171875, -0.09898757934570312, 0.1510467529296875, 0.1378173828125, 0.424163818359375, -0.20098114013671875, -0.1412220001220703, 0.44268035888671875, 0.263885498046875, 0.2796363830566406, 0.1354961395263672, 0.4304008483886719, -0.0018138885498046875, 0.5449600219726562, -0.4027099609375, -0.4361724853515625, 1.146759033203125, 0.3247222900390625, 0.12709617614746094, 0.37863922119140625, -0.08034324645996094, -0.29961585998535156, -0.965057373046875, 0.1309814453125, 0.12978363037109375, 0.2654228210449219, 0.26818084716796875, 0.500946044921875, 0.070831298828125, 0.7005577087402344, 0.00897216796875, -0.07233810424804688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000022.npy"} +{"epoch": 0.032305433186490456, "step": 23, "batch_size": 64, "mean": 0.17535346746444702, "std": 0.4857688248157501, "min": -1.28759765625, "p10": -0.334150505065918, "median": 0.18767261505126953, "p90": 0.8059234619140627, "max": 1.4090118408203125, "pos_frac": 0.65625, "sample": [-0.13706398010253906, -0.30406951904296875, 0.8749847412109375, 0.013286590576171875, 0.22306060791015625, 0.6394996643066406, 0.8266372680664062, -0.1624603271484375, -0.6095294952392578, -1.0809783935546875, 1.4090118408203125, 1.2901382446289062, -0.28466796875, 0.0331268310546875, 0.29277801513671875, 0.458740234375, -0.23298263549804688, 0.6969566345214844, -0.41542816162109375, -0.06342506408691406, 0.4706268310546875, 0.04803466796875, 0.85064697265625, 0.2929496765136719, 0.012544631958007812, -0.0035247802734375, 0.22435760498046875, -0.2857856750488281, 0.24467849731445312, 0.25567626953125, 0.2912483215332031, 0.86859130859375, -0.0786895751953125, 0.3882884979248047, 0.45945167541503906, 0.24401092529296875, -0.0182037353515625, 0.4577827453613281, 0.14911842346191406, 0.00058746337890625, 0.2064208984375, 0.1817779541015625, -0.0091552734375, 0.4867210388183594, 0.2744483947753906, 1.20001220703125, 0.09996795654296875, -1.28759765625, 0.6937255859375, -0.3948211669921875, 0.19356727600097656, 0.3756980895996094, 0.061817169189453125, -0.3351764678955078, 0.07935714721679688, 0.32511138916015625, -0.4190826416015625, 0.369964599609375, -0.06822586059570312, -0.331756591796875, 0.7575912475585938, -0.13726806640625, -0.062229156494140625, 0.6217479705810547], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000023.npy"} +{"epoch": 0.033773861967694566, "step": 24, "batch_size": 64, "mean": 0.2795855402946472, "std": 0.3784217834472656, "min": -0.43241119384765625, "p10": -0.22492008209228512, "median": 0.2399749755859375, "p90": 0.7904762268066408, "max": 1.299652099609375, "pos_frac": 0.765625, "sample": [0.6617965698242188, 1.0125541687011719, 0.0042476654052734375, -0.15191650390625, 0.2850189208984375, 0.19370079040527344, 0.23804473876953125, -0.3353118896484375, 0.43947601318359375, 0.13137054443359375, 0.297149658203125, 0.12730789184570312, -0.028049468994140625, 0.6472930908203125, 0.46147918701171875, 0.20560073852539062, 0.295440673828125, 0.5728969573974609, 0.941925048828125, -0.02013397216796875, 0.5930595397949219, -0.4172325134277344, -0.24114036560058594, 0.4208221435546875, -0.186309814453125, 0.6274757385253906, 0.4910545349121094, -0.24755859375, 0.3604583740234375, 0.4060821533203125, 1.299652099609375, 0.809600830078125, 0.5937423706054688, 0.1502838134765625, 0.7526168823242188, -0.18707275390625, 0.2057037353515625, -0.343994140625, -0.10120391845703125, -0.30207061767578125, 0.7451324462890625, 0.6300048828125, 0.03759765625, -0.04288482666015625, 0.2743988037109375, 1.01361083984375, 0.80670166015625, 0.10854911804199219, 0.240478515625, 0.1705303192138672, 0.3323249816894531, 0.19355201721191406, 0.16436004638671875, 0.11181068420410156, 0.9485015869140625, 0.239471435546875, 0.44542694091796875, 0.36034393310546875, 0.20473098754882812, -0.43241119384765625, 0.3222198486328125, -0.0962677001953125, 0.1688690185546875, 0.282562255859375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000024.npy"} +{"epoch": 0.03524229074889868, "step": 25, "batch_size": 64, "mean": 0.28972867131233215, "std": 0.42996782064437866, "min": -0.8564548492431641, "p10": -0.1403076171875, "median": 0.26364898681640625, "p90": 0.8717239379882815, "max": 1.4050140380859375, "pos_frac": 0.8125, "sample": [-0.034908294677734375, 0.06756973266601562, 1.12762451171875, 0.2424774169921875, 0.24890899658203125, 0.029083251953125, -0.13826751708984375, 0.7153778076171875, 0.384002685546875, 0.030111312866210938, -0.43766021728515625, 0.7855224609375, 0.26735687255859375, 0.8965301513671875, 1.4050140380859375, -0.3927326202392578, -0.8564548492431641, 0.296783447265625, -0.0710296630859375, 0.460601806640625, 0.11241912841796875, 0.153411865234375, 0.29844093322753906, 0.33526611328125, 0.36457061767578125, 0.48654937744140625, 0.38600921630859375, 1.0509872436523438, -0.2946319580078125, 0.10790824890136719, 0.25994110107421875, 0.40660858154296875, 0.44556427001953125, 0.3694610595703125, 0.15462493896484375, -0.06798934936523438, -0.1168670654296875, 1.2345428466796875, 0.0488433837890625, 0.214263916015625, -0.24599266052246094, 0.8138427734375, 0.4698944091796875, 0.2694587707519531, 0.3526649475097656, 0.44550323486328125, -0.14118194580078125, 0.0381622314453125, 0.8967971801757812, 0.4531745910644531, 1.011260986328125, 0.45055389404296875, 0.6560630798339844, 0.2122802734375, 0.6307716369628906, 0.07121086120605469, -0.80462646484375, 0.7689704895019531, 0.17465972900390625, 0.13364028930664062, 0.20592498779296875, 0.5655975341796875, 0.0760650634765625, 0.062103271484375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000025.npy"} +{"epoch": 0.03671071953010279, "step": 26, "batch_size": 64, "mean": 0.40463075041770935, "std": 0.5524094700813293, "min": -0.560577392578125, "p10": -0.15501251220703124, "median": 0.33834266662597656, "p90": 1.2005439758300782, "max": 1.754241943359375, "pos_frac": 0.71875, "sample": [0.8695831298828125, -0.06521987915039062, 0.07854270935058594, 0.19022750854492188, 1.192901611328125, 0.342864990234375, 0.66754150390625, 0.3338203430175781, 0.23856353759765625, -0.3984394073486328, 0.763946533203125, 0.18773651123046875, 1.754241943359375, 0.5292816162109375, -0.06752395629882812, 1.436676025390625, -0.39771270751953125, 0.16748046875, 0.645782470703125, -0.15935897827148438, 0.4400157928466797, 0.11839675903320312, 0.5306015014648438, 0.3992118835449219, -0.0424957275390625, 1.2038192749023438, 0.2618255615234375, -0.560577392578125, 0.4793052673339844, 0.6750717163085938, -0.4965972900390625, 0.12728118896484375, 0.3705024719238281, 0.46209144592285156, -0.1118316650390625, 0.472320556640625, 1.680145263671875, -0.07456207275390625, 0.3228492736816406, 1.5840606689453125, 0.4275703430175781, 0.535614013671875, 1.6875228881835938, 0.894378662109375, 0.42963409423828125, 0.9822235107421875, -0.045360565185546875, 0.4308624267578125, -0.08806610107421875, -0.14487075805664062, 0.1633319854736328, -0.35511016845703125, -0.10185813903808594, -0.04714202880859375, -0.0308074951171875, 0.525909423828125, 1.0108261108398438, 0.9892578125, 1.51458740234375, 0.20880889892578125, 0.298309326171875, -0.20826339721679688, 0.5991020202636719, 0.06753730773925781], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000026.npy"} +{"epoch": 0.0381791483113069, "step": 27, "batch_size": 64, "mean": 0.5461829900741577, "std": 0.6362072825431824, "min": -0.4836921691894531, "p10": -0.1802103042602539, "median": 0.43289947509765625, "p90": 1.454279708862305, "max": 2.9795303344726562, "pos_frac": 0.828125, "sample": [0.24343299865722656, 0.33861541748046875, 1.4904327392578125, 0.27860260009765625, 0.6199989318847656, 1.2081451416015625, -0.319671630859375, 0.367584228515625, 1.6176528930664062, -0.4372367858886719, 1.0322074890136719, 0.24822616577148438, 1.3699226379394531, 0.09881782531738281, 0.903472900390625, 0.11183929443359375, 2.9795303344726562, 0.45363616943359375, 0.3013191223144531, 0.41754913330078125, 0.052761077880859375, 0.4790496826171875, 0.06432342529296875, 0.07293701171875, 0.5132369995117188, 0.4954242706298828, -0.21590232849121094, -0.010942459106445312, 0.4431648254394531, 1.1533966064453125, 0.5880508422851562, -0.06926536560058594, -0.18208885192871094, 2.104705810546875, 0.38590240478515625, 0.4157562255859375, 0.6272048950195312, 0.5874557495117188, 0.19384765625, 0.29819488525390625, -0.4836921691894531, 0.616546630859375, -0.009979248046875, 1.5452957153320312, 0.381072998046875, 1.91741943359375, -0.27193450927734375, 0.43837738037109375, 1.1091461181640625, 0.5985031127929688, 1.6460723876953125, 0.3067970275878906, 0.41107177734375, 0.6802520751953125, 1.03887939453125, 0.45005035400390625, 1.1368484497070312, -0.19653701782226562, 0.2853717803955078, -0.1758270263671875, 0.42742156982421875, 0.5167312622070312, 0.5134811401367188, 0.7530517578125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000027.npy"} +{"epoch": 0.039647577092511016, "step": 28, "batch_size": 64, "mean": 0.3209477663040161, "std": 0.5346801280975342, "min": -1.4305572509765625, "p10": -0.15086078643798825, "median": 0.30631256103515625, "p90": 0.9560432434082038, "max": 1.6226158142089844, "pos_frac": 0.75, "sample": [0.7271003723144531, 0.6946258544921875, 0.32550811767578125, 0.1856822967529297, 0.35079002380371094, -0.167388916015625, 0.06905555725097656, 0.22422027587890625, -0.1009368896484375, 0.15371322631835938, -0.205108642578125, 1.061248779296875, 0.601348876953125, -0.22261810302734375, 0.43747711181640625, -0.11229515075683594, -0.003387451171875, 0.6094818115234375, -0.05999755859375, 0.30199432373046875, 0.22014999389648438, 0.348358154296875, 0.27663421630859375, 1.379241943359375, -0.0331268310546875, 0.8011398315429688, 0.54302978515625, 0.5303421020507812, 1.6226158142089844, 0.35066986083984375, 0.715179443359375, 0.556915283203125, -0.0450439453125, -0.08147430419921875, 0.4802894592285156, 0.400634765625, 0.4863128662109375, 0.51043701171875, 0.14151763916015625, 0.0093231201171875, 0.19637298583984375, 0.28156280517578125, -0.06375694274902344, 1.022430419921875, 1.32916259765625, 0.308685302734375, 0.63909912109375, 0.15796470642089844, 1.4008331298828125, 0.46411895751953125, 0.7941932678222656, -0.7052993774414062, -0.04734039306640625, 0.623992919921875, -0.46686553955078125, 0.3039398193359375, 0.4079132080078125, 1.395355224609375, 0.5783843994140625, 0.0323638916015625, 0.2709846496582031, -1.4305572509765625, -1.1541976928710938, 0.11765670776367188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000028.npy"} +{"epoch": 0.041116005873715125, "step": 29, "batch_size": 64, "mean": 0.4545968770980835, "std": 0.580596387386322, "min": -0.4996795654296875, "p10": -0.20259227752685546, "median": 0.3688812255859375, "p90": 1.1624137878417968, "max": 2.2873992919921875, "pos_frac": 0.78125, "sample": [0.8313217163085938, -0.1342296600341797, 0.78594970703125, 0.26949310302734375, 0.32234954833984375, 0.7414588928222656, -0.0903778076171875, 0.7944259643554688, 0.2894744873046875, -0.06122589111328125, 0.3324737548828125, -0.095001220703125, 1.56219482421875, 0.6922531127929688, -0.03508758544921875, 1.1573028564453125, 0.9920578002929688, 0.22082138061523438, 0.13214111328125, 1.1295089721679688, 0.01987457275390625, 0.369171142578125, -0.20410919189453125, 0.26120567321777344, 1.2472610473632812, 0.447784423828125, -0.48968505859375, 0.6872215270996094, 0.473419189453125, -0.2796630859375, 0.0387420654296875, 0.7076416015625, 1.7905426025390625, 0.1166229248046875, 0.36859130859375, 0.139373779296875, -0.17605209350585938, 0.40601158142089844, 0.47171783447265625, -0.244140625, 0.3268585205078125, 0.5653610229492188, 0.2039337158203125, 0.7804336547851562, 0.4064044952392578, -0.1990528106689453, 1.0268325805664062, 0.01529693603515625, 0.4518241882324219, -0.20674896240234375, 0.10680580139160156, 0.054485321044921875, 1.1646041870117188, 0.5922622680664062, 0.8346710205078125, 2.2873992919921875, -0.30193328857421875, 1.7874069213867188, 0.148101806640625, 0.4899864196777344, 1.7384719848632812, 0.6924667358398438, -0.4996795654296875, 0.6391716003417969], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000029.npy"} +{"epoch": 0.042584434654919234, "step": 30, "batch_size": 64, "mean": 0.6269862651824951, "std": 0.5812134742736816, "min": -0.37693023681640625, "p10": -0.004214668273925755, "median": 0.5786685943603516, "p90": 1.4380279541015628, "max": 2.46014404296875, "pos_frac": 0.890625, "sample": [0.8638153076171875, 0.45198822021484375, -0.10606765747070312, 0.4820709228515625, 1.5224456787109375, -0.19010162353515625, 0.2446441650390625, -0.015625, -0.04760932922363281, 0.9754638671875, 0.6531219482421875, 1.038818359375, 0.7793426513671875, 0.8369026184082031, 0.365447998046875, 1.1918792724609375, 0.8212509155273438, 1.341064453125, 0.6800880432128906, 1.3029632568359375, 0.9974288940429688, 0.9395294189453125, 2.46014404296875, 0.86517333984375, 1.0176162719726562, 0.1818408966064453, 0.3135528564453125, 0.20787811279296875, -0.1735057830810547, 0.1756305694580078, 0.60675048828125, 0.08867263793945312, 0.0622711181640625, 0.022409439086914062, 0.31612586975097656, 0.2505645751953125, 1.697601318359375, 1.479583740234375, 1.516998291015625, 0.6615142822265625, 0.06835746765136719, 1.33392333984375, 0.503326416015625, 0.6279983520507812, -0.37693023681640625, 0.8445358276367188, 1.8282012939453125, 0.15890884399414062, 0.910369873046875, 0.6788864135742188, 1.7757110595703125, 0.818023681640625, 0.39696311950683594, -0.27996063232421875, 0.14829444885253906, 0.5059661865234375, 0.5505867004394531, 0.06104278564453125, 0.06081390380859375, 0.2594318389892578, 0.2128143310546875, 1.2632522583007812, 0.7739715576171875, 0.12294578552246094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000030.npy"} +{"epoch": 0.04405286343612335, "step": 31, "batch_size": 64, "mean": 0.455756276845932, "std": 0.6212694644927979, "min": -0.9707794189453125, "p10": -0.1686676025390625, "median": 0.3766822814941406, "p90": 1.3448371887207038, "max": 2.7499618530273438, "pos_frac": 0.765625, "sample": [0.32097625732421875, 1.0946846008300781, -0.154632568359375, 0.09093093872070312, 1.41571044921875, 0.7675323486328125, -0.1746826171875, -0.0802764892578125, 1.0015106201171875, -0.9707794189453125, 2.7499618530273438, 0.23394012451171875, 0.0976409912109375, 0.42401695251464844, 0.0739898681640625, 0.2404651641845703, 0.5751800537109375, 0.6842823028564453, 0.8642578125, 0.7995338439941406, 0.1746978759765625, 0.24778366088867188, 0.24066925048828125, -0.145904541015625, 0.2834205627441406, 0.2039813995361328, 0.58782958984375, 0.4907951354980469, 0.7276573181152344, -0.5280838012695312, 1.552093505859375, 0.7362213134765625, 1.969329833984375, -0.0862884521484375, 0.2557964324951172, -0.0385894775390625, -0.23303985595703125, 0.73992919921875, 0.0663909912109375, 0.38111114501953125, 0.6009979248046875, 1.56024169921875, -0.2775421142578125, 0.4797821044921875, 0.1212921142578125, -0.3141632080078125, 0.714508056640625, 0.5193328857421875, 0.028301239013671875, -0.017597198486328125, 0.49297332763671875, 1.1794662475585938, 1.4725189208984375, -0.1065826416015625, -0.12060546875, 0.37225341796875, 0.5450248718261719, -0.2332305908203125, 0.47866058349609375, 0.24896240234375, 0.9591445922851562, 1.4775619506835938, 0.9031600952148438, 0.4038963317871094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000031.npy"} +{"epoch": 0.04552129221732746, "step": 32, "batch_size": 64, "mean": 0.7796535491943359, "std": 0.9129959940910339, "min": -0.6838226318359375, "p10": -0.07400493621826171, "median": 0.604095458984375, "p90": 2.0114624023437506, "max": 4.723175048828125, "pos_frac": 0.875, "sample": [0.4930572509765625, 0.5006103515625, 0.32625579833984375, 1.26214599609375, 0.934234619140625, 0.95098876953125, -0.6656723022460938, 0.2277069091796875, 0.4428730010986328, 0.7781219482421875, 1.3742523193359375, 0.38470458984375, 0.21482086181640625, 0.6863632202148438, 0.119720458984375, 0.9384193420410156, -0.058094024658203125, 1.1844139099121094, -0.6838226318359375, 2.4561004638671875, 0.7416038513183594, 1.6293869018554688, 1.8296051025390625, -0.13483428955078125, 0.6479415893554688, 0.5602493286132812, 0.27105712890625, 0.05587005615234375, -0.5831336975097656, 0.8554306030273438, 0.855804443359375, 0.10352134704589844, 0.2916984558105469, 2.5919342041015625, 1.7072067260742188, 0.5059299468994141, 1.577484130859375, 0.6767959594726562, -0.08082389831542969, 0.13416290283203125, 2.5975189208984375, 0.8344268798828125, 0.00756072998046875, 0.7703018188476562, 4.723175048828125, 1.2043304443359375, 0.06940460205078125, 2.0894012451171875, 2.2071533203125, -0.28684234619140625, 0.4555835723876953, 0.47161865234375, 2.1748199462890625, 0.7835502624511719, -0.31211090087890625, 0.07854652404785156, 1.2486648559570312, 0.87738037109375, 0.01064300537109375, 1.147430419921875, 0.32579612731933594, 0.38597869873046875, 1.56854248046875, 0.3608589172363281], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000032.npy"} +{"epoch": 0.04698972099853157, "step": 33, "batch_size": 64, "mean": 0.6041243076324463, "std": 0.6999017000198364, "min": -0.8513336181640625, "p10": -0.21276893615722656, "median": 0.5649242401123047, "p90": 1.4489803314208989, "max": 2.56805419921875, "pos_frac": 0.78125, "sample": [-0.0405120849609375, 0.49964332580566406, 0.19104766845703125, 0.6828842163085938, 2.1191558837890625, 0.12215423583984375, 0.3084716796875, 0.761444091796875, -0.11917877197265625, 0.7713165283203125, 0.20186614990234375, 0.7496566772460938, 1.856292724609375, 1.1082305908203125, 1.0775299072265625, 0.33797454833984375, 0.9718170166015625, -0.5914382934570312, 1.315643310546875, -0.8513336181640625, -0.14031219482421875, 1.6715240478515625, 0.9377593994140625, 1.0713577270507812, 0.8793411254882812, -0.20282745361328125, 1.32861328125, 0.13155364990234375, -0.18414306640625, 2.56805419921875, 0.18963623046875, -0.39649200439453125, 2.1543045043945312, 0.0060214996337890625, -0.01349639892578125, 0.5353355407714844, 0.594512939453125, -0.006603240966796875, 0.34004974365234375, 0.2556037902832031, 0.266754150390625, 0.6891403198242188, -0.338287353515625, 0.3900489807128906, 0.6704597473144531, 1.0946426391601562, 0.4140472412109375, 1.73858642578125, 0.9208450317382812, -0.22906875610351562, 0.6431732177734375, -0.30619239807128906, 1.3412551879882812, -0.21702957153320312, 1.35107421875, 0.7650909423828125, 1.3281021118164062, 0.11046600341796875, 0.9183731079101562, 0.7323493957519531, 1.121673583984375, 0.38674163818359375, 0.18831253051757812, 1.4909400939941406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000033.npy"} +{"epoch": 0.048458149779735685, "step": 34, "batch_size": 64, "mean": 0.7559161186218262, "std": 0.8909318447113037, "min": -0.49982452392578125, "p10": -0.17919960021972653, "median": 0.6216468811035156, "p90": 2.200646209716797, "max": 2.9101715087890625, "pos_frac": 0.734375, "sample": [2.2445755004882812, -0.314208984375, -0.2647552490234375, -0.018337249755859375, 2.9101715087890625, 0.5397167205810547, 1.3574371337890625, 1.229644775390625, -0.146697998046875, 0.1887359619140625, 0.5485992431640625, -0.14276695251464844, 0.9389801025390625, 0.1338348388671875, 2.3433990478515625, 2.041046142578125, 0.2809734344482422, -0.07961273193359375, 2.3356781005859375, 0.0756683349609375, 1.0349655151367188, -0.49982452392578125, 0.8975963592529297, 1.4773979187011719, 0.95892333984375, 1.4300918579101562, 1.8724212646484375, -0.4244117736816406, 0.21533203125, 0.7963981628417969, 0.6422576904296875, 0.634521484375, 0.9137916564941406, 0.8878879547119141, 2.563201904296875, 1.0241546630859375, 0.8103790283203125, 2.09814453125, 0.23275375366210938, -0.19224929809570312, -0.226165771484375, 0.45401573181152344, 0.9378795623779297, 1.2517318725585938, 0.6648406982421875, 1.1154975891113281, -0.09891510009765625, -0.026775360107421875, 1.8759078979492188, -0.1159210205078125, 0.3456077575683594, -0.2333526611328125, 0.21941375732421875, 0.6087722778320312, 0.1105194091796875, -0.06910133361816406, 2.6910476684570312, -0.00121307373046875, -0.14875030517578125, 0.918914794921875, 1.8325653076171875, 2.5880126953125, 0.10651206970214844, 0.001773834228515625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000034.npy"} +{"epoch": 0.049926578560939794, "step": 35, "batch_size": 64, "mean": 1.1839113235473633, "std": 1.1648746728897095, "min": -0.6998062133789062, "p10": 0.03511772155761721, "median": 0.8715944290161133, "p90": 2.424264526367188, "max": 6.702545166015625, "pos_frac": 0.921875, "sample": [1.5087127685546875, 1.3504867553710938, 3.29400634765625, 1.9464912414550781, -0.26476287841796875, 0.9921073913574219, 0.456268310546875, -0.33984375, 0.11785507202148438, 1.5682373046875, 2.1190338134765625, 0.2868003845214844, 0.8423690795898438, 0.6700477600097656, 0.027400970458984375, 0.47927093505859375, 2.2074432373046875, 3.8472366333007812, 0.83807373046875, 0.47344970703125, 0.8476581573486328, 2.0143585205078125, 3.4216461181640625, -0.0605621337890625, 0.57696533203125, 0.6333847045898438, 0.16665267944335938, 0.8574752807617188, 0.0575103759765625, 1.21649169921875, 6.702545166015625, 1.6593170166015625, 0.83648681640625, 1.2130355834960938, 0.5167198181152344, 2.62591552734375, 0.3964195251464844, 1.7793426513671875, 2.0584716796875, 2.5091400146484375, 0.526092529296875, 2.2383270263671875, 1.0389328002929688, 1.5889205932617188, 0.6541099548339844, 0.05312347412109375, 1.51275634765625, 2.2245330810546875, 1.0032577514648438, 0.7273483276367188, 0.8857135772705078, 0.10546875, 0.8194580078125, 1.6051177978515625, 0.7427902221679688, 1.1264381408691406, 1.287994384765625, 0.025587081909179688, 1.2847824096679688, -0.6998062133789062, 0.49095916748046875, 2.5039520263671875, 1.6683502197265625, -0.09354019165039062], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000035.npy"} +{"epoch": 0.0513950073421439, "step": 36, "batch_size": 64, "mean": 1.1760879755020142, "std": 1.3324817419052124, "min": -2.9143905639648438, "p10": -0.13614082336425776, "median": 1.075190544128418, "p90": 2.9579605102539075, "max": 4.51483154296875, "pos_frac": 0.828125, "sample": [1.25433349609375, -0.018011093139648438, 1.1426219940185547, 1.4262542724609375, 1.4538803100585938, 0.7426071166992188, 1.736907958984375, -0.5084457397460938, -0.16312026977539062, 2.113494873046875, 2.4500503540039062, 0.018381118774414062, -0.31264495849609375, 0.4693126678466797, 0.0151824951171875, 1.2799453735351562, -0.053844451904296875, 1.510986328125, 1.7447967529296875, 0.3872833251953125, -0.0064239501953125, -0.6350421905517578, 0.18985748291015625, 0.16949081420898438, 3.5717239379882812, 4.1730804443359375, 1.8537178039550781, -0.7015762329101562, 3.23358154296875, 0.7422561645507812, 0.6931686401367188, 1.4621429443359375, -0.07318878173828125, 3.519683837890625, 0.25800323486328125, 0.07697105407714844, 2.44049072265625, 0.41327667236328125, 2.3362960815429688, 0.8310546875, 2.1562271118164062, 1.2127132415771484, 2.550567626953125, 1.175954818725586, 3.069549560546875, 1.2262725830078125, 0.37056732177734375, 4.51483154296875, 0.9289588928222656, -2.9143905639648438, 2.397533416748047, 0.8883514404296875, 2.2650604248046875, 1.0077590942382812, 2.6975860595703125, 1.1773357391357422, -0.3577423095703125, 0.4245433807373047, 2.3084564208984375, 0.6686668395996094, 1.3353500366210938, 3.976715087890625, 0.7414474487304688, 0.20878219604492188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000036.npy"} +{"epoch": 0.05286343612334802, "step": 37, "batch_size": 64, "mean": 1.0368335247039795, "std": 1.3945993185043335, "min": -1.4842529296875, "p10": -0.2816133499145508, "median": 0.6734199523925781, "p90": 2.8114158630371096, "max": 6.975982666015625, "pos_frac": 0.796875, "sample": [1.8424530029296875, -0.010080337524414062, 1.0079269409179688, 1.1282119750976562, 0.2503662109375, -0.5421600341796875, -0.3321990966796875, 0.8753280639648438, 0.9111328125, -0.011127471923828125, 0.4281463623046875, 3.1217803955078125, 0.6329269409179688, -0.75604248046875, 0.536041259765625, 0.35186004638671875, 0.6088905334472656, 0.08539390563964844, 1.843790054321289, 6.975982666015625, 1.0340499877929688, 3.910369873046875, 1.1887931823730469, 0.7386894226074219, 0.8312911987304688, 1.5913772583007812, 0.5305938720703125, 0.2173309326171875, 1.0344696044921875, 1.7071552276611328, 0.262054443359375, 0.45505523681640625, 0.3539619445800781, -0.7593841552734375, 0.45001983642578125, -0.09436416625976562, 0.1856250762939453, 1.974395751953125, 2.4093475341796875, 4.1029815673828125, -0.2816448211669922, 2.834320068359375, 0.7139129638671875, 1.037750244140625, -0.2815399169921875, 1.053445816040039, 0.14701271057128906, -0.23674774169921875, 1.0389251708984375, 2.1563377380371094, 2.7579727172851562, -0.07375717163085938, 2.3974075317382812, 2.8878402709960938, 0.32143211364746094, 2.3618850708007812, 1.8156051635742188, -1.4842529296875, 0.1277446746826172, -0.49005889892578125, 2.6074790954589844, 3.3767547607421875, 0.2640380859375, 0.2330474853515625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000037.npy"} +{"epoch": 0.05433186490455213, "step": 38, "batch_size": 64, "mean": 1.440582513809204, "std": 1.6186119318008423, "min": -1.0601272583007812, "p10": 0.059970092773437525, "median": 0.9300365447998047, "p90": 3.110414123535157, "max": 7.52691650390625, "pos_frac": 0.921875, "sample": [0.1803131103515625, 7.52691650390625, 4.257354736328125, 1.8149642944335938, 0.296142578125, 0.28843116760253906, 1.5433120727539062, 0.2144775390625, 0.703216552734375, 2.217254638671875, 1.5897369384765625, 1.629852294921875, 0.688812255859375, 2.9165496826171875, 1.3317794799804688, 0.24283790588378906, 0.54119873046875, 0.28574371337890625, -0.2423534393310547, 2.25811767578125, 0.8905029296875, 2.1641387939453125, 2.8756675720214844, 0.24689674377441406, 3.8358001708984375, 0.2535972595214844, 2.240936279296875, 2.4730072021484375, 1.5536308288574219, 0.6369361877441406, 0.5301628112792969, 3.190765380859375, -1.0601272583007812, 0.8423614501953125, 0.5708942413330078, 0.3881340026855469, 0.6960067749023438, 6.3589019775390625, 0.9695701599121094, 0.08324432373046875, 2.9229278564453125, 4.7562255859375, 2.0233154296875, 0.5848617553710938, 0.36478424072265625, -0.33126068115234375, 0.392791748046875, 0.04999542236328125, 0.8762245178222656, 1.6697540283203125, -0.5834884643554688, 0.46910858154296875, 0.00724029541015625, 1.5966529846191406, 1.2812366485595703, 2.1001052856445312, 1.203460693359375, 5.74884033203125, 1.2128448486328125, 0.51776123046875, 1.84332275390625, -0.23575210571289062, 1.3974075317382812, 2.3032302856445312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000038.npy"} +{"epoch": 0.055800293685756244, "step": 39, "batch_size": 64, "mean": 1.6722155809402466, "std": 1.4027701616287231, "min": -2.1577606201171875, "p10": 0.3071746826171876, "median": 1.4045982360839844, "p90": 3.787967681884767, "max": 4.9370880126953125, "pos_frac": 0.921875, "sample": [2.6805953979492188, 2.0761260986328125, 4.9370880126953125, 2.1060943603515625, 1.228515625, -0.5215911865234375, 2.9737625122070312, 2.830677032470703, 1.582571029663086, 1.9200458526611328, 0.7153739929199219, 4.044731140136719, 1.3022003173828125, 1.3995113372802734, 4.228813171386719, 1.05206298828125, -0.022205352783203125, 0.6290435791015625, 0.6133270263671875, 0.5602340698242188, 0.9107666015625, 2.0433578491210938, 2.3717269897460938, 1.0464611053466797, 1.2720565795898438, 2.4057388305664062, -0.2946624755859375, 1.9521827697753906, 4.580078125, 0.2656097412109375, 0.519073486328125, 0.8105697631835938, 2.9439926147460938, 2.2450408935546875, -2.1577606201171875, 2.026447296142578, 0.4337329864501953, 1.685781478881836, 3.4396800994873047, 4.737945556640625, 1.4096851348876953, 2.397796630859375, 4.116172790527344, 1.134307861328125, 0.9608268737792969, 0.48456573486328125, 0.0426025390625, 0.4817695617675781, 1.7241134643554688, 1.1093578338623047, 0.4041595458984375, 3.4723129272460938, 3.3363494873046875, 1.91607666015625, 0.6767578125, 3.923248291015625, 3.1713104248046875, 1.0358943939208984, 1.6293659210205078, 0.41415977478027344, 0.6433448791503906, -0.027828216552734375, 0.7521934509277344, 2.2384567260742188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000039.npy"} +{"epoch": 0.05726872246696035, "step": 40, "batch_size": 64, "mean": 1.534616470336914, "std": 1.7534064054489136, "min": -1.17205810546875, "p10": -0.24112968444824215, "median": 1.1466178894042969, "p90": 3.6032913208007815, "max": 7.4933319091796875, "pos_frac": 0.828125, "sample": [-0.6027069091796875, 0.9896392822265625, 1.2027053833007812, 0.9969711303710938, 1.4336433410644531, 0.5959701538085938, 0.6089553833007812, 0.268585205078125, 0.15528106689453125, 2.1425209045410156, 7.4933319091796875, 4.219329833984375, 0.5521965026855469, 3.010730743408203, 1.525970458984375, -0.4760322570800781, 2.288595199584961, 2.17828369140625, 0.07696533203125, -1.17205810546875, 2.82220458984375, -0.10637474060058594, 0.57000732421875, -0.21533203125, 2.0966033935546875, 2.2313461303710938, 0.6246833801269531, 0.9114017486572266, 0.4934959411621094, -0.4933929443359375, 3.65185546875, 0.039676666259765625, 3.4899749755859375, 0.0116729736328125, 0.42041587829589844, 4.468292236328125, -0.10320663452148438, 3.3642120361328125, 0.864715576171875, 1.3452301025390625, 4.961540222167969, 0.9419746398925781, 1.41815185546875, 7.469940185546875, -0.2521858215332031, 1.7629928588867188, -0.424957275390625, 1.6345405578613281, 3.0891036987304688, 2.019866943359375, 1.72955322265625, 2.392559051513672, 3.30352783203125, 1.9648418426513672, 1.0451126098632812, 1.0905303955078125, 4.428924560546875, -0.1164093017578125, 1.8161392211914062, 0.3967742919921875, -0.5368576049804688, 1.6655426025390625, 2.260242462158203, 0.17763900756835938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000040.npy"} +{"epoch": 0.05873715124816446, "step": 41, "batch_size": 64, "mean": 1.8351190090179443, "std": 2.0721254348754883, "min": -3.082733154296875, "p10": -0.1660316467285156, "median": 1.3586997985839844, "p90": 4.276817321777345, "max": 9.057609558105469, "pos_frac": 0.875, "sample": [0.9724578857421875, 0.9150390625, 3.8496246337890625, 1.4414329528808594, 0.7476730346679688, 2.4885711669921875, 3.9256134033203125, 2.5008087158203125, -0.53631591796875, 3.58990478515625, 5.803436279296875, 1.0721054077148438, 1.9199066162109375, 9.057609558105469, 0.2318572998046875, 0.7808990478515625, 1.3659744262695312, 1.5006752014160156, 0.355224609375, -1.7166595458984375, 1.3514251708984375, 3.875865936279297, 4.381187438964844, 0.9769649505615234, 6.729949951171875, 1.5123481750488281, 3.950592041015625, 5.183563232421875, 0.759552001953125, 0.16132354736328125, 2.4889602661132812, 0.343597412109375, 6.800048828125, 0.5770931243896484, 1.1477737426757812, 4.033287048339844, 1.4053421020507812, 2.51922607421875, -0.1792449951171875, 1.84136962890625, 3.395843505859375, 1.9881095886230469, -3.082733154296875, 2.6069793701171875, -0.1917724609375, 0.287017822265625, 1.713836669921875, 2.0814590454101562, 2.011524200439453, 0.3065643310546875, 0.8729400634765625, 0.91363525390625, 0.841766357421875, 0.6478347778320312, 0.8519325256347656, 0.8175563812255859, -0.13520050048828125, 3.384357452392578, 1.07666015625, 0.5636672973632812, -0.6650390625, -0.20952987670898438, 2.0945587158203125, 5.1495819091796875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000041.npy"} +{"epoch": 0.06020558002936858, "step": 42, "batch_size": 64, "mean": 2.6524174213409424, "std": 2.5159566402435303, "min": -1.7881393432617188, "p10": 0.22321243286132816, "median": 2.2589893341064453, "p90": 6.321748352050782, "max": 12.489166259765625, "pos_frac": 0.90625, "sample": [1.9779586791992188, -0.05236053466796875, 1.9837703704833984, 9.310287475585938, 12.489166259765625, 2.7237548828125, 0.40036773681640625, 6.2620086669921875, 2.4205856323242188, 2.626129150390625, 2.530975341796875, 6.5785064697265625, 3.2099475860595703, 7.76727294921875, 0.7820873260498047, 1.59613037109375, 2.8682022094726562, -0.1474761962890625, 2.7395095825195312, 3.6864852905273438, 1.7299766540527344, 1.5688438415527344, 3.8707504272460938, 0.26712799072265625, 0.891571044921875, 3.1176910400390625, -0.3879852294921875, 2.4911651611328125, 2.233654022216797, 0.2043914794921875, 2.2912635803222656, 6.0832366943359375, 0.5084953308105469, 6.422607421875, 2.5250930786132812, 1.9420928955078125, 1.0544853210449219, 1.3792915344238281, 1.6680908203125, -0.8374557495117188, 4.092315673828125, 0.6379928588867188, 1.9027938842773438, 6.7063751220703125, 1.2740440368652344, 3.9019241333007812, 0.9872455596923828, 0.6628990173339844, 1.3021717071533203, 1.7163009643554688, 0.6426467895507812, 6.34735107421875, -1.7881393432617188, 0.7213420867919922, 2.2843246459960938, 3.5356178283691406, -0.001255035400390625, 2.2863616943359375, 5.0611419677734375, 2.3260498046875, 4.5971527099609375, 5.248077392578125, 0.9075603485107422, 3.624725341796875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000042.npy"} +{"epoch": 0.06167400881057269, "step": 43, "batch_size": 64, "mean": 2.4994332790374756, "std": 2.033482313156128, "min": -1.5889892578125, "p10": 0.28582458496093766, "median": 2.276844024658203, "p90": 4.799052047729493, "max": 10.254364013671875, "pos_frac": 0.9375, "sample": [-0.06859207153320312, 0.23128890991210938, -0.016870498657226562, 3.6894683837890625, 4.3818206787109375, 0.694183349609375, 4.0546875, 2.7861480712890625, 2.8703136444091797, 6.89764404296875, 0.9884719848632812, 1.910287857055664, 0.9607028961181641, 2.296966552734375, 3.0310325622558594, 1.1750602722167969, 3.281524658203125, 5.504371643066406, 1.6220855712890625, 0.5330314636230469, 6.594879150390625, 2.7140426635742188, 5.527351379394531, 0.9845046997070312, 2.7185287475585938, 2.2426223754882812, 0.4130744934082031, 2.630979537963867, 0.016233444213867188, 4.5460662841796875, 1.8031082153320312, 2.700397491455078, 3.6526947021484375, 2.638946533203125, 3.2508621215820312, 3.9959449768066406, 0.8242874145507812, 2.2567214965820312, 1.8021793365478516, 0.53387451171875, 1.4396133422851562, 1.6033439636230469, 6.549346923828125, 3.91180419921875, 0.4605712890625, -1.5889892578125, 1.229116439819336, 3.3847885131835938, 2.0220947265625, 3.909423828125, 2.6050987243652344, 3.6737136840820312, 1.7665023803710938, 0.5768966674804688, 1.111419677734375, 10.254364013671875, 4.1552734375, 2.1292648315429688, -0.21343040466308594, 0.6931838989257812, 2.6576156616210938, 4.907474517822266, 3.862213134765625, 0.19209671020507812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000043.npy"} +{"epoch": 0.0631424375917768, "step": 44, "batch_size": 64, "mean": 2.8983850479125977, "std": 2.278754234313965, "min": -0.7607574462890625, "p10": 0.5404434204101562, "median": 2.5595779418945312, "p90": 6.073910522460938, "max": 10.13787841796875, "pos_frac": 0.9375, "sample": [4.186031341552734, 2.61102294921875, 1.91583251953125, 0.5455837249755859, 7.3099517822265625, 2.7012481689453125, 3.188934326171875, 0.8797683715820312, 2.2759017944335938, 5.681495666503906, 6.479827880859375, 0.9247398376464844, 5.517280578613281, 5.5349273681640625, 5.0732421875, 3.2672653198242188, 0.7439460754394531, 2.031158447265625, 1.8680648803710938, 5.836639404296875, 0.2719001770019531, 2.5081329345703125, 2.797060012817383, 8.043426513671875, 2.978973388671875, 3.4132766723632812, 0.51678466796875, 1.5135498046875, 6.346900939941406, 1.6819992065429688, 2.7929153442382812, -0.1731109619140625, 2.1547412872314453, 1.3634300231933594, 1.8677234649658203, 3.3607711791992188, 0.9817581176757812, 1.7796249389648438, 5.5723876953125, 2.8437957763671875, 1.3315582275390625, 1.7617874145507812, 3.9629058837890625, 3.6080856323242188, 1.3813018798828125, 3.0084228515625, 3.447418212890625, 3.0510482788085938, 0.5382404327392578, 0.6307640075683594, 6.17559814453125, 4.500724792480469, 2.245880126953125, 3.5866165161132812, 1.2373294830322266, 10.13787841796875, 0.6600303649902344, -0.7607574462890625, 3.385498046875, 1.7248878479003906, -0.7406597137451172, -0.27413177490234375, 1.5175209045410156, 8.189796447753906], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000044.npy"} +{"epoch": 0.06461086637298091, "step": 45, "batch_size": 64, "mean": 2.2729620933532715, "std": 2.5881593227386475, "min": -1.5358314514160156, "p10": -0.0774955749511718, "median": 1.6483497619628906, "p90": 5.880645751953126, "max": 12.5970458984375, "pos_frac": 0.875, "sample": [3.2771453857421875, 0.971649169921875, 0.2411956787109375, 2.7214012145996094, 0.3322563171386719, 1.5482406616210938, 3.2880477905273438, 6.12261962890625, 4.56500244140625, 3.1433258056640625, 0.718963623046875, 7.040687561035156, 3.673929214477539, 0.37591552734375, -0.007867813110351562, 2.0690231323242188, 0.10729217529296875, 2.3048858642578125, 0.118438720703125, 5.624187469482422, 0.1741180419921875, 0.2554759979248047, 2.7866287231445312, 0.6935920715332031, 0.6809558868408203, 5.45020866394043, 6.911079406738281, 4.826850891113281, 0.34798240661621094, 0.7075862884521484, 5.069793701171875, 2.7899551391601562, 5.6299591064453125, 0.29308319091796875, -0.499176025390625, 1.576568603515625, 6.4876556396484375, 3.6886215209960938, -0.36122894287109375, 1.9826507568359375, 1.2123146057128906, 0.32318115234375, -1.5358314514160156, -1.009674072265625, 1.0417633056640625, 12.5970458984375, 1.0123519897460938, 7.374702453613281, 3.4458541870117188, -1.1240119934082031, 2.2146053314208984, 1.7201309204101562, 1.88507080078125, 0.30738067626953125, 1.3784751892089844, 5.9880828857421875, 0.965240478515625, 2.5081939697265625, 2.2774887084960938, 1.9331207275390625, -0.10733604431152344, 3.8381996154785156, 0.7942047119140625, -1.2996864318847656], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000045.npy"} +{"epoch": 0.06607929515418502, "step": 46, "batch_size": 64, "mean": 2.92849063873291, "std": 3.2605841159820557, "min": -2.841827392578125, "p10": -0.21282348632812487, "median": 2.163148880004883, "p90": 7.001345443725587, "max": 15.668792724609375, "pos_frac": 0.875, "sample": [0.5217266082763672, 0.39978790283203125, 5.6032257080078125, 7.1109466552734375, 4.169441223144531, -2.3881607055664062, 2.0680618286132812, 1.6814918518066406, 7.2320709228515625, 3.5573501586914062, 6.824649810791016, 1.4888381958007812, 0.30977630615234375, 2.3548660278320312, 4.72674560546875, 0.3633003234863281, -0.27660369873046875, 0.8622817993164062, 4.868843078613281, 11.816680908203125, -0.5002098083496094, 0.003082275390625, -0.452606201171875, 4.2276763916015625, 0.7515220642089844, 4.235862731933594, 4.6331024169921875, 1.1226692199707031, 4.437568664550781, 2.2582359313964844, 4.53790283203125, 0.3196735382080078, -2.841827392578125, 1.0405960083007812, 3.0038604736328125, 6.045051574707031, 5.392974853515625, -0.06400299072265625, 8.005767822265625, 0.276947021484375, 0.21520614624023438, 3.1075439453125, 5.1699066162109375, -0.42403411865234375, 15.668792724609375, 0.19696807861328125, 6.360015869140625, 6.0102081298828125, 0.8235054016113281, 2.5379180908203125, 1.5166549682617188, 8.578353881835938, 3.0651607513427734, 0.22615814208984375, 1.931488037109375, 7.0770721435546875, 1.1004600524902344, 2.4608936309814453, 3.3966064453125, 5.3185882568359375, 1.266998291015625, 0.9957714080810547, 1.630453109741211, -0.5364608764648438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000046.npy"} +{"epoch": 0.06754772393538913, "step": 47, "batch_size": 64, "mean": 3.000969409942627, "std": 2.681292772293091, "min": -3.1239852905273438, "p10": 0.1700134277343751, "median": 2.300374984741211, "p90": 6.670513916015625, "max": 10.51898193359375, "pos_frac": 0.90625, "sample": [1.8579254150390625, 3.246990203857422, 1.0083465576171875, 1.3038082122802734, 4.591484069824219, 1.8606681823730469, 0.8201751708984375, 1.9236679077148438, 7.1235198974609375, 3.0167198181152344, 0.28093719482421875, 7.115692138671875, 6.685844421386719, 1.1270065307617188, 3.1917266845703125, 2.4566307067871094, 0.4098358154296875, 1.65240478515625, 2.019073486328125, 5.853607177734375, 6.634742736816406, 1.8214797973632812, 0.7707138061523438, 3.4070262908935547, 3.6492385864257812, 1.5646171569824219, -1.0234832763671875, 6.15289306640625, 7.35772705078125, 3.033414840698242, -0.2995281219482422, 3.5365638732910156, 6.524318695068359, 3.8448028564453125, -3.1239852905273438, 0.38712501525878906, 0.12247467041015625, 1.261444091796875, 1.2032241821289062, 10.51898193359375, 5.9186248779296875, 2.4974708557128906, 1.3589534759521484, 3.05206298828125, 9.329193115234375, 6.200439453125, -0.1567230224609375, 3.619508743286133, 10.0440673828125, -0.0567779541015625, 2.297119140625, 1.912322998046875, -0.059814453125, 1.728780746459961, 2.7522125244140625, 1.1540985107421875, 4.965065002441406, 1.73175048828125, 3.7854766845703125, 2.1365890502929688, 4.5256500244140625, 4.034332275390625, 2.0981578826904297, 2.303630828857422], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000047.npy"} +{"epoch": 0.06901615271659324, "step": 48, "batch_size": 64, "mean": 3.1809351444244385, "std": 3.3035202026367188, "min": -8.181365966796875, "p10": 0.03130760192871096, "median": 2.479024887084961, "p90": 8.127516174316407, "max": 12.828140258789062, "pos_frac": 0.921875, "sample": [4.085296630859375, 0.6470756530761719, 8.399795532226562, 9.266395568847656, 7.359596252441406, 2.986797332763672, -1.3137664794921875, 2.099853515625, 5.452445983886719, 5.0425567626953125, 1.719451904296875, 5.9320526123046875, 2.0820999145507812, 0.0520477294921875, 0.7291336059570312, 1.6399574279785156, 7.891387939453125, -0.05071830749511719, 1.3083419799804688, 0.004825592041015625, 0.9834251403808594, 8.634994506835938, -0.5987606048583984, 2.4318618774414062, 6.865226745605469, 1.3601455688476562, 3.3597030639648438, 4.000560760498047, 0.022418975830078125, 3.6555614471435547, 1.8777923583984375, 2.386789321899414, 4.315986633300781, 2.7389564514160156, 9.922607421875, 3.868091583251953, 5.077522277832031, 9.309494018554688, 3.7639007568359375, 0.8119125366210938, 1.0307388305664062, 1.5013370513916016, 0.985870361328125, 3.2448158264160156, 0.8045654296875, 4.0361480712890625, 3.2842845916748047, 2.09100341796875, -1.8998470306396484, 8.228713989257812, 3.120105743408203, 3.109395980834961, 1.8948974609375, 12.828140258789062, 5.256034851074219, -8.181365966796875, 6.849346160888672, 1.654541015625, 0.6140060424804688, 1.1336822509765625, 4.69842529296875, 2.2141380310058594, 2.509502410888672, 2.44854736328125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000048.npy"} +{"epoch": 0.07048458149779736, "step": 49, "batch_size": 64, "mean": 3.9254512786865234, "std": 4.227663516998291, "min": -3.8404159545898438, "p10": 0.05672607421875009, "median": 3.4076900482177734, "p90": 8.711805725097658, "max": 19.096160888671875, "pos_frac": 0.90625, "sample": [3.41455078125, 0.9191131591796875, 0.40201568603515625, 3.7656326293945312, 2.654329299926758, 6.039287567138672, 0.6027069091796875, 2.8698482513427734, -1.71746826171875, 0.689422607421875, 0.21289634704589844, 2.200838088989258, 3.070770263671875, 9.192710876464844, 8.390975952148438, 1.7086639404296875, 4.412960052490234, 2.5686111450195312, 4.194816589355469, 1.3861541748046875, 2.079427719116211, -3.239734649658203, 4.2733001708984375, 0.1445598602294922, 14.651214599609375, 11.499740600585938, 1.79827880859375, 5.737224578857422, 3.7534027099609375, 0.2294635772705078, 8.049072265625, 19.096160888671875, 5.432945251464844, 1.5226211547851562, -0.9392166137695312, -3.8404159545898438, 4.1967010498046875, 3.8502273559570312, 4.1444091796875, 5.527580261230469, 3.1042327880859375, 0.019083023071289062, -0.607421875, 5.233867645263672, 5.403373718261719, 16.23602294921875, 2.7776451110839844, 2.6941604614257812, 8.84930419921875, 6.9049835205078125, 6.826942443847656, 3.400829315185547, 5.170131683349609, 4.907600402832031, 13.112930297851562, 0.9559555053710938, -1.20611572265625, 3.9948654174804688, 1.9912834167480469, 4.4006805419921875, 4.9463348388671875, 1.7011947631835938, 4.535701751708984, 0.92950439453125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000049.npy"} +{"epoch": 0.07195301027900147, "step": 50, "batch_size": 64, "mean": 4.156033515930176, "std": 4.063874244689941, "min": -3.701414108276367, "p10": -0.4936592102050781, "median": 3.3853511810302734, "p90": 9.52548980712891, "max": 15.428131103515625, "pos_frac": 0.859375, "sample": [7.249603271484375, -0.4499053955078125, -1.971343994140625, 10.186920166015625, 1.38079833984375, 2.353118896484375, 8.541770935058594, 2.3949432373046875, 3.8388748168945312, 1.7807207107543945, -0.5089874267578125, 7.63861083984375, 4.24871826171875, 15.428131103515625, 0.3411407470703125, 3.47747802734375, 8.11737060546875, 3.293224334716797, 9.80621337890625, 4.6004638671875, 0.8815269470214844, 6.0791015625, 10.401687622070312, 2.4500198364257812, 2.427886962890625, 5.119863510131836, 0.3855133056640625, -0.45789337158203125, 2.9902114868164062, 8.870468139648438, 7.150299072265625, 5.3484954833984375, 9.81783676147461, 14.6739501953125, -3.701414108276367, 0.4182929992675781, 0.41949462890625, 1.9778823852539062, 0.2560596466064453, 3.9712448120117188, 2.519073486328125, 1.7223968505859375, 6.966251373291016, 3.8464126586914062, 8.59665298461914, 7.345298767089844, -0.7110519409179688, 1.9861297607421875, 6.0119781494140625, -0.928192138671875, 6.4196319580078125, 2.7899398803710938, 4.461174011230469, 5.955192565917969, 2.360260009765625, 6.087699890136719, 8.238525390625, 1.987701416015625, 12.622367858886719, -1.0511627197265625, 7.01165771484375, 2.36944580078125, -1.9330902099609375, 0.08346176147460938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000050.npy"} +{"epoch": 0.07342143906020558, "step": 51, "batch_size": 64, "mean": 4.162949562072754, "std": 5.3499016761779785, "min": -5.723236083984375, "p10": -0.8413900375366209, "median": 2.758298873901367, "p90": 10.531216430664063, "max": 21.338150024414062, "pos_frac": 0.828125, "sample": [10.575569152832031, 4.5973663330078125, 0.6525936126708984, -0.6630401611328125, 0.8560714721679688, -0.62030029296875, 9.841278076171875, -0.9178256988525391, 1.9943466186523438, 0.0317230224609375, 11.630340576171875, 7.243221282958984, 2.0955562591552734, 4.916877746582031, 2.058652877807617, 4.686309814453125, 4.5271148681640625, -1.2190113067626953, 10.427726745605469, 6.668983459472656, 4.833736419677734, -1.01007080078125, 0.09249687194824219, 3.8422088623046875, 1.8326797485351562, 8.027530670166016, 0.7344131469726562, 6.446449279785156, 0.7347831726074219, 5.826026916503906, 21.338150024414062, 2.9912376403808594, 0.3423919677734375, 5.8219451904296875, 3.7971458435058594, 15.541229248046875, 1.4696426391601562, 17.666473388671875, 5.34808349609375, 2.065784454345703, 2.4052734375, 1.4658699035644531, 3.9800186157226562, 9.892890930175781, 1.4921913146972656, 0.62249755859375, 6.2615966796875, -4.577568054199219, -0.3701496124267578, 19.25701904296875, 14.88580322265625, 7.113868713378906, 8.04336166381836, -1.1325759887695312, 1.6412410736083984, 6.356773376464844, 0.07155990600585938, 0.11133193969726562, 2.525360107421875, -5.723236083984375, 3.6894073486328125, -1.689422607421875, 3.0657272338867188, -0.08594512939453125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000051.npy"} +{"epoch": 0.07488986784140969, "step": 52, "batch_size": 64, "mean": 6.323929309844971, "std": 5.434462070465088, "min": -2.6152420043945312, "p10": 0.9404533386230469, "median": 5.145845413208008, "p90": 13.418039321899416, "max": 24.074172973632812, "pos_frac": 0.921875, "sample": [1.6479167938232422, 12.293914794921875, 4.9629058837890625, 12.948959350585938, 7.765594482421875, 11.305816650390625, 3.319915771484375, 7.67279052734375, 8.019775390625, 14.81219482421875, -1.008819580078125, 13.55648422241211, 5.178321838378906, 5.9345550537109375, 9.657636642456055, 8.756078720092773, 2.6461868286132812, 5.386251449584961, 19.577835083007812, 10.242538452148438, 3.735933303833008, 1.2992782592773438, 13.095001220703125, 3.070270538330078, 0.1336517333984375, 8.307693481445312, 3.2385787963867188, 1.9710350036621094, 9.163755416870117, 24.074172973632812, 10.65179443359375, 1.5427570343017578, 12.106468200683594, 5.000347137451172, 3.9608535766601562, 14.362251281738281, 3.9958953857421875, -1.239013671875, 5.417381286621094, -2.6152420043945312, -2.5238418579101562, 2.4846343994140625, 6.272451400756836, 5.7917633056640625, 1.7260169982910156, 10.350898742675781, 4.590381622314453, 14.368072509765625, 7.660449981689453, 1.964120864868164, 1.0133628845214844, 3.1857147216796875, 3.819366455078125, 5.113368988037109, 3.3549156188964844, 10.776073455810547, 6.498802185058594, 1.0898361206054688, 18.314849853515625, -1.5419692993164062, 6.0547637939453125, 4.8449249267578125, 2.6635894775390625, 0.9092063903808594], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000052.npy"} +{"epoch": 0.0763582966226138, "step": 53, "batch_size": 64, "mean": 6.379033088684082, "std": 7.087396621704102, "min": -4.60845947265625, "p10": 0.5918464660644533, "median": 4.36530876159668, "p90": 16.181766510009766, "max": 31.055145263671875, "pos_frac": 0.90625, "sample": [-1.6730670928955078, 4.38397216796875, 4.32635498046875, 2.841705322265625, 0.8075752258300781, 3.9940261840820312, 13.832252502441406, -0.017726898193359375, 8.074661254882812, -2.386871337890625, 2.019287109375, 29.818084716796875, -3.1981887817382812, 1.2291526794433594, 7.324676513671875, 8.220500946044922, 1.6723480224609375, 11.618583679199219, 3.0722274780273438, 13.929092407226562, 4.252525329589844, 6.234138488769531, 13.356887817382812, 6.454902648925781, 17.116470336914062, 0.9989547729492188, 5.327339172363281, 0.7894744873046875, 6.355068206787109, 1.8417701721191406, 19.993194580078125, 9.299884796142578, 7.441928863525391, 9.201370239257812, 4.346645355224609, 0.5071487426757812, 0.9538726806640625, 4.481845855712891, 1.1484317779541016, 4.705902099609375, 2.89190673828125, 8.474302291870117, 1.8055667877197266, 15.797538757324219, -4.60845947265625, 19.023239135742188, 20.16876220703125, 3.5108489990234375, 5.697967529296875, 3.2734432220458984, 1.7884559631347656, 5.1099853515625, 2.8914337158203125, 1.7289581298828125, 3.4996871948242188, 4.479804992675781, 3.2612228393554688, 31.055145263671875, 3.000293731689453, 16.346435546875, 4.7429656982421875, 4.656768798828125, 15.593574523925781, -0.6281223297119141], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000053.npy"} +{"epoch": 0.07782672540381791, "step": 54, "batch_size": 64, "mean": 5.610658168792725, "std": 5.312999725341797, "min": -1.10577392578125, "p10": 0.38770198822021495, "median": 4.482248306274414, "p90": 12.80023422241211, "max": 23.4805908203125, "pos_frac": 0.9375, "sample": [8.203140258789062, 6.8456878662109375, 9.533893585205078, 1.2104949951171875, 0.4817695617675781, 3.12750244140625, -0.13911056518554688, 2.959217071533203, 7.2579498291015625, 7.988994598388672, 0.8605804443359375, 6.32501220703125, 12.916595458984375, 2.0225486755371094, 4.277088165283203, 12.528724670410156, 0.1366405487060547, 5.34422492980957, 5.911840438842773, 5.874931335449219, 0.08230972290039062, 1.7358779907226562, 18.22370147705078, 1.2593536376953125, 7.736076354980469, 0.49745941162109375, 5.3241424560546875, 5.316886901855469, 6.4030303955078125, 6.605228424072266, 1.63494873046875, -0.45827484130859375, 8.63330078125, -0.047069549560546875, 1.1405391693115234, 3.4899368286132812, 13.539726257324219, 2.7428359985351562, 5.915672302246094, 14.027519226074219, 1.9326610565185547, 2.7067031860351562, 23.4805908203125, 11.168167114257812, -1.10577392578125, 3.1543006896972656, 0.34738731384277344, 15.096389770507812, 1.1770305633544922, 4.687408447265625, 1.6775970458984375, 4.839746475219727, 11.224945068359375, 1.6520614624023438, 2.645862579345703, 20.257843017578125, 0.9572219848632812, 5.652128219604492, 2.4469070434570312, 11.041046142578125, 2.6245956420898438, 11.342041015625, 1.5491867065429688, 11.055160522460938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000054.npy"} +{"epoch": 0.07929515418502203, "step": 55, "batch_size": 64, "mean": 6.613970756530762, "std": 7.787161350250244, "min": -11.959762573242188, "p10": -1.5831209182739256, "median": 5.625062942504883, "p90": 17.40348815917969, "max": 29.507064819335938, "pos_frac": 0.78125, "sample": [8.008049011230469, 10.643608093261719, 5.269081115722656, -4.0501861572265625, 26.08740234375, -2.2135696411132812, 7.511577606201172, 4.683238983154297, 17.92595672607422, -1.2151107788085938, 7.144683837890625, 6.083400726318359, 2.4364471435546875, 11.733200073242188, 21.37847900390625, 9.708816528320312, 5.258262634277344, 10.74664306640625, 29.507064819335938, 3.1124534606933594, 11.290328979492188, -1.968048095703125, 4.348468780517578, 9.04559326171875, 25.69195556640625, 0.0305633544921875, 18.073089599609375, 0.093414306640625, 0.42371559143066406, 8.886726379394531, 5.283531188964844, -3.342315673828125, 7.075969696044922, -0.010929107666015625, 13.480209350585938, 5.089790344238281, -1.4066829681396484, -0.4675750732421875, -0.06963729858398438, -5.616493225097656, 9.965263366699219, 3.6093711853027344, 2.992919921875, 16.18439483642578, 7.006553649902344, -1.6587371826171875, 0.5600681304931641, 12.271224975585938, 5.507726669311523, -11.959762573242188, 2.8585262298583984, 15.8607177734375, 7.6556549072265625, 4.518535614013672, 7.172187805175781, 5.742399215698242, -1.2647018432617188, 7.930450439453125, 8.559394836425781, 14.486656188964844, 6.081550598144531, -0.161163330078125, 5.138580322265625, 18.545120239257812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000055.npy"} +{"epoch": 0.08076358296622614, "step": 56, "batch_size": 64, "mean": 6.402560234069824, "std": 7.783745765686035, "min": -10.837894439697266, "p10": -1.6945697784423825, "median": 5.3496551513671875, "p90": 16.62171173095704, "max": 29.196762084960938, "pos_frac": 0.796875, "sample": [-2.416748046875, 7.917594909667969, -4.2209014892578125, 8.743331909179688, -0.7807178497314453, -10.837894439697266, 10.853736877441406, -1.4637260437011719, 3.993366241455078, -3.918567657470703, 3.8662338256835938, 2.2352752685546875, 4.655174255371094, 18.29388427734375, 2.8985862731933594, 0.3806648254394531, -0.5948219299316406, 12.691703796386719, 10.697696685791016, 8.308700561523438, -1.7935028076171875, 12.610366821289062, 3.9137191772460938, 25.547271728515625, 9.77880859375, 17.468963623046875, 1.1103343963623047, 0.22974586486816406, 10.310094833374023, 2.8666019439697266, 7.37066650390625, 7.804710388183594, 10.445144653320312, 1.7959957122802734, 11.417106628417969, 12.58553695678711, -0.4160957336425781, 22.104629516601562, 7.863269805908203, 6.277290344238281, 5.861080169677734, 5.255668640136719, 2.914621353149414, 22.378936767578125, 7.588226318359375, 14.120460510253906, 6.31672477722168, 11.488525390625, 1.0755462646484375, 29.196762084960938, 1.5303878784179688, -0.06298065185546875, 4.599292755126953, 14.644790649414062, -7.7731170654296875, 3.18756103515625, -0.37945556640625, 22.505111694335938, 5.443641662597656, -3.6501312255859375, 3.6030120849609375, 2.018829345703125, 6.7314453125, 10.575691223144531], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000056.npy"} +{"epoch": 0.08223201174743025, "step": 57, "batch_size": 64, "mean": 7.227847576141357, "std": 7.131993293762207, "min": -11.87493896484375, "p10": 0.13788108825683612, "median": 7.109651565551758, "p90": 16.1130672454834, "max": 25.923110961914062, "pos_frac": 0.90625, "sample": [9.228130340576172, 3.229127883911133, 0.3130226135253906, -4.27313232421875, 11.996444702148438, -11.87493896484375, 13.039947509765625, 0.3493537902832031, 15.53778076171875, 7.215160369873047, 19.05401611328125, 4.041437149047852, 25.923110961914062, 3.6213531494140625, 7.819328308105469, 7.884429931640625, -1.8154830932617188, 4.093217849731445, 7.004142761230469, 22.080337524414062, 10.722183227539062, 18.850418090820312, 12.494720458984375, 3.8196773529052734, 7.950901031494141, 1.2320919036865234, 15.811893463134766, 5.174272537231445, 4.558448791503906, 2.5492782592773438, 1.0379409790039062, 4.198516845703125, 3.2678298950195312, 0.0628204345703125, 7.959991455078125, 8.431259155273438, -0.6338729858398438, 1.6249847412109375, 3.2722091674804688, 10.134956359863281, 11.9482421875, 2.2353591918945312, 11.424339294433594, 6.536155700683594, 11.03436279296875, -3.0892295837402344, 9.743099212646484, 7.981452941894531, 10.5311279296875, 4.002651214599609, 2.9062347412109375, 20.444427490234375, 5.7082977294921875, 3.557758331298828, 7.5567169189453125, 4.584800720214844, 0.9225177764892578, 14.809303283691406, 14.357864379882812, -7.671989440917969, 16.242141723632812, 22.809921264648438, 8.592428207397461, 8.426971435546875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000057.npy"} +{"epoch": 0.08370044052863436, "step": 58, "batch_size": 64, "mean": 8.08529281616211, "std": 8.748968124389648, "min": -10.084098815917969, "p10": -0.5483957290649406, "median": 5.781959533691406, "p90": 19.088880157470708, "max": 31.9559326171875, "pos_frac": 0.890625, "sample": [17.073928833007812, -2.8790721893310547, 0.5282573699951172, 5.580142974853516, 3.8059463500976562, 4.5197296142578125, 22.661888122558594, 1.8335037231445312, 16.16399383544922, 3.214292526245117, 7.349544525146484, 6.901580810546875, 9.2237548828125, 4.457563400268555, 1.9865341186523438, 12.731971740722656, 11.644832611083984, 10.186389923095703, 5.108736038208008, 3.8054065704345703, 9.638946533203125, 15.8778076171875, 16.081031799316406, -2.185638427734375, 13.126602172851562, 0.9969024658203125, 10.501579284667969, 7.003454208374023, 24.68987274169922, 4.926027297973633, 31.9559326171875, 9.217109680175781, 6.939243316650391, 0.2014331817626953, 17.785186767578125, 0.7181625366210938, 17.67401123046875, 4.464729309082031, 2.4281845092773438, -1.3754425048828125, 3.2131710052490234, 3.5777969360351562, 28.62005615234375, -2.4609756469726562, 11.523483276367188, -0.8697509765625, 0.9725399017333984, 8.214096069335938, -7.009033203125, 28.453643798828125, 2.0247249603271484, 5.983776092529297, 0.93731689453125, 16.786849975585938, 12.432170867919922, -10.084098815917969, 24.8677978515625, 2.406829833984375, 3.1664199829101562, 8.308792114257812, 17.5445556640625, 1.97552490234375, 0.6613578796386719, 19.647605895996094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000058.npy"} +{"epoch": 0.08516886930983847, "step": 59, "batch_size": 64, "mean": 8.926612854003906, "std": 9.407879829406738, "min": -8.800674438476562, "p10": -1.6715560913085936, "median": 7.715313911437988, "p90": 23.13311004638672, "max": 37.37840270996094, "pos_frac": 0.875, "sample": [9.93243408203125, 10.692970275878906, 18.273284912109375, 23.2701416015625, 18.610443115234375, 4.679058074951172, 37.37840270996094, 24.62139892578125, 8.050416946411133, 19.334732055664062, 0.7856349945068359, 6.926570892333984, -4.914466857910156, 3.208782196044922, 26.399093627929688, -1.6373977661132812, 2.560633659362793, 17.699203491210938, 7.380210876464844, 25.16626739501953, 1.6350841522216797, 2.8473434448242188, 1.4032135009765625, 1.9728164672851562, 5.5245513916015625, 30.911651611328125, 2.9809017181396484, 2.1547489166259766, 9.708431243896484, -8.800674438476562, 10.853950500488281, -2.6693267822265625, 10.325202941894531, 8.837825775146484, 10.654365539550781, 21.696327209472656, 4.406948089599609, 13.90008544921875, 8.780906677246094, 3.5544090270996094, -6.948219299316406, -4.477882385253906, 6.451148986816406, -1.6861953735351562, 5.208845138549805, 11.116275787353516, 8.386676788330078, 3.3411102294921875, 9.294410705566406, 2.564573287963867, 1.5719795227050781, 8.091949462890625, 3.9406986236572266, 11.283580780029297, 7.278860092163086, 16.967926025390625, 10.830215454101562, 4.962158203125, 3.964691162109375, -4.168212890625, 19.88005828857422, 22.876602172851562, 8.232322692871094, 23.2430419921875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000059.npy"} +{"epoch": 0.08663729809104258, "step": 60, "batch_size": 64, "mean": 7.101426124572754, "std": 8.326732635498047, "min": -19.152374267578125, "p10": -2.4692070007324216, "median": 7.377494812011719, "p90": 14.591548156738282, "max": 27.502403259277344, "pos_frac": 0.828125, "sample": [10.01846694946289, -6.295623779296875, 2.4807891845703125, 12.5152587890625, 1.0994682312011719, 3.8679332733154297, 13.401626586914062, 9.325119018554688, 1.0164947509765625, -19.152374267578125, 7.651151657104492, -3.1737899780273438, 2.392641067504883, 4.371580123901367, 13.240005493164062, 6.7361297607421875, 3.8863525390625, 13.371063232421875, 6.567008972167969, 13.647422790527344, 13.968048095703125, 14.731887817382812, 23.088409423828125, 10.340812683105469, 8.761856079101562, 9.475723266601562, -0.6475677490234375, 7.581474304199219, 13.950592041015625, 0.5695419311523438, 12.06229019165039, 22.267486572265625, 5.288532257080078, 7.173515319824219, 8.778675079345703, -2.3116226196289062, 13.7352294921875, 8.301315307617188, 3.630462646484375, -6.690673828125, 14.611270904541016, 3.793781280517578, 6.5110321044921875, 10.979393005371094, 4.930702209472656, -1.6992835998535156, -13.17147445678711, 20.231704711914062, 24.997406005859375, 13.584388732910156, 14.545528411865234, 4.1570281982421875, -2.5367431640625, -1.63995361328125, 12.078128814697266, 6.942249298095703, 27.502403259277344, 11.577007293701172, 2.43475341796875, 11.024702072143555, -2.8093338012695312, 9.629615783691406, 4.681985855102539, 1.112264633178711], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000060.npy"} +{"epoch": 0.0881057268722467, "step": 61, "batch_size": 64, "mean": 8.905394554138184, "std": 12.702935218811035, "min": -12.299880981445312, "p10": -2.690708923339843, "median": 6.143840789794922, "p90": 22.607251739501955, "max": 70.9461669921875, "pos_frac": 0.78125, "sample": [3.397115707397461, 1.7231979370117188, -1.651519775390625, -0.0064544677734375, 18.6756591796875, -0.38683319091796875, 0.14029693603515625, 9.545665740966797, -2.2533798217773438, 32.73313903808594, 27.101051330566406, 23.951766967773438, 29.812660217285156, 16.385047912597656, 9.733428955078125, 11.505294799804688, -2.0226211547851562, 1.7709922790527344, 12.741767883300781, 14.723747253417969, 3.6952972412109375, 5.541786193847656, 20.54150390625, 10.630115509033203, 7.458442687988281, 7.200538635253906, 6.7458953857421875, 8.253135681152344, 12.30703353881836, 10.102958679199219, -0.9056320190429688, -3.026336669921875, 14.444366455078125, 19.853378295898438, 20.425933837890625, 28.670181274414062, 21.980209350585938, 1.9049301147460938, -4.41046142578125, 16.661758422851562, 2.713665008544922, 0.4964637756347656, 18.380096435546875, 3.863910675048828, 0.6949539184570312, 22.87598419189453, 0.7581329345703125, 4.511528015136719, 70.9461669921875, 16.412689208984375, 14.503456115722656, -2.8781356811523438, -12.299880981445312, 14.207832336425781, 2.9624900817871094, -6.301910400390625, 5.08709716796875, 2.675994873046875, -9.644584655761719, 3.5378265380859375, 8.243667602539062, -7.57855224609375, -2.0040969848632812, 2.085420608520508], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000061.npy"} +{"epoch": 0.08957415565345081, "step": 62, "batch_size": 64, "mean": 6.7090325355529785, "std": 9.311627388000488, "min": -13.21630859375, "p10": -5.736711692810058, "median": 6.19329833984375, "p90": 18.138483810424805, "max": 36.880348205566406, "pos_frac": 0.84375, "sample": [11.073314666748047, 2.0942306518554688, 0.7375755310058594, 13.216178894042969, 5.9226226806640625, 14.72021484375, 10.176782608032227, 0.778961181640625, 10.477058410644531, -7.5429534912109375, 17.83935546875, 8.060020446777344, 2.049283981323242, -0.1887340545654297, 12.393547058105469, 4.591278076171875, 13.078292846679688, 7.721435546875, 0.3336677551269531, 6.100677490234375, 8.168815612792969, 8.699565887451172, 2.4724273681640625, 6.7189788818359375, -6.329521179199219, 11.183425903320312, 4.431179046630859, 9.817459106445312, -0.4302215576171875, 36.880348205566406, 20.512672424316406, 11.5067138671875, 8.43572998046875, 3.0378952026367188, 0.116302490234375, 7.210666656494141, 1.8719406127929688, 22.825332641601562, -13.21630859375, -6.413795471191406, -8.71575927734375, 0.6356830596923828, 1.3182792663574219, 0.2790966033935547, 2.1700210571289062, 10.940690994262695, 18.266681671142578, 11.56100845336914, -5.084115982055664, 20.68475341796875, 10.058086395263672, 6.285919189453125, -10.086334228515625, 4.542873382568359, 15.187744140625, 4.325782775878906, 4.008449554443359, 4.162940979003906, 32.01844787597656, 26.056045532226562, 7.403316497802734, 1.3445968627929688, -6.016395568847656, 6.8978729248046875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000062.npy"} +{"epoch": 0.09104258443465492, "step": 63, "batch_size": 64, "mean": 9.09783935546875, "std": 10.289475440979004, "min": -17.582969665527344, "p10": -0.910817146301269, "median": 8.718557357788086, "p90": 21.38752059936524, "max": 41.393402099609375, "pos_frac": 0.875, "sample": [30.358322143554688, 9.152816772460938, 5.606134414672852, 10.825653076171875, 2.118927001953125, 0.6967697143554688, 6.4222564697265625, -9.82830810546875, 7.8752899169921875, 9.055145263671875, -17.582969665527344, 20.69878387451172, 9.440471649169922, 9.114463806152344, 3.5612640380859375, -1.1608104705810547, 23.17840576171875, 4.763465881347656, 18.180374145507812, 25.138534545898438, 14.569992065429688, 18.25836181640625, 6.833274841308594, 0.4459686279296875, -1.770599365234375, 10.121057510375977, -4.219596862792969, 0.1720294952392578, 6.280784606933594, 3.2323131561279297, 17.163742065429688, 10.357887268066406, 9.553672790527344, 11.540050506591797, 14.110328674316406, 14.9381103515625, 18.378883361816406, 17.52808380126953, 6.613250732421875, 1.0113258361816406, -0.3274993896484375, 14.847412109375, 3.1385974884033203, 4.047733306884766, -15.831573486328125, 15.924856185913086, 10.799263000488281, 1.937591552734375, 2.4474334716796875, 7.81292724609375, 3.7337493896484375, 18.414093017578125, 22.704727172851562, 8.381969451904297, 21.682693481445312, 0.9927139282226562, 10.399457931518555, 41.393402099609375, -3.7342758178710938, 17.732864379882812, 3.575897216796875, 30.376129150390625, 3.7875595092773438, 15.290107727050781], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000063.npy"} +{"epoch": 0.09251101321585903, "step": 64, "batch_size": 64, "mean": 11.019262313842773, "std": 13.221428871154785, "min": -26.046981811523438, "p10": -1.947132492065429, "median": 9.001904487609863, "p90": 27.49088592529297, "max": 61.80133056640625, "pos_frac": 0.796875, "sample": [1.4846649169921875, 23.54290771484375, 32.47630310058594, 9.45944595336914, 6.2135467529296875, 15.358688354492188, 3.6872806549072266, 19.657001495361328, 0.26769065856933594, 9.00166130065918, 3.6838226318359375, 12.821983337402344, -0.5483322143554688, 5.8363037109375, 15.579010009765625, 41.62928771972656, 6.416912078857422, -2.7082157135009766, 22.846817016601562, 8.985780715942383, 14.945987701416016, -3.8563690185546875, 34.57000732421875, 9.002147674560547, -0.6424236297607422, 5.17254638671875, -2.689542770385742, 8.515493392944336, -26.046981811523438, -0.8467369079589844, 9.539283752441406, -2.3188552856445312, 61.80133056640625, 6.621747970581055, 1.6129913330078125, 13.77762222290039, 21.83844757080078, 26.536300659179688, 13.825225830078125, 5.655681610107422, 7.006479263305664, 27.899993896484375, 16.09496307373047, 13.696067810058594, -1.190216064453125, -10.970535278320312, 31.201736450195312, 3.0277175903320312, 28.77764892578125, -1.2225685119628906, 19.24987030029297, 15.378097534179688, 16.569236755371094, -2.257659912109375, 17.339508056640625, -0.40776634216308594, 5.527006149291992, 10.003986358642578, 17.644195556640625, 13.806259155273438, 0.19886398315429688, 14.121131896972656, 22.48804473876953, 8.544292449951172], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000064.npy"} +{"epoch": 0.09397944199706314, "step": 65, "batch_size": 64, "mean": 11.322837829589844, "std": 13.47046184539795, "min": -30.49311065673828, "p10": -1.7420259475708002, "median": 8.902275085449219, "p90": 30.140518188476573, "max": 41.86500549316406, "pos_frac": 0.8125, "sample": [41.86500549316406, 31.180923461914062, 27.712905883789062, 12.833610534667969, 8.105894088745117, 12.532402038574219, -1.2028427124023438, 17.377044677734375, 3.5392398834228516, 26.226463317871094, 21.592758178710938, 2.5027999877929688, -0.31439208984375, 32.911865234375, 37.24702453613281, 0.8313484191894531, 12.886821746826172, 3.0847606658935547, -4.974555969238281, 6.614341735839844, 0.9690570831298828, 26.008285522460938, 8.628646850585938, -30.49311065673828, 23.074813842773438, 11.793182373046875, -5.227386474609375, 9.264568328857422, 18.304672241210938, 4.956012725830078, 6.403228759765625, 4.356128692626953, -3.2109413146972656, 5.944902420043945, 37.20530700683594, 9.419933319091797, 26.619644165039062, 20.73694610595703, 31.510475158691406, 10.471118927001953, -1.0186004638671875, -20.33001708984375, 13.43597412109375, 1.8596916198730469, 9.1759033203125, 24.073806762695312, 31.572242736816406, -3.426055908203125, 6.30780029296875, 25.651153564453125, 14.480712890625, 6.87677001953125, 27.584976196289062, -1.973104476928711, 15.854690551757812, 13.8817138671875, 6.436248779296875, 2.99969482421875, 21.02215576171875, 7.1968536376953125, -1.1671714782714844, 7.421405792236328, -0.6176414489746094, 8.073562622070312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000065.npy"} +{"epoch": 0.09544787077826726, "step": 66, "batch_size": 64, "mean": 10.432147979736328, "std": 13.566396713256836, "min": -14.986053466796875, "p10": -2.2648239135742183, "median": 8.812446594238281, "p90": 25.114152526855474, "max": 63.34465026855469, "pos_frac": 0.8125, "sample": [10.01031494140625, 63.34465026855469, -1.7496185302734375, 21.11113739013672, 16.18988037109375, 16.115158081054688, 2.7966651916503906, -1.6339836120605469, 11.634794235229492, 8.824058532714844, -13.31182861328125, 2.574258804321289, 8.800834655761719, -1.168304443359375, 13.791149139404297, 5.1719818115234375, 6.46082878112793, 0.3406333923339844, 1.8626518249511719, 25.74853515625, 13.019332885742188, 11.789731979370117, -0.035060882568359375, 13.185592651367188, 21.514957427978516, 1.9792327880859375, 9.378501892089844, -5.416877746582031, -5.605680465698242, -14.986053466796875, 15.711441040039062, 19.82427215576172, 22.147380828857422, 9.71142578125, 3.275890350341797, 16.051727294921875, 5.609354019165039, -0.8870925903320312, 3.795146942138672, 17.67475128173828, 13.341758728027344, 2.4220657348632812, -2.485626220703125, 7.253013610839844, 8.159051895141602, -11.765312194824219, 11.020843505859375, 2.5235958099365234, -7.379905700683594, 19.123382568359375, 5.924915313720703, 28.455337524414062, 35.28153991699219, 27.696029663085938, 7.2751922607421875, 23.633926391601562, 2.6770858764648438, 11.033489227294922, 20.264610290527344, 2.4823970794677734, 45.122314453125, 1.9303741455078125, 41.42205810546875, 17.593612670898438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000066.npy"} +{"epoch": 0.09691629955947137, "step": 67, "batch_size": 64, "mean": 11.204421997070312, "std": 14.739347457885742, "min": -20.887908935546875, "p10": -3.1235679626464843, "median": 8.774016380310059, "p90": 28.822702789306643, "max": 71.02877807617188, "pos_frac": 0.796875, "sample": [31.201126098632812, 6.9261016845703125, 28.453033447265625, -1.8798294067382812, 3.4062137603759766, -2.8316574096679688, 23.60375213623047, 7.898927688598633, -8.486930847167969, 3.0754241943359375, 23.757896423339844, 12.54465103149414, 4.97149658203125, 23.17304229736328, -1.2845306396484375, -5.339441299438477, 10.054412841796875, 13.7606201171875, 2.698394775390625, -20.887908935546875, 28.98113250732422, 11.674179077148438, -2.1908607482910156, -0.134521484375, 10.408645629882812, 0.29083251953125, 30.91399383544922, 8.573354721069336, -1.9172515869140625, 4.081291198730469, -3.2486724853515625, 23.178577423095703, 10.575347900390625, 3.9636707305908203, 4.762298583984375, 11.6129150390625, 18.411718368530273, 9.303224563598633, 1.6717300415039062, -6.7610321044921875, 9.65985107421875, 23.468963623046875, 5.9792022705078125, 4.671733856201172, 38.20208740234375, 19.968482971191406, 7.607105255126953, 23.266189575195312, 18.63433074951172, 17.78191375732422, 11.554031372070312, 6.318460464477539, 13.759130477905273, 47.55958557128906, 7.563575744628906, 1.361806869506836, 71.02877807617188, 3.305166244506836, -6.234306335449219, 35.630615234375, 27.13066864013672, 8.974678039550781, -9.586395263671875, 10.512022018432617], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000067.npy"} +{"epoch": 0.09838472834067548, "step": 68, "batch_size": 64, "mean": 10.456976890563965, "std": 12.942193031311035, "min": -16.615188598632812, "p10": -3.4199691772460925, "median": 8.686996459960938, "p90": 27.84365234375001, "max": 56.88294982910156, "pos_frac": 0.84375, "sample": [-10.287513732910156, -3.9153289794921875, 38.506439208984375, 13.508773803710938, 6.762815475463867, -13.671127319335938, 13.78338623046875, 31.13294219970703, 4.350002288818359, 0.9716148376464844, 19.841796875, 19.807601928710938, 26.16454315185547, 16.59375, 9.073844909667969, -8.759414672851562, 9.478889465332031, 19.083293914794922, 8.951896667480469, 14.146148681640625, -4.860420227050781, 5.949422836303711, 7.169816970825195, -4.412139892578125, 11.500139236450195, 10.387260437011719, 24.957916259765625, 8.117630004882812, 13.052558898925781, 25.81310272216797, 28.563270568847656, 32.984432220458984, 0.761871337890625, 10.114555358886719, 12.675491333007812, 2.69158935546875, -0.5654983520507812, 8.381612777709961, 0.8952178955078125, 7.399971008300781, 25.339397430419922, 1.937774658203125, 11.441556930541992, -16.615188598632812, 8.422096252441406, 5.729530334472656, 5.249326705932617, 30.709091186523438, 0.20977783203125, 1.8613338470458984, 1.674795150756836, 56.88294982910156, 33.387840270996094, 16.655136108398438, 20.882522583007812, -2.264129638671875, 3.2216873168945312, 5.663490295410156, 11.819419860839844, -1.6695556640625, 8.984642028808594, 4.376665115356445, 5.195396423339844, 13.04879379272461], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000068.npy"} +{"epoch": 0.09985315712187959, "step": 69, "batch_size": 64, "mean": 15.359737396240234, "std": 17.678476333618164, "min": -17.740188598632812, "p10": -1.0613063812255856, "median": 10.8530855178833, "p90": 37.15473175048829, "max": 80.160888671875, "pos_frac": 0.84375, "sample": [7.484771728515625, 11.587310791015625, 54.4471435546875, 18.1353759765625, 20.557968139648438, 28.861373901367188, -17.740188598632812, -7.278923034667969, -0.6561965942382812, 14.994216918945312, 4.3596343994140625, 12.714580535888672, 1.99932861328125, 15.578075408935547, 4.167438507080078, 23.94558334350586, 52.789703369140625, 10.0059814453125, 28.145530700683594, 27.244735717773438, -8.489669799804688, 48.36750793457031, 17.910655975341797, 9.136177062988281, 30.106765747070312, 23.4791259765625, 3.5861587524414062, 26.340553283691406, 19.095447540283203, 11.24174690246582, -0.24277496337890625, -2.659557342529297, 29.928001403808594, 80.160888671875, 7.292701721191406, 39.06591796875, 11.987930297851562, 5.2846832275390625, 33.60111999511719, 1.3521385192871094, -1.2162857055664062, 23.409927368164062, 12.309947967529297, 5.289207458496094, -0.6996879577636719, 22.806041717529297, 7.692272186279297, 52.56488037109375, 6.378570556640625, 14.463216781616211, 7.37010383605957, -8.022140502929688, 10.464424133300781, 6.338592529296875, 3.5652236938476562, 2.6425628662109375, 35.417816162109375, 34.83233642578125, 1.667449951171875, 37.89912414550781, 3.4805469512939453, 1.6265544891357422, 9.653348922729492, -4.799751281738281], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000069.npy"} +{"epoch": 0.1013215859030837, "step": 70, "batch_size": 64, "mean": 16.183441162109375, "std": 20.671733856201172, "min": -35.165863037109375, "p10": -6.226128959655761, "median": 14.524568557739258, "p90": 38.0580551147461, "max": 78.91259765625, "pos_frac": 0.84375, "sample": [7.962533950805664, 0.1018218994140625, 19.862247467041016, 25.75048065185547, 30.46368408203125, 17.71189308166504, -18.706008911132812, -12.69677734375, 20.19670867919922, 4.830615997314453, -7.153472900390625, 28.489273071289062, -6.395542144775391, 19.455841064453125, 67.25114440917969, 0.7005615234375, 4.370147705078125, 42.398590087890625, 16.94707679748535, 78.91259765625, 20.634719848632812, 13.744270324707031, 4.93182373046875, 48.01661682128906, 1.0986709594726562, 26.730438232421875, 35.26202392578125, 18.067293167114258, -6.554117202758789, 8.936134338378906, 9.256996154785156, 35.14220428466797, 5.168975830078125, 0.17454147338867188, 0.08298492431640625, -4.186237335205078, 25.420490264892578, 7.7158050537109375, 24.014053344726562, -5.830831527709961, 6.943504333496094, 6.657600402832031, -9.694129943847656, 1.9438323974609375, 15.304866790771484, 3.500396728515625, 31.679100036621094, 37.82300567626953, -35.165863037109375, 3.146625518798828, 33.90864562988281, 12.261016845703125, 1.4618148803710938, 59.22938537597656, 21.53591537475586, -3.221477508544922, 62.6944580078125, 15.942943572998047, 33.651947021484375, 25.06147575378418, 5.22724723815918, 34.11192321777344, 25.296836853027344, 38.158790588378906], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000070.npy"} +{"epoch": 0.1027900146842878, "step": 71, "batch_size": 64, "mean": 19.849706649780273, "std": 23.86857795715332, "min": -19.205581665039062, "p10": -1.8054302215576172, "median": 13.897655487060547, "p90": 38.65131454467774, "max": 100.6937255859375, "pos_frac": 0.84375, "sample": [-11.170066833496094, 81.76885986328125, 3.078937530517578, 6.88665771484375, 8.15250015258789, 19.510967254638672, 6.432609558105469, 19.094558715820312, 21.466407775878906, 11.057975769042969, 35.42536926269531, 15.851690292358398, 35.11054992675781, 5.865119934082031, 8.072982788085938, 3.486297607421875, 11.628276824951172, 30.91064453125, -2.3019180297851562, 3.3139286041259766, 6.831245422363281, 83.0911865234375, 14.337322235107422, 13.457988739013672, 26.054039001464844, 24.291519165039062, 8.68344497680664, 20.714027404785156, 17.113988876342773, -19.205581665039062, -2.6575927734375, -7.9446258544921875, 31.133102416992188, -0.5105133056640625, 7.660520553588867, 27.878021240234375, 34.08667755126953, -1.826385498046875, 36.50727844238281, 36.86840057373047, -1.0995407104492188, 27.649246215820312, 3.3483810424804688, 2.649322509765625, 35.93646240234375, 32.77501678466797, 0.05770111083984375, 65.70722961425781, 89.45932006835938, 100.6937255859375, 5.5015716552734375, 39.41542053222656, -12.015497207641602, -1.7565345764160156, 29.525348663330078, 14.743385314941406, 12.142436981201172, 15.527366638183594, 26.21947479248047, 27.706260681152344, 58.052520751953125, 7.904869079589844, 8.524694442749023, 11.50665283203125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000071.npy"} +{"epoch": 0.10425844346549193, "step": 72, "batch_size": 64, "mean": 19.57806396484375, "std": 27.167381286621094, "min": -61.59521484375, "p10": -6.309566497802729, "median": 17.410282135009766, "p90": 53.924940490722676, "max": 110.27850341796875, "pos_frac": 0.875, "sample": [26.178192138671875, 24.09147834777832, 5.481773376464844, 17.75945281982422, 0.5606498718261719, 9.442934036254883, 17.061111450195312, 44.287811279296875, -37.099212646484375, 43.323455810546875, 25.850284576416016, 24.533857345581055, 7.570158004760742, 2.463104248046875, 24.00020980834961, 14.102676391601562, 27.439193725585938, 46.73847961425781, 28.45148468017578, 55.853118896484375, 63.75482177734375, -13.93873405456543, 12.507469177246094, 14.971506118774414, 32.78441619873047, -1.350067138671875, 27.232498168945312, 20.997787475585938, 28.09520721435547, 30.22052764892578, 2.2643661499023438, 4.4929656982421875, 27.466888427734375, -41.699737548828125, 68.80145263671875, 49.42585754394531, 28.989990234375, 9.943851470947266, 28.76873779296875, -10.283905029296875, 14.90966796875, 78.5057373046875, 110.27850341796875, -11.585628509521484, 14.089723587036133, 15.233978271484375, 5.83319091796875, 13.869720458984375, 2.649129867553711, 69.52659606933594, -61.59521484375, 4.592639923095703, 1.7344512939453125, 18.305282592773438, 22.624984741210938, 30.433013916015625, 13.490440368652344, 22.64220428466797, 65.84808349609375, 18.911550521850586, 7.844936370849609, 11.206535339355469, -8.435066223144531, 0.5455513000488281], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000072.npy"} +{"epoch": 0.10572687224669604, "step": 73, "batch_size": 64, "mean": 20.007017135620117, "std": 35.32236862182617, "min": -56.418487548828125, "p10": -22.614213562011717, "median": 16.3887882232666, "p90": 60.97420806884766, "max": 116.87422180175781, "pos_frac": 0.765625, "sample": [49.15869903564453, -35.000732421875, 17.324302673339844, 26.78704071044922, 13.039676666259766, 61.95000457763672, -20.706581115722656, 30.062850952148438, 18.620681762695312, 1.6817550659179688, 59.615234375, 45.94034194946289, 41.943199157714844, -3.1332778930664062, -8.4190673828125, -32.600921630859375, 101.34893798828125, -3.3932723999023438, 14.571126937866211, 35.79405975341797, 25.70641326904297, 13.901981353759766, 21.90209197998047, 2.1344852447509766, 4.360816955566406, 16.725391387939453, -38.0291862487793, 13.55533218383789, 54.585357666015625, 19.350929260253906, 21.80292510986328, 7.482507705688477, 112.3902587890625, -29.653152465820312, 2.1204566955566406, 48.658653259277344, 55.07219696044922, -15.685186386108398, 8.696537017822266, -1.6994686126708984, -12.301597595214844, -12.670783996582031, -23.43177032470703, 6.512073516845703, 19.937782287597656, 20.365638732910156, 5.898612976074219, 8.128337860107422, 29.93482208251953, 74.30064392089844, 32.867340087890625, 9.507892608642578, 46.622344970703125, 4.243251800537109, 28.730133056640625, -33.42718505859375, 16.05218505859375, 103.57505798339844, 61.55662536621094, 116.87422180175781, 3.614103317260742, 53.628990173339844, 18.385528564453125, -56.418487548828125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000073.npy"} +{"epoch": 0.10719530102790015, "step": 74, "batch_size": 64, "mean": 23.121337890625, "std": 40.132774353027344, "min": -66.4885025024414, "p10": -8.23329849243164, "median": 15.81401252746582, "p90": 54.49941635131837, "max": 244.16937255859375, "pos_frac": 0.78125, "sample": [33.773231506347656, 9.494606018066406, 25.967254638671875, 0.14594078063964844, 26.698013305664062, 15.843955993652344, 17.23831558227539, 94.6721420288086, 1.8664512634277344, 47.125694274902344, 37.358497619628906, 11.21005630493164, 30.455196380615234, -2.518035888671875, 21.515045166015625, 244.16937255859375, 13.86309814453125, 53.32283020019531, 25.426509857177734, 8.590324401855469, 2.9478225708007812, 92.94126892089844, 10.362106323242188, 53.393516540527344, 34.84693908691406, 14.466629028320312, -5.439338684082031, -23.586685180664062, -2.0895957946777344, -37.64466094970703, 23.414905548095703, -7.289741516113281, 3.7299232482910156, 12.247222900390625, -66.4885025024414, 30.565338134765625, -10.100868225097656, 43.77231216430664, 5.094646453857422, -16.696041107177734, -20.596885681152344, 14.894577026367188, 36.30523681640625, 25.47918701171875, 6.181785583496094, 100.42315673828125, 18.539169311523438, -8.637680053710938, 18.322586059570312, 14.959579467773438, 15.784069061279297, 62.79302978515625, 34.48051452636719, 44.7486572265625, 64.50572967529297, 15.310935974121094, 5.655111312866211, 43.07452392578125, 54.97337341308594, 44.26447296142578, -6.360115051269531, -4.7666778564453125, -2.425872802734375, 27.191394805908203], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000074.npy"} +{"epoch": 0.10866372980910426, "step": 75, "batch_size": 64, "mean": 32.434326171875, "std": 37.91066360473633, "min": -32.0225830078125, "p10": -4.510084152221678, "median": 23.57758331298828, "p90": 84.05277252197267, "max": 140.498046875, "pos_frac": 0.828125, "sample": [-1.3799629211425781, 53.60212707519531, 22.273788452148438, 8.919479370117188, 26.29601287841797, 13.511917114257812, 14.396156311035156, 1.5973243713378906, 81.83851623535156, 140.498046875, 46.55464172363281, 22.58745002746582, -26.087860107421875, 41.46282958984375, 122.72193908691406, 25.349056243896484, 77.81597900390625, 101.16935729980469, 13.794466018676758, -2.7237281799316406, 70.7154541015625, 44.18986511230469, 23.13799285888672, 9.050376892089844, -0.1611328125, 74.3949203491211, 85.00173950195312, 3.1395339965820312, 13.882314682006836, 45.58524703979492, 29.445106506347656, 28.334125518798828, -5.275665283203125, 109.52964782714844, -13.771675109863281, -1.3614883422851562, 42.993011474609375, -32.0225830078125, 32.958961486816406, 30.545764923095703, 12.175418853759766, 17.9630126953125, -20.74022674560547, 18.370773315429688, -5.422321319580078, 47.505096435546875, 4.445545196533203, 24.017173767089844, 78.3612060546875, 53.223724365234375, 5.029573440551758, -13.729034423828125, 12.12394905090332, 15.549530029296875, 24.978042602539062, 7.009304046630859, 2.5759429931640625, 49.681854248046875, 17.332443237304688, 26.85277557373047, 67.62287139892578, 120.98617553710938, 109.14743041992188, 26.227432250976562], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000075.npy"} +{"epoch": 0.11013215859030837, "step": 76, "batch_size": 64, "mean": 19.47739028930664, "std": 28.923913955688477, "min": -89.11721801757812, "p10": -9.22292785644531, "median": 16.16430950164795, "p90": 50.503019714355474, "max": 103.51191711425781, "pos_frac": 0.828125, "sample": [18.52012825012207, 1.9495658874511719, 4.684732437133789, 42.59977722167969, 8.733840942382812, 4.199676513671875, -5.535591125488281, 40.96856689453125, 48.15712356567383, 29.341812133789062, 30.561729431152344, 39.00257110595703, 13.232093811035156, 45.926910400390625, 1.9305648803710938, 6.9450225830078125, 12.992792129516602, 103.51191711425781, 61.251075744628906, 13.256797790527344, -20.059547424316406, 16.88966178894043, -1.8119182586669922, 22.52570343017578, -27.580093383789062, 5.702802658081055, 8.671937942504883, 32.726341247558594, 7.6148529052734375, 2.724262237548828, 45.46415710449219, 19.393768310546875, 32.639434814453125, 13.463104248046875, 50.93605041503906, 5.8201904296875, -10.059818267822266, 49.49261474609375, 1.7621650695800781, -26.763267517089844, 8.588281631469727, 8.782291412353516, -14.750482559204102, -29.468658447265625, 28.046676635742188, 58.71685791015625, 79.906494140625, 42.587738037109375, 11.838191986083984, 15.079597473144531, 19.433395385742188, -7.270183563232422, 15.438957214355469, 36.511749267578125, 25.379295349121094, 18.37751007080078, -0.8560256958007812, 32.030784606933594, 56.25391387939453, 42.61829376220703, 45.53001403808594, 68.65199279785156, 22.459999084472656, -89.11721801757812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000076.npy"} +{"epoch": 0.11160058737151249, "step": 77, "batch_size": 64, "mean": 33.11540603637695, "std": 40.546939849853516, "min": -96.91961669921875, "p10": -5.0467700958251935, "median": 32.56149864196777, "p90": 91.18480987548828, "max": 146.3433837890625, "pos_frac": 0.875, "sample": [29.124305725097656, 63.466209411621094, 44.747745513916016, 63.41330337524414, 68.89321899414062, -7.781593322753906, 89.99545288085938, 32.429847717285156, 12.508159637451172, 11.132381439208984, 14.872625350952148, 57.81919860839844, 57.79460906982422, 6.709835052490234, -3.39825439453125, 51.73737335205078, -96.91961669921875, 43.99853515625, 67.41849517822266, 48.73387145996094, 16.888362884521484, -5.753276824951172, 15.114124298095703, 4.023365020751953, 14.00213623046875, 95.73077392578125, 7.143754959106445, 47.521568298339844, 19.444068908691406, -7.342674255371094, 49.99491500854492, 43.2728271484375, 17.218679428100586, 106.70137023925781, 57.801239013671875, 14.91954231262207, 114.21221923828125, 146.3433837890625, 36.01239013671875, 31.010498046875, -51.850067138671875, 14.096817016601562, -57.22833251953125, 2.964954376220703, 19.054479598999023, 25.428184509277344, 44.402320861816406, 33.77001190185547, 44.192115783691406, 39.380126953125, 14.832382202148438, -28.137100219726562, 5.148036956787109, 47.812049865722656, 40.69268035888672, 33.24794006347656, 8.60544204711914, 91.69453430175781, 111.03529357910156, 19.996231079101562, 41.7336311340332, 97.01860046386719, 32.69314956665039, 7.8475341796875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000077.npy"} +{"epoch": 0.1130690161527166, "step": 78, "batch_size": 64, "mean": 23.709949493408203, "std": 36.07380294799805, "min": -47.58760070800781, "p10": -16.62705612182617, "median": 21.33956813812256, "p90": 67.56072082519532, "max": 169.156005859375, "pos_frac": 0.78125, "sample": [-18.58050537109375, 3.7197837829589844, 74.56536865234375, 23.714256286621094, -10.838455200195312, 73.64595031738281, 20.49610137939453, 169.156005859375, 39.747406005859375, 29.86996841430664, 29.561016082763672, 13.097782135009766, -6.908229827880859, 14.531778335571289, 28.417285919189453, 27.98923110961914, -1.7168731689453125, 15.638315200805664, -7.942665100097656, 13.805870056152344, 5.751115798950195, 23.028093338012695, 41.88608932495117, 45.553428649902344, 29.086414337158203, 12.870975494384766, 25.920021057128906, 32.80889892578125, 42.499053955078125, 6.714263916015625, 68.00741577148438, 23.497934341430664, 9.279678344726562, -28.145427703857422, 38.008522033691406, -37.54216003417969, 108.2132568359375, 34.852195739746094, 66.5184326171875, -19.4666748046875, 49.4525032043457, 1.7834548950195312, 76.90133666992188, 42.440486907958984, 49.454933166503906, -9.083885192871094, 15.739799499511719, 5.622219085693359, 16.029239654541016, 7.682163238525391, 10.296388626098633, -3.4563121795654297, 22.183034896850586, -12.069007873535156, 63.93202209472656, -47.58760070800781, 66.44961547851562, 9.147624969482422, -42.814239501953125, 36.351707458496094, -32.727142333984375, 71.39027404785156, 10.287796020507812, 48.719512939453125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000078.npy"} +{"epoch": 0.1145374449339207, "step": 79, "batch_size": 64, "mean": 25.500267028808594, "std": 38.951141357421875, "min": -64.95878601074219, "p10": -19.779771423339838, "median": 20.09039306640625, "p90": 74.65989456176759, "max": 129.12002563476562, "pos_frac": 0.75, "sample": [-29.88147735595703, 73.00431060791016, 4.91490364074707, 109.17066955566406, -3.83087158203125, 3.024829864501953, -6.573236465454102, -22.152740478515625, 60.88146209716797, 43.378662109375, 23.048828125, -64.95878601074219, 34.75627899169922, 112.22689819335938, 7.716056823730469, 45.32110595703125, 109.07748413085938, 67.49555969238281, 23.512588500976562, -26.53460693359375, 129.12002563476562, 28.022377014160156, -4.7165069580078125, -24.2421875, 46.1527099609375, -0.6255912780761719, -14.242843627929688, 25.574905395507812, 20.503082275390625, 95.61632537841797, 1.557342529296875, 10.533075332641602, 19.677703857421875, 36.571083068847656, -31.878814697265625, 15.847328186035156, 5.533531188964844, 10.235435485839844, 72.08102416992188, 75.36943054199219, 36.599430084228516, 47.75921630859375, 57.71942138671875, 25.761520385742188, 42.45695495605469, -8.164344787597656, 2.6768417358398438, 50.641929626464844, 60.74524688720703, 14.286300659179688, 24.118515014648438, -7.1080169677734375, -2.3238296508789062, 60.894287109375, 0.9892234802246094, 40.437774658203125, 5.423206329345703, 8.98861312866211, 6.20465087890625, -27.233535766601562, 3.5375537872314453, 87.87251281738281, -6.825199127197266, 26.271469116210938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000079.npy"} +{"epoch": 0.11600587371512482, "step": 80, "batch_size": 64, "mean": 26.629343032836914, "std": 45.99043655395508, "min": -61.22895050048828, "p10": -19.700738525390623, "median": 16.545135498046875, "p90": 78.39695434570312, "max": 216.40036010742188, "pos_frac": 0.765625, "sample": [16.46392822265625, 11.071098327636719, -18.939300537109375, 33.435401916503906, 8.941253662109375, 4.706722259521484, -11.95339584350586, -18.7203369140625, 77.69911193847656, -23.790687561035156, 5.595726013183594, 22.109882354736328, -24.76512908935547, -38.438636779785156, -26.889686584472656, 37.701332092285156, 216.40036010742188, 61.899696350097656, 130.29925537109375, 62.93645477294922, 59.46910095214844, 8.16048812866211, 62.68769836425781, 98.55521392822266, 50.09394073486328, 25.027549743652344, -2.3333377838134766, 69.64718627929688, 20.49523162841797, 0.758453369140625, 16.6263427734375, -33.62449645996094, 4.899456024169922, -20.027069091796875, 42.27799987792969, -61.22895050048828, 84.85368347167969, 52.849937438964844, -18.392059326171875, 153.38735961914062, 12.771137237548828, 12.328460693359375, 1.7888679504394531, 9.261154174804688, 81.26988983154297, 28.051780700683594, 39.89647674560547, 68.16944885253906, 19.458024978637695, 10.741996765136719, -8.008352279663086, -6.856597900390625, 2.3962841033935547, 24.35003662109375, 29.43169593811035, 37.38877868652344, 10.789703369140625, 1.6646881103515625, 48.22801208496094, 43.42237854003906, 9.023948669433594, 78.69602966308594, -6.860288619995117, 16.92761993408203], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000080.npy"} +{"epoch": 0.11747430249632893, "step": 81, "batch_size": 64, "mean": 35.88337326049805, "std": 52.53159713745117, "min": -74.34921264648438, "p10": -21.062691879272457, "median": 28.274948120117188, "p90": 96.78348083496094, "max": 221.1695556640625, "pos_frac": 0.78125, "sample": [75.54317474365234, 59.5111083984375, -74.34921264648438, 56.642250061035156, 38.18695831298828, 124.52293395996094, 32.133602142333984, -22.835872650146484, 10.903526306152344, 49.36402893066406, 53.56758117675781, -64.55384826660156, 5.880836486816406, 50.54833984375, 167.98745727539062, 24.399723052978516, 45.42109680175781, 65.00703430175781, 110.64205932617188, -30.262351989746094, 2.3244457244873047, -0.8941497802734375, 25.454330444335938, 103.10432434082031, 36.92304992675781, 15.260965347290039, 40.1114501953125, 19.98587417602539, 12.147771835327148, -25.387481689453125, 40.433197021484375, -3.197784423828125, 9.149065017700195, 20.213226318359375, 26.58348846435547, 221.1695556640625, 68.93583679199219, 24.807647705078125, -0.05039215087890625, 97.207763671875, 72.62164306640625, 95.79348754882812, 29.966407775878906, -8.585826873779297, 0.9231033325195312, -9.156044006347656, -40.03105545043945, 21.382469177246094, -11.296348571777344, 58.76957702636719, 21.079715728759766, 11.959741592407227, 32.389793395996094, -38.99761962890625, 47.715843200683594, 63.44255828857422, 176.19488525390625, 92.4881591796875, 18.953994750976562, 49.952491760253906, 2.7073745727539062, 48.675933837890625, -16.925270080566406, 63.96821594238281], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000081.npy"} +{"epoch": 0.11894273127753303, "step": 82, "batch_size": 64, "mean": 28.397132873535156, "std": 57.212318420410156, "min": -120.62965393066406, "p10": -38.149132537841794, "median": 23.988128662109375, "p90": 86.60402297973634, "max": 240.01004028320312, "pos_frac": 0.765625, "sample": [-37.614498138427734, 56.338768005371094, -60.034881591796875, 25.06237030029297, 31.10726547241211, 23.892480850219727, -54.273780822753906, 19.955646514892578, 40.23858642578125, 87.55204010009766, 121.380859375, 14.641868591308594, -38.37826156616211, 22.24384307861328, 24.083776473999023, 180.8805389404297, 21.97601318359375, -82.77203369140625, -21.667877197265625, 55.676025390625, 1.8996505737304688, 45.30329895019531, 91.45807647705078, 5.982122421264648, -30.134845733642578, -18.328880310058594, 9.377250671386719, 51.56108856201172, 34.45416259765625, 16.941070556640625, 40.285797119140625, 57.885589599609375, 71.12084197998047, -0.6962966918945312, 131.02130126953125, 26.17313003540039, 84.39198303222656, 24.756275177001953, 10.931974411010742, 19.58592987060547, -9.498863220214844, 55.23896789550781, 116.12568664550781, -44.4871826171875, 43.876441955566406, 42.822410583496094, 8.943105697631836, 74.56672668457031, 75.9477310180664, -77.46038818359375, 21.256332397460938, 10.8880615234375, -120.62965393066406, 21.108291625976562, 54.89031219482422, 31.7655029296875, -4.792427062988281, 37.66265106201172, 21.29568862915039, 240.01004028320312, 28.642684936523438, 17.44350242614746, -2.8461151123046875, 70.3886947631836], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000082.npy"} +{"epoch": 0.12041116005873716, "step": 83, "batch_size": 64, "mean": 36.04762649536133, "std": 50.28451919555664, "min": -64.24420166015625, "p10": -21.680008697509766, "median": 27.656166076660156, "p90": 114.74683990478516, "max": 159.04556274414062, "pos_frac": 0.765625, "sample": [35.76963806152344, 42.79825210571289, 25.260120391845703, 115.44056701660156, 75.20895385742188, 16.82223129272461, -19.3377685546875, 159.04556274414062, 134.25161743164062, -2.7420654296875, 31.142181396484375, 24.44401741027832, -4.840118408203125, -15.942543029785156, 25.338966369628906, 113.12814331054688, -21.541351318359375, 143.33583068847656, 19.799484252929688, 127.2924575805664, 71.14161682128906, 25.779563903808594, 68.56024169921875, -36.09069061279297, -42.49089050292969, -21.73943328857422, 99.73287963867188, -49.17650604248047, 21.29999351501465, 107.33668518066406, -7.824180603027344, 31.201400756835938, 7.034799575805664, -16.44781494140625, 52.723045349121094, 13.850074768066406, 34.60532760620117, 1.6376190185546875, 69.3434829711914, 35.45430374145508, 37.25721740722656, 44.74686050415039, 79.41722106933594, 130.78536987304688, 25.0484619140625, 81.47714233398438, 6.103969573974609, -32.055206298828125, -5.770946502685547, 8.483604431152344, -64.24420166015625, 118.408447265625, 13.184410095214844, 65.7393798828125, 42.782127380371094, 55.826568603515625, -29.920738220214844, 12.4080810546875, 25.71572494506836, 58.81159973144531, 67.35147094726562, 29.53276824951172, 45.16046905517578, 0.192413330078125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000083.npy"} +{"epoch": 0.12187958883994127, "step": 84, "batch_size": 64, "mean": 26.913192749023438, "std": 37.135311126708984, "min": -91.59463500976562, "p10": -9.187589836120601, "median": 24.816649436950684, "p90": 70.07678298950196, "max": 140.80421447753906, "pos_frac": 0.765625, "sample": [-1.6878280639648438, 39.234344482421875, 5.919464111328125, 10.186174392700195, 99.42880249023438, 20.458145141601562, 10.809837341308594, 33.955322265625, 27.025178909301758, 0.5629501342773438, 43.11386489868164, 72.2467269897461, -43.62071228027344, 14.507087707519531, 22.60811996459961, 37.822418212890625, 14.77979850769043, 140.80421447753906, 44.91618728637695, 0.3985137939453125, 5.577741622924805, 35.99412536621094, -10.799263000488281, 8.782564163208008, -0.6997795104980469, 63.28887939453125, 44.00419616699219, 29.125808715820312, -91.59463500976562, -5.427019119262695, 71.5311508178711, -0.96478271484375, 51.72509765625, 13.998832702636719, 37.54475402832031, 39.06309509277344, 80.27835083007812, 66.68325805664062, 64.31907653808594, 14.271003723144531, -4.463859558105469, 40.36266326904297, -10.982366561889648, 21.881528854370117, 38.765777587890625, 41.1052131652832, -3.710540771484375, -11.230962753295898, 53.76573181152344, 52.65718078613281, -19.728721618652344, 56.86833953857422, 96.32878112792969, 22.323410034179688, 104.756591796875, 30.38970947265625, 46.004493713378906, 19.881912231445312, -50.39551544189453, 42.981468200683594, 16.14593505859375, 29.055984497070312, -0.4413719177246094, -0.048122406005859375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000084.npy"} +{"epoch": 0.12334801762114538, "step": 85, "batch_size": 64, "mean": 28.315706253051758, "std": 43.99538040161133, "min": -66.83724212646484, "p10": -25.704162597656246, "median": 26.13123321533203, "p90": 88.9405288696289, "max": 123.5101318359375, "pos_frac": 0.703125, "sample": [63.61524963378906, 31.61212921142578, 37.308815002441406, 7.352924346923828, -31.673019409179688, 29.756317138671875, 67.0994873046875, -5.148555755615234, 91.08252716064453, 16.536766052246094, -3.1105690002441406, 9.866836547851562, 115.92620849609375, 48.56769561767578, 36.65966033935547, 32.667449951171875, -12.831008911132812, -43.53211212158203, 27.614578247070312, 2.6688919067382812, -66.83724212646484, 120.86618041992188, 41.932090759277344, -8.251485824584961, -27.44952392578125, 64.57694244384766, 0.8602523803710938, -38.647125244140625, 31.113685607910156, 13.389480590820312, 80.132080078125, -10.194595336914062, -47.28129577636719, -1.7713165283203125, 13.917266845703125, 5.396507263183594, 94.86474609375, 23.9482421875, 63.72792053222656, -1.802713394165039, -11.423967361450195, 89.47425842285156, -3.630939483642578, 83.0909423828125, 117.30596923828125, 22.904621124267578, 87.69515991210938, 123.5101318359375, 20.453079223632812, 8.923347473144531, 69.18579864501953, 76.6989517211914, -1.9512481689453125, -21.63165283203125, 33.841156005859375, 28.75547981262207, 59.22486877441406, 32.646995544433594, 41.99341583251953, 54.84511947631836, -51.07806396484375, 24.64788818359375, -2.690662384033203, 54.88428497314453], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000085.npy"} +{"epoch": 0.12481644640234948, "step": 86, "batch_size": 64, "mean": 30.29677963256836, "std": 73.71614837646484, "min": -135.28787231445312, "p10": -41.613576507568354, "median": 16.956130027770996, "p90": 106.18206939697266, "max": 283.8001708984375, "pos_frac": 0.6875, "sample": [-36.191925048828125, 32.07984161376953, 107.05650329589844, -50.893150329589844, 47.352210998535156, 54.11131286621094, 38.44645690917969, 11.467605590820312, 211.28753662109375, 7.7833709716796875, 10.96426773071289, -28.42778778076172, -6.37261962890625, 39.45829772949219, -17.280517578125, 30.693023681640625, 77.71670532226562, 19.40845489501953, -4.064319610595703, -135.28787231445312, 143.17190551757812, 50.2519416809082, 47.1085319519043, 16.549795150756836, 16.500411987304688, -27.82573699951172, -20.256134033203125, 104.1417236328125, -5.873493194580078, 17.362464904785156, 59.27932357788086, 21.905494689941406, 14.711442947387695, -128.88536071777344, 92.21304321289062, 92.06915283203125, 44.84516906738281, -96.37317657470703, 152.74705505371094, 0.3179950714111328, 283.8001708984375, 15.176128387451172, -2.768096923828125, 20.642070770263672, -11.637775421142578, 216.96621704101562, 8.734676361083984, 204.97946166992188, 82.10916137695312, -47.8917236328125, -62.850303649902344, 53.695884704589844, 10.607673645019531, 46.67594909667969, 54.569007873535156, 41.28517150878906, 46.2977294921875, 16.057340621948242, -20.794029235839844, -0.2042675018310547, -11.998149871826172, -43.93714141845703, 0.1105499267578125, 36.099239349365234], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000086.npy"} +{"epoch": 0.1262848751835536, "step": 87, "batch_size": 64, "mean": 39.04181671142578, "std": 67.14470672607422, "min": -204.67852783203125, "p10": -34.25244293212891, "median": 29.331405639648438, "p90": 130.2506042480469, "max": 210.92665100097656, "pos_frac": 0.765625, "sample": [31.67780113220215, 66.14447021484375, -0.7425136566162109, 13.653312683105469, 69.15994262695312, -33.84941101074219, 62.42170715332031, -0.5176334381103516, -0.9287033081054688, -34.4251708984375, -55.67561340332031, 110.57620239257812, -18.337181091308594, 136.5026397705078, 68.15007019042969, 121.23568725585938, 82.35682678222656, 19.33632469177246, 146.66610717773438, 145.130859375, 28.230880737304688, 35.24042510986328, 10.824806213378906, 67.81059265136719, 63.753150939941406, 3.4134178161621094, 63.764183044433594, 210.92665100097656, 9.289741516113281, 13.065361022949219, 10.893045425415039, 133.56045532226562, 83.66716003417969, 23.266841888427734, 99.51475524902344, 43.75984191894531, 22.715639114379883, 64.33699035644531, 31.668426513671875, 178.39012145996094, 0.7808475494384766, 147.34739685058594, -7.086067199707031, 19.97803497314453, -204.67852783203125, 22.863622665405273, 86.24344635009766, -46.90611267089844, 61.382408142089844, 122.52761840820312, -44.15510559082031, -43.13456344604492, 13.135873794555664, 27.521045684814453, -30.183212280273438, 87.893798828125, 30.431930541992188, 64.94972229003906, -85.11669921875, 57.526206970214844, 16.635498046875, 0.2899799346923828, 94.48616027832031, -20.685386657714844], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000087.npy"} +{"epoch": 0.1277533039647577, "step": 88, "batch_size": 64, "mean": 29.389965057373047, "std": 68.009765625, "min": -149.13638305664062, "p10": -33.445855712890626, "median": 17.38933563232422, "p90": 96.8362045288086, "max": 297.6363525390625, "pos_frac": 0.6875, "sample": [3.7158737182617188, -7.844879150390625, 99.1187744140625, 125.19644165039062, 51.21568298339844, 92.85086059570312, 9.154523849487305, 97.48597717285156, -52.95658874511719, 20.224184036254883, 70.69989013671875, 16.79754638671875, 3.9293975830078125, 79.76592254638672, -48.97596740722656, 68.05148315429688, -33.929771423339844, 21.221511840820312, -97.48210144042969, -149.13638305664062, -15.738285064697266, 153.5438232421875, 75.16548919677734, -8.313240051269531, 53.915687561035156, 5.518487930297852, 77.64077758789062, -42.75105285644531, 34.024169921875, 28.708465576171875, 67.6396255493164, 8.697608947753906, -5.54473876953125, 42.85357666015625, -1.8255767822265625, 191.9736328125, -27.697654724121094, 95.320068359375, 7.5569305419921875, 297.6363525390625, 14.188674926757812, 87.67124938964844, -2.0126476287841797, 16.373666763305664, 16.821975708007812, 51.48625183105469, -7.556140899658203, -11.362564086914062, 68.00096130371094, 159.43450927734375, 48.19745635986328, 22.51434326171875, -10.96792221069336, 22.31189727783203, -112.0421142578125, -11.172286987304688, 17.956695556640625, -1.4364509582519531, 19.426977157592773, 16.053321838378906, 24.894975662231445, -32.31671905517578, 3.7756805419921875, 73.28944396972656], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000088.npy"} +{"epoch": 0.12922173274596183, "step": 89, "batch_size": 64, "mean": 32.45248794555664, "std": 57.409820556640625, "min": -198.3001708984375, "p10": -12.300586318969726, "median": 24.95199489593506, "p90": 87.9282180786133, "max": 230.70664978027344, "pos_frac": 0.8125, "sample": [37.183746337890625, -1.3464736938476562, 11.438699722290039, 26.914953231811523, -20.749874114990234, 100.96830749511719, 89.36869049072266, 22.989036560058594, -31.488754272460938, 82.86382293701172, 92.31932830810547, 31.472644805908203, 42.620758056640625, 84.5671157836914, 75.59339141845703, 37.537166595458984, 7.217201232910156, 11.116554260253906, -5.176580429077148, 29.570755004882812, 27.927600860595703, 10.545278549194336, 20.852157592773438, 19.917377471923828, -34.5692138671875, -5.158655166625977, 5.2977294921875, 14.689172744750977, 172.71414184570312, 28.36188507080078, 64.59400177001953, 152.94979858398438, 22.680767059326172, 62.7762451171875, 33.01442337036133, -12.743972778320312, 10.570735931396484, 230.70664978027344, 15.479049682617188, -11.26601791381836, 38.481937408447266, -19.580703735351562, 39.32799530029297, 45.13473892211914, 17.009790420532227, 5.418708801269531, 12.619878768920898, 1.3408317565917969, 6.680084228515625, 68.39097595214844, 38.617774963378906, 73.20150756835938, -6.20343017578125, 33.873626708984375, 82.8478775024414, -198.3001708984375, -63.45561218261719, 40.602195739746094, 21.646350860595703, 60.30110168457031, 21.53125, 27.90497589111328, 159.62693786621094, 15.620895385742188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000089.npy"} +{"epoch": 0.13069016152716592, "step": 90, "batch_size": 64, "mean": 49.350799560546875, "std": 82.57282257080078, "min": -129.3666229248047, "p10": -35.60781555175781, "median": 34.33307647705078, "p90": 168.8206298828125, "max": 285.77581787109375, "pos_frac": 0.75, "sample": [133.57965087890625, -82.09178924560547, 39.35221862792969, 196.58444213867188, 27.382293701171875, 59.04174041748047, -0.8378219604492188, -14.981266021728516, 95.44146728515625, -4.375, 69.1193618774414, 26.06667709350586, -11.238231658935547, 18.581703186035156, 71.685791015625, 126.52664184570312, -12.436904907226562, 285.77581787109375, 122.08534240722656, -3.7420272827148438, 2.733854293823242, 33.30028533935547, 43.92693328857422, -49.4114990234375, 230.81402587890625, 18.292665481567383, 35.365867614746094, 213.96148681640625, 5.005865097045898, 28.477123260498047, 259.0457763671875, 47.83850860595703, -79.67926025390625, 22.592212677001953, -32.1982421875, -129.3666229248047, 44.45063400268555, 12.820404052734375, 101.81703186035156, 13.249153137207031, 38.36553955078125, 20.528839111328125, -37.069061279296875, -7.096775054931641, 47.80775451660156, 171.17208862304688, 187.45840454101562, -62.979583740234375, -88.08354187011719, 15.281349182128906, 64.43791198730469, -0.2639293670654297, 27.975128173828125, 83.8705825805664, 130.73544311523438, 59.71221160888672, 13.809404373168945, 18.66038703918457, 159.06539916992188, 43.02475357055664, 43.23778533935547, 35.83929443359375, 163.33389282226562, 65.07161712646484], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000090.npy"} +{"epoch": 0.13215859030837004, "step": 91, "batch_size": 64, "mean": 33.60929870605469, "std": 65.57806396484375, "min": -77.25086212158203, "p10": -49.40525512695312, "median": 26.97345542907715, "p90": 116.34534301757817, "max": 264.06292724609375, "pos_frac": 0.671875, "sample": [36.56220245361328, 76.46804809570312, -76.891357421875, 16.437313079833984, 137.24327087402344, 1.7281742095947266, -58.02996826171875, 62.210636138916016, -1.285257339477539, 39.61900329589844, 70.12560272216797, 69.13192749023438, 83.23680877685547, 15.967208862304688, -69.0279312133789, 182.3313751220703, -16.309341430664062, -0.5313568115234375, 14.508377075195312, 141.44715881347656, 7.209442138671875, 49.56735610961914, 264.06292724609375, -48.00694274902344, 31.192825317382812, 84.64542388916016, -53.36223602294922, 12.750011444091797, 38.389122009277344, -14.137619018554688, -50.00453186035156, 27.882266998291016, 103.97505187988281, -58.97456359863281, 19.850051879882812, 40.340057373046875, 91.21936798095703, -26.820968627929688, 27.242774963378906, -9.378070831298828, 22.220718383789062, -38.31463623046875, 88.77558898925781, 2.51177978515625, -41.71546936035156, 69.30307006835938, 26.70413589477539, 121.64689636230469, 8.491020202636719, 75.30306243896484, -7.062980651855469, -47.03498840332031, -0.5775661468505859, 74.17796325683594, -3.079803466796875, 45.05955505371094, -2.2366371154785156, 135.60165405273438, 68.8268051147461, 45.480316162109375, 94.52656555175781, -77.25086212158203, 91.34326171875, 135.7119140625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000091.npy"} +{"epoch": 0.13362701908957417, "step": 92, "batch_size": 64, "mean": 36.09873580932617, "std": 60.56501388549805, "min": -104.43417358398438, "p10": -22.833811950683593, "median": 27.920469284057617, "p90": 101.4445411682129, "max": 242.8406982421875, "pos_frac": 0.71875, "sample": [-17.072410583496094, 13.051643371582031, 0.0564117431640625, 93.30707550048828, -10.028572082519531, 6.654327392578125, -82.11712646484375, -24.253372192382812, 93.3708724975586, 32.582305908203125, 103.5145263671875, -0.97216796875, 77.5308837890625, 2.7618541717529297, -39.246315002441406, 180.09445190429688, 74.12012481689453, 242.8406982421875, -104.43417358398438, 0.7963409423828125, 39.36153793334961, -22.94379425048828, 44.8275146484375, -16.641063690185547, -46.153411865234375, 101.22190856933594, -22.577186584472656, 110.7537841796875, 38.358116149902344, 1.2636547088623047, 101.53995513916016, -5.619869232177734, 44.58390808105469, -2.4177989959716797, 69.76177215576172, 18.12845230102539, 64.32870483398438, 208.70252990722656, 12.509868621826172, 93.8056640625, 67.76436614990234, 108.17793273925781, -4.636650085449219, -19.856643676757812, 15.639900207519531, 43.772212982177734, 44.33433532714844, 62.72625732421875, 50.253700256347656, 15.682485580444336, 1.8764572143554688, -17.991703033447266, 63.208438873291016, 23.214458465576172, 23.25863265991211, -11.475517272949219, 50.37218475341797, 91.20350646972656, 67.96583557128906, 39.710693359375, 18.75505256652832, -29.66021728515625, 82.47262573242188, 48.199134826660156], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000092.npy"} +{"epoch": 0.13509544787077826, "step": 93, "batch_size": 64, "mean": 34.32225799560547, "std": 52.4395866394043, "min": -65.4114761352539, "p10": -23.74437713623047, "median": 19.38075542449951, "p90": 89.55270309448242, "max": 242.75576782226562, "pos_frac": 0.78125, "sample": [19.173973083496094, 10.993497848510742, 42.603675842285156, 90.03412628173828, 83.61743927001953, 88.42938232421875, 2.2704620361328125, 27.263168334960938, 77.02338409423828, -36.023773193359375, 140.87655639648438, 50.863250732421875, 81.72032165527344, 71.71404266357422, 242.75576782226562, 26.63589859008789, -46.94176483154297, -16.652801513671875, -22.576339721679688, 135.10040283203125, -20.118297576904297, 19.58753776550293, 31.81363296508789, 11.990127563476562, 66.4388427734375, 0.59283447265625, 77.50733184814453, 5.674369812011719, 2.394542694091797, 7.472709655761719, -1.3545856475830078, 7.3431396484375, 13.01913070678711, 16.1417236328125, 40.75914001464844, 101.44868469238281, 104.72928619384766, -51.511871337890625, -7.134313583374023, 103.590576171875, 82.05199432373047, 38.89894485473633, 88.11441802978516, 10.270822525024414, 11.91163444519043, 17.32471466064453, 46.13206481933594, 3.363950729370117, 76.84246063232422, -6.932716369628906, -30.482444763183594, 82.27543640136719, -24.244964599609375, 40.16510772705078, 12.1282958984375, 5.382286071777344, -26.396163940429688, 25.81536102294922, 66.21998596191406, 47.374000549316406, 80.34367370605469, 17.188735961914062, -65.4114761352539, -0.9769859313964844], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000093.npy"} +{"epoch": 0.13656387665198239, "step": 94, "batch_size": 64, "mean": 42.09075164794922, "std": 57.5733642578125, "min": -68.84500122070312, "p10": -27.478451538085935, "median": 34.02371025085449, "p90": 118.35354843139649, "max": 210.49850463867188, "pos_frac": 0.765625, "sample": [36.264034271240234, -34.92356872558594, 63.6971435546875, 28.875701904296875, 0.5402374267578125, -33.60235595703125, 22.629364013671875, 84.3397216796875, 4.7717742919921875, -68.84500122070312, 29.162565231323242, 5.046173095703125, 36.14634704589844, 19.922271728515625, 15.279010772705078, -0.7478084564208984, 95.77798461914062, 10.929122924804688, -16.596820831298828, 198.18484497070312, 5.565767288208008, 62.36454391479492, 52.13713073730469, 78.53874206542969, 135.01358032226562, 74.31069946289062, 10.586456298828125, 43.675079345703125, 34.80428695678711, -9.133880615234375, -10.551298141479492, 24.105802536010742, 56.39124298095703, 129.33004760742188, 60.70230484008789, 28.823986053466797, 51.09222412109375, 116.74646759033203, 38.8488655090332, 26.812841415405273, 5.108783721923828, 60.59601593017578, -28.687355041503906, -41.14714813232422, 95.935791015625, 114.57427978515625, -17.710176467895508, 78.71408081054688, 210.49850463867188, -24.657676696777344, -45.35887145996094, 119.80659484863281, 67.97771453857422, 99.5128173828125, -7.5001678466796875, 30.296112060546875, -29.897445678710938, -16.53106689453125, 51.75044250488281, 33.243133544921875, 51.5810661315918, 97.43833923339844, 162.206298828125, 119.04229736328125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000094.npy"} +{"epoch": 0.13803230543318648, "step": 95, "batch_size": 64, "mean": 33.15034484863281, "std": 57.854957580566406, "min": -163.16627502441406, "p10": -23.50233688354492, "median": 21.15165615081787, "p90": 109.66863250732425, "max": 170.55210876464844, "pos_frac": 0.765625, "sample": [43.351287841796875, 77.2977066040039, -22.970741271972656, 38.494903564453125, 29.324159622192383, 139.37197875976562, 5.621181488037109, 10.169435501098633, -19.202255249023438, 8.764656066894531, 18.255834579467773, 12.378036499023438, 56.32638168334961, 142.4407196044922, 68.31069946289062, -29.62572479248047, 54.78961944580078, 1.8579540252685547, 27.757705688476562, 78.86572265625, 52.83384704589844, 64.81431579589844, 25.92668342590332, 19.9913272857666, 12.007741928100586, -32.926055908203125, 15.320404052734375, 114.8386001586914, 27.060773849487305, 42.32404327392578, -24.656517028808594, -23.73016357421875, 73.27069091796875, 170.55210876464844, 112.90728759765625, 96.24493408203125, 83.93757629394531, -3.553255081176758, -6.903144836425781, 149.53695678710938, 67.33932495117188, 149.249267578125, 102.11177062988281, 0.4637451171875, 18.513734817504883, 10.890398025512695, -3.8609695434570312, 44.568511962890625, 74.55602264404297, -163.16627502441406, 3.3214759826660156, -1.1579513549804688, -85.51541137695312, 92.51828002929688, 61.19507598876953, 14.018726348876953, 48.65618896484375, 13.334884643554688, 20.27522087097168, 22.028091430664062, -8.713045120239258, -5.619501113891602, 15.772565841674805, -80.53543853759766], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000095.npy"} +{"epoch": 0.1395007342143906, "step": 96, "batch_size": 64, "mean": 45.4787712097168, "std": 65.20545959472656, "min": -102.49568176269531, "p10": -14.404221725463863, "median": 25.377726554870605, "p90": 134.18510665893558, "max": 263.3116760253906, "pos_frac": 0.828125, "sample": [113.91423034667969, 6.37957763671875, 62.94642639160156, 50.8472785949707, 8.613494873046875, 111.4039306640625, 8.082412719726562, 4.762363433837891, 1.0893001556396484, 13.0728759765625, 180.47511291503906, 34.95347595214844, -15.991676330566406, -17.568157196044922, 9.879154205322266, 29.00821304321289, 41.36513137817383, 59.79657745361328, 113.55081939697266, 154.28158569335938, 12.136558532714844, 63.72810363769531, 62.699867248535156, 259.4083251953125, 127.68851470947266, 8.49777603149414, 14.715221405029297, -8.616867065429688, 12.830230712890625, 13.113121032714844, 13.626823425292969, 263.3116760253906, 45.96192169189453, 41.77015686035156, 14.59503173828125, 136.9693603515625, 112.05281066894531, 5.725437164306641, 54.915122985839844, 1.9224414825439453, 82.52182006835938, 28.713224411010742, 43.48821258544922, -33.01817321777344, 21.082767486572266, 0.4397449493408203, -19.950576782226562, 68.96454620361328, -10.70016098022461, 45.593589782714844, 14.194198608398438, 93.7900390625, -21.8812255859375, 29.757705688476562, 144.10263061523438, 22.04222869873047, 21.796297073364258, 144.66139221191406, 92.57698059082031, -30.460243225097656, -3.048837661743164, 55.163536071777344, -102.49568176269531, -4.606422424316406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000096.npy"} +{"epoch": 0.14096916299559473, "step": 97, "batch_size": 64, "mean": 33.94279861450195, "std": 50.30982208251953, "min": -98.27681732177734, "p10": -22.429965209960937, "median": 31.823521614074707, "p90": 98.87859497070313, "max": 178.438720703125, "pos_frac": 0.734375, "sample": [26.071823120117188, 87.90646362304688, -47.473968505859375, -12.868911743164062, -10.992420196533203, -20.11815643310547, 22.711544036865234, 39.403785705566406, 1.9975757598876953, 65.95980834960938, 43.82100296020508, 61.75862121582031, -2.1407928466796875, 23.368223190307617, 105.52139282226562, 105.28306579589844, -48.362876892089844, 89.00291442871094, 0.8885040283203125, 17.305429458618164, 33.49896240234375, 4.559589385986328, 21.657470703125, -43.49256896972656, 153.773193359375, 21.335298538208008, 34.87171936035156, 37.97505187988281, 59.87049865722656, 19.761516571044922, 44.42950439453125, -15.523918151855469, 31.194442749023438, -26.887147903442383, 80.07608795166016, 99.48837280273438, 63.49784851074219, 44.10298538208008, -98.27681732177734, -22.729129791259766, 77.07730865478516, 33.273624420166016, -25.446334838867188, 15.936389923095703, 83.68208312988281, 51.49482345581055, -1.3694133758544922, 31.1997013092041, 33.76176452636719, 26.58576202392578, -10.068471908569336, 113.33338928222656, -12.751502990722656, 66.44131469726562, -21.731914520263672, 128.57676696777344, -11.666175842285156, 32.44734191894531, 66.35306549072266, 97.45578002929688, 44.730037689208984, 18.84646987915039, 178.438720703125, 63.51249694824219], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000097.npy"} +{"epoch": 0.14243759177679882, "step": 98, "batch_size": 64, "mean": 38.55535125732422, "std": 70.16474151611328, "min": -231.91873168945312, "p10": -34.868531417846675, "median": 46.53466796875, "p90": 100.86660079956057, "max": 230.52682495117188, "pos_frac": 0.765625, "sample": [76.63796997070312, 4.915283203125, 71.08480834960938, 88.89723205566406, 8.950660705566406, 31.580347061157227, 41.24150085449219, 37.202178955078125, 63.324981689453125, 14.984718322753906, 42.53654479980469, -73.38790893554688, 29.8543701171875, 54.76146697998047, 103.19186401367188, 80.19569396972656, -33.297176361083984, -14.66802978515625, -10.814201354980469, -45.4923095703125, 55.93268966674805, 49.374664306640625, -67.65432739257812, -108.196533203125, 75.64199829101562, 77.64453125, 43.27410888671875, 93.95426177978516, 108.09657287597656, 31.20037841796875, 73.84602355957031, -11.65755844116211, 95.44098663330078, 230.52682495117188, -231.91873168945312, 147.25633239746094, 91.65634155273438, 43.694671630859375, 81.06010437011719, 40.21788787841797, -23.48114776611328, 55.702964782714844, -131.48880004882812, 54.901893615722656, 74.51139831542969, 1.9354629516601562, 58.5595703125, 115.87692260742188, 85.09233093261719, 51.41123580932617, 28.98670196533203, 34.74552917480469, 152.48782348632812, 178.96078491210938, 50.190086364746094, 62.76021194458008, 61.80912780761719, 29.01975440979004, 25.94388198852539, -18.059181213378906, 73.69281768798828, -11.230236053466797, -0.33585548400878906, -35.541969299316406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000098.npy"} +{"epoch": 0.14390602055800295, "step": 99, "batch_size": 64, "mean": 50.522987365722656, "std": 72.76750183105469, "min": -67.44483947753906, "p10": -24.33005676269531, "median": 41.610591888427734, "p90": 152.6795608520508, "max": 297.4832763671875, "pos_frac": 0.765625, "sample": [-1.5422916412353516, -31.43920135498047, 8.131254196166992, 33.60126876831055, -7.075233459472656, 57.04385757446289, 55.394805908203125, 59.82402038574219, 178.6015625, 30.8791446685791, 47.997154235839844, 210.30819702148438, -3.584583282470703, -59.50538635253906, 163.76148986816406, -23.13928985595703, 35.65412139892578, 45.843231201171875, -4.448436737060547, 42.53822326660156, 186.15078735351562, 26.414268493652344, 18.182703018188477, 251.44570922851562, 18.899747848510742, 115.24038696289062, -12.814872741699219, -55.958290100097656, 12.030237197875977, -67.44483947753906, -24.84038543701172, 67.1180419921875, 57.385154724121094, 46.84214401245117, 105.64591979980469, 297.4832763671875, 113.42579650878906, 68.77964782714844, 19.295822143554688, 40.682960510253906, 10.542343139648438, 43.63802719116211, 68.90415954589844, 138.6233367919922, 153.72116088867188, 18.0091552734375, 15.959304809570312, 101.42465209960938, -19.050132751464844, -14.447835922241211, 49.03272247314453, 150.24916076660156, 46.26704025268555, 11.48886489868164, 101.16429138183594, -31.0628662109375, 59.4535026550293, 18.014474868774414, 99.9649658203125, 51.810325622558594, 24.33154296875, 63.67388916015625, 7.743541717529297, -58.79266357421875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000099.npy"} +{"epoch": 0.14537444933920704, "step": 100, "batch_size": 64, "mean": 23.65607452392578, "std": 63.64777374267578, "min": -163.95144653320312, "p10": -43.77079849243164, "median": 23.347576141357422, "p90": 94.9694694519043, "max": 233.29049682617188, "pos_frac": 0.734375, "sample": [69.66727447509766, 35.15210723876953, -11.644912719726562, 18.035995483398438, 35.7169303894043, -29.885147094726562, 44.36347198486328, 49.05632781982422, 4.031660079956055, 128.7276611328125, 0.37298583984375, 1.7281227111816406, 64.04107666015625, 32.640724182128906, 8.834339141845703, 124.55313110351562, 24.732826232910156, -74.88240051269531, -95.36448669433594, -139.248291015625, -21.78870964050293, -19.87793731689453, -1.7777881622314453, -112.40164184570312, 72.32170104980469, 11.541130065917969, 65.70222473144531, 21.962326049804688, 1.1679706573486328, 5.730249404907227, 62.62895202636719, -13.170394897460938, 54.97239685058594, 10.32003402709961, 89.82966613769531, 35.73041534423828, 13.610136032104492, 15.4437255859375, 95.7155532836914, 51.027130126953125, 67.10865783691406, 26.881072998046875, 6.2996063232421875, -1.683523178100586, 49.00132751464844, 46.16991424560547, -47.152069091796875, -46.00391387939453, -13.803153991699219, 100.48936462402344, 52.540565490722656, 98.09957122802734, 31.35484504699707, -163.95144653320312, 233.29049682617188, -25.16597557067871, 4.131927490234375, 133.70339965820312, -38.56019592285156, 93.22860717773438, 66.65337371826172, 13.03521728515625, 58.22338104248047, 40.781192779541016], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000100.npy"} +{"epoch": 0.14684287812041116, "step": 101, "batch_size": 64, "mean": 31.800077438354492, "std": 69.24372100830078, "min": -102.5204849243164, "p10": -45.13888168334961, "median": 23.817631721496582, "p90": 138.6332382202149, "max": 212.86117553710938, "pos_frac": 0.671875, "sample": [172.86070251464844, 153.8826904296875, 41.54638671875, 46.747344970703125, -39.850868225097656, 163.65054321289062, 163.888916015625, 20.751968383789062, 144.3427734375, 189.2218780517578, -89.35446166992188, 48.4102783203125, -70.25809478759766, 62.758705139160156, 20.057897567749023, -24.119773864746094, -47.78638458251953, 19.430280685424805, 106.2885513305664, 30.371932983398438, -36.549163818359375, 62.0345458984375, 80.32827758789062, 97.23554229736328, 18.707435607910156, 24.180622100830078, 23.138477325439453, 88.40010833740234, 212.86117553710938, 48.13793182373047, -33.939178466796875, -8.947372436523438, -20.90646743774414, 72.48832702636719, 23.454641342163086, 59.70033264160156, 125.31098937988281, 85.98239135742188, -0.333709716796875, 7.188060760498047, -5.099386215209961, -16.60886001586914, 27.10515594482422, -17.07952117919922, -38.87782287597656, -31.0441951751709, 70.5903091430664, 41.228580474853516, 11.963184356689453, 37.772789001464844, -102.5204849243164, 36.857906341552734, -25.52655029296875, -57.860897064208984, 29.84183120727539, 22.878694534301758, -45.32699966430664, 64.24153137207031, 10.854377746582031, 1.4931812286376953, 62.349876403808594, -44.6999397277832, 59.036643981933594, -97.67863464355469], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000101.npy"} +{"epoch": 0.14831130690161526, "step": 102, "batch_size": 64, "mean": 21.564611434936523, "std": 66.44694519042969, "min": -170.97430419921875, "p10": -46.701605224609374, "median": 26.941256523132324, "p90": 101.86376113891602, "max": 197.74301147460938, "pos_frac": 0.625, "sample": [5.524127960205078, 43.44823455810547, 102.80782318115234, 37.96507263183594, -37.20826721191406, 7.636447906494141, 8.249984741210938, -89.02699279785156, 28.862035751342773, -170.97430419921875, 33.32037353515625, 151.62689208984375, -29.80864715576172, 44.936912536621094, 44.287166595458984, -15.337963104248047, 2.9865341186523438, 40.33503723144531, 76.84294891357422, 1.8359565734863281, -45.51544189453125, -9.092090606689453, 81.10816955566406, -13.505472183227539, 111.33430480957031, 34.35410690307617, 51.48974609375, 197.74301147460938, -7.494468688964844, -4.1969451904296875, 162.16085815429688, -1.3924102783203125, 47.18149185180664, 55.951698303222656, 7.049526214599609, -35.80500793457031, 50.59038543701172, -157.03433227539062, 25.020477294921875, -48.5123291015625, 1.5677223205566406, -101.16819763183594, -36.60047912597656, -17.787986755371094, 86.34818267822266, 99.66094970703125, 67.47037506103516, 117.61923217773438, 70.92225646972656, -14.436187744140625, 103.3257827758789, -61.43211364746094, -15.674812316894531, -8.802295684814453, 68.5937728881836, -47.2099609375, 59.810462951660156, 52.57158660888672, 39.483314514160156, 43.46986389160156, -23.920272827148438, -26.533798217773438, 59.080169677734375, 74.03297424316406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000102.npy"} +{"epoch": 0.14977973568281938, "step": 103, "batch_size": 64, "mean": 47.94136428833008, "std": 75.86981201171875, "min": -109.76776123046875, "p10": -18.20170097351074, "median": 38.50716018676758, "p90": 129.1961479187012, "max": 320.9969482421875, "pos_frac": 0.8125, "sample": [-16.61130142211914, 10.693267822265625, 16.800493240356445, 53.5185546875, -80.63343811035156, -48.58601379394531, 33.5528678894043, 49.15735626220703, 21.967559814453125, 20.269939422607422, 37.22792434692383, 56.471160888671875, 33.967742919921875, 8.692848205566406, -11.67138671875, 86.18310546875, 169.52099609375, 49.936912536621094, 72.67088317871094, 111.80486297607422, -99.81571960449219, -18.88330078125, 80.51078796386719, 7.96697998046875, -107.22746276855469, 106.33255767822266, 45.30198669433594, -24.332595825195312, 22.35057830810547, 18.797210693359375, 85.29474639892578, 297.7629089355469, 44.813133239746094, 10.231067657470703, 114.79635620117188, 116.5531997680664, 12.426734924316406, -4.077873229980469, 25.037555694580078, 17.46458625793457, -13.696525573730469, 59.407440185546875, 108.6861801147461, -109.76776123046875, 43.52716064453125, 132.27615356445312, 117.32316589355469, 38.49037170410156, 320.9969482421875, 154.02967834472656, 145.6629638671875, 135.21104431152344, 8.196319580078125, 67.87024688720703, 90.2910385131836, -2.9967594146728516, 38.523948669433594, 3.162900924682617, 0.057796478271484375, 62.20598602294922, 122.00946807861328, 17.66858673095703, 47.760711669921875, 55.11237335205078], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000103.npy"} +{"epoch": 0.1512481644640235, "step": 104, "batch_size": 64, "mean": 46.05828094482422, "std": 79.03309631347656, "min": -105.52311706542969, "p10": -35.639975738525386, "median": 27.707402229309082, "p90": 151.12551116943362, "max": 334.63470458984375, "pos_frac": 0.703125, "sample": [155.1827392578125, -21.942611694335938, 18.469575881958008, 31.854942321777344, 86.61283874511719, -13.466392517089844, -58.89647674560547, 3.989175796508789, -29.07257843017578, 49.63898468017578, -19.816953659057617, 51.7237663269043, 215.18145751953125, 64.64849853515625, 134.58236694335938, -11.596382141113281, 137.37124633789062, 36.845848083496094, -45.914955139160156, 171.309326171875, 83.887451171875, -0.31024932861328125, 19.929424285888672, 7.958152770996094, 20.871610641479492, 1.4825801849365234, 334.63470458984375, -71.3503189086914, 76.26757049560547, 193.26760864257812, 192.01751708984375, 63.20220947265625, 19.61298370361328, 114.54917907714844, 16.151079177856445, 22.534027099609375, -28.902599334716797, -38.45457458496094, 208.12957763671875, 24.44659423828125, 61.60820007324219, -52.215972900390625, 97.85340881347656, -25.883407592773438, 107.50778198242188, 46.09236145019531, 50.29022979736328, 64.61405944824219, 90.9415283203125, 49.73118591308594, 7.2859954833984375, 141.6586456298828, -0.5103378295898438, 48.43250274658203, 78.6304931640625, 93.37334442138672, -23.4368953704834, 30.968210220336914, -1.04132080078125, 22.859745025634766, -59.85521697998047, -1.5663566589355469, -105.52311706542969, 9.285846710205078], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000104.npy"} +{"epoch": 0.1527165932452276, "step": 105, "batch_size": 64, "mean": 65.26294708251953, "std": 65.76915740966797, "min": -143.56744384765625, "p10": 0.9341535568237331, "median": 61.434810638427734, "p90": 139.4359573364258, "max": 262.1049499511719, "pos_frac": 0.890625, "sample": [100.02413940429688, 41.02720642089844, 51.54905700683594, 24.743881225585938, 46.135215759277344, 78.37786102294922, 96.62760925292969, 138.97830200195312, 3.492856979370117, -143.56744384765625, -28.53717803955078, 112.93197631835938, 133.40435791015625, 12.11286735534668, 39.87123107910156, 71.29922485351562, -50.25519561767578, -8.217536926269531, -29.20330810546875, 147.22909545898438, 43.161773681640625, 163.0427703857422, 59.51084899902344, 31.18408203125, 7.0684661865234375, 114.77672576904297, 87.44781494140625, 60.77946472167969, 11.356483459472656, 42.021278381347656, 124.92701721191406, 262.1049499511719, 34.142005920410156, 70.92144012451172, 35.090171813964844, 50.110191345214844, 104.26087951660156, 139.63209533691406, 69.64533996582031, 17.615482330322266, 62.09015655517578, 117.60443115234375, 51.63286590576172, 51.600547790527344, 107.93502807617188, 72.08979034423828, -0.16243362426757812, 84.40608215332031, 96.828125, 166.81716918945312, 65.0392837524414, 28.143157958984375, -42.04231262207031, 15.400978088378906, 99.25111389160156, 81.0523681640625, 73.5600814819336, 179.71484375, 86.02301025390625, 241.45079040527344, 41.80790710449219, 7.893455505371094, 15.402923583984375, 106.46571350097656], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000105.npy"} +{"epoch": 0.15418502202643172, "step": 106, "batch_size": 64, "mean": 40.033287048339844, "std": 57.30054473876953, "min": -75.12023162841797, "p10": -13.127968597412108, "median": 37.542776107788086, "p90": 114.12156448364264, "max": 179.34002685546875, "pos_frac": 0.78125, "sample": [40.268585205078125, 8.96064567565918, 21.2956485748291, -11.695428848266602, 19.35205078125, 4.290613174438477, 65.29698181152344, -12.601905822753906, 37.226402282714844, 88.93147277832031, 47.11039733886719, 45.804893493652344, 1.960165023803711, 93.75177001953125, 46.71998596191406, 168.2053680419922, 12.825292587280273, 10.661640167236328, 120.95208740234375, 4.156034469604492, 79.11100769042969, 59.43556594848633, -6.5054779052734375, 129.194580078125, 34.750030517578125, 70.03726959228516, -4.264152526855469, -2.313720703125, 98.18367767333984, -74.92063903808594, 66.5508804321289, 13.388984680175781, 57.9252815246582, 51.43320083618164, -7.473779678344727, -8.283744812011719, 30.0594482421875, -13.353424072265625, 143.52879333496094, 76.09827423095703, 98.09088134765625, 31.378101348876953, -75.12023162841797, 14.139808654785156, 179.34002685546875, 67.38235473632812, 37.85914993286133, 29.38965606689453, 60.90447998046875, 16.942665100097656, 0.2374134063720703, 91.28947448730469, 145.49456787109375, 45.817100524902344, 82.98635864257812, 82.89955139160156, -51.10346221923828, 52.774742126464844, -70.59062194824219, 34.538856506347656, 150.2657012939453, -68.94646453857422, -63.024864196777344, 63.130531311035156], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000106.npy"} +{"epoch": 0.15565345080763582, "step": 107, "batch_size": 64, "mean": 44.61316680908203, "std": 68.24613189697266, "min": -161.8627166748047, "p10": -20.00443038940429, "median": 25.800976753234863, "p90": 119.6459396362305, "max": 233.6039581298828, "pos_frac": 0.765625, "sample": [122.42694091796875, 164.91403198242188, 67.27168273925781, 84.48567962646484, 32.131431579589844, 49.19404602050781, 94.05470275878906, -7.311016082763672, -161.8627166748047, 16.387296676635742, 22.515893936157227, 111.93107604980469, -79.965087890625, 29.218502044677734, 10.751916885375977, 2.8729400634765625, 19.28376579284668, 110.2046127319336, 10.716903686523438, -44.19012451171875, 163.84121704101562, 86.01033782958984, 131.44879150390625, 10.926488876342773, -42.41337585449219, 3.2693748474121094, -8.359352111816406, -5.2040557861328125, 233.6039581298828, 108.37969207763672, 18.942344665527344, 21.893333435058594, 155.68621826171875, 67.96961975097656, 89.19039916992188, 111.1480941772461, 19.599794387817383, 24.677047729492188, 23.927724838256836, -22.905601501464844, 89.93690490722656, -71.01310729980469, 79.83712768554688, 89.19497680664062, 8.083168029785156, 112.94833374023438, -12.386978149414062, 109.93930053710938, 65.14894104003906, 70.37548828125, 29.162506103515625, 35.301170349121094, -13.235031127929688, -49.89244842529297, -11.78375244140625, -3.42864990234375, 14.542257308959961, 97.02691650390625, -0.890533447265625, 25.889963150024414, 113.15693664550781, 187.46533203125, 17.487350463867188, 25.711990356445312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000107.npy"} +{"epoch": 0.15712187958883994, "step": 108, "batch_size": 64, "mean": 56.51047134399414, "std": 90.34970092773438, "min": -116.26473999023438, "p10": -38.161975479125964, "median": 36.55147933959961, "p90": 185.56809692382816, "max": 340.54248046875, "pos_frac": 0.78125, "sample": [45.609130859375, 217.57215881347656, 51.407135009765625, -91.66793823242188, -116.26473999023438, 178.7427978515625, 35.41748809814453, 17.701011657714844, 340.54248046875, 1.2680892944335938, -26.755126953125, 191.97903442382812, 14.495887756347656, 18.69330596923828, -8.934114456176758, 118.33922576904297, -11.250213623046875, 177.0266876220703, 254.26329040527344, 141.52584838867188, 161.6246337890625, 113.56591796875, 4.105493545532227, -22.874879837036133, 65.66682434082031, 13.195913314819336, 32.208534240722656, 18.3948974609375, 116.13156127929688, 4.812122344970703, 41.87702560424805, 2.360017776489258, 86.4498519897461, -48.06599426269531, 244.47836303710938, -70.84367370605469, 105.01838684082031, -43.05062484741211, -25.892723083496094, 7.886369705200195, 17.23815155029297, 15.750320434570312, 71.68582153320312, 250.19381713867188, 51.455055236816406, 13.810544967651367, 125.17322540283203, 115.67646789550781, 40.70153045654297, -92.4556884765625, 27.559356689453125, 63.35357666015625, 188.49322509765625, 88.38166809082031, -17.36273193359375, -0.7147617340087891, 37.68547058105469, 18.39724349975586, 87.55964660644531, -59.40290451049805, 43.220252990722656, 42.52666091918945, 97.98121643066406, 33.003692626953125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000108.npy"} +{"epoch": 0.15859030837004406, "step": 109, "batch_size": 64, "mean": 56.805450439453125, "std": 83.39997100830078, "min": -223.45742797851562, "p10": -15.85507888793945, "median": 48.839698791503906, "p90": 155.34998016357423, "max": 295.4329528808594, "pos_frac": 0.796875, "sample": [58.405029296875, 10.201335906982422, 206.07688903808594, 127.8470687866211, 61.57854461669922, 68.51751708984375, -107.82783508300781, 4.8595733642578125, 24.739105224609375, 47.267547607421875, 206.71099853515625, 63.3273811340332, 118.78196716308594, 107.94093322753906, 61.3070068359375, 1.8165283203125, 27.481395721435547, 45.67790985107422, -24.810710906982422, -6.043479919433594, -13.743217468261719, 295.4329528808594, 2.8708839416503906, -7.66461181640625, 23.981313705444336, 7.188720703125, 167.3207244873047, 30.42911720275879, 50.41184997558594, -72.50285339355469, 91.10145568847656, 100.58549499511719, 11.358909606933594, 133.28237915039062, 70.53753662109375, 45.85289764404297, 152.07920837402344, 104.43623352050781, 156.75173950195312, 151.082763671875, -7.739845275878906, 25.028602600097656, -223.45742797851562, 28.614036560058594, -3.235931396484375, 90.97782135009766, 64.18405151367188, 39.04899978637695, 21.695295333862305, 105.0206298828125, -111.11065673828125, 42.98309326171875, 38.272335052490234, 173.15675354003906, 180.2871856689453, 60.15766143798828, 116.96273040771484, 58.18046569824219, 124.54153442382812, 151.53314208984375, -16.760162353515625, -61.253662109375, -2.8999996185302734, 136.71377563476562], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000109.npy"} +{"epoch": 0.16005873715124816, "step": 110, "batch_size": 64, "mean": 51.11765670776367, "std": 83.21610260009766, "min": -157.51417541503906, "p10": -26.49069290161132, "median": 33.30282974243164, "p90": 171.91269683837893, "max": 252.41986083984375, "pos_frac": 0.796875, "sample": [205.86837768554688, 99.05270385742188, 7.201107025146484, 72.89945983886719, 90.84859466552734, 48.86669158935547, 65.87484741210938, -10.16827392578125, -157.51417541503906, 62.558006286621094, 97.4918212890625, 19.28777313232422, -115.1099853515625, 23.75173568725586, 9.021015167236328, 5.164758682250977, 32.264366149902344, 245.09063720703125, 134.92904663085938, 34.34129333496094, 5.353099822998047, -20.67123794555664, 252.41986083984375, 205.7747802734375, 140.52391052246094, 85.35646057128906, 26.806121826171875, -50.929603576660156, -28.984745025634766, 71.70956420898438, -76.38360595703125, -14.636451721191406, 12.680244445800781, 20.167041778564453, 4.65131950378418, 244.65011596679688, 70.53916931152344, 112.0631103515625, 175.54360961914062, 8.480209350585938, 107.84236907958984, -11.406620979309082, 63.31177520751953, 30.859336853027344, 15.74057388305664, -0.35706329345703125, 110.24791717529297, -99.85626220703125, 107.21664428710938, -63.54168701171875, 57.441505432128906, 1.3966197967529297, 85.60076904296875, 52.465328216552734, 60.3387565612793, 60.88157653808594, 26.036453247070312, 22.008522033691406, 22.899099349975586, 53.60985565185547, 163.44056701660156, 3.030710220336914, -14.882091522216797, 200.3726806640625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000110.npy"} +{"epoch": 0.16152716593245228, "step": 111, "batch_size": 64, "mean": 58.40650177001953, "std": 93.43913269042969, "min": -161.81051635742188, "p10": -69.61806640625, "median": 48.14710235595703, "p90": 179.64958953857422, "max": 271.5659484863281, "pos_frac": 0.765625, "sample": [271.5659484863281, -105.74862670898438, -161.81051635742188, 210.81903076171875, -4.694328308105469, 79.832275390625, 24.949623107910156, 47.352622985839844, 38.69839096069336, 212.7280731201172, 49.262168884277344, 102.25910949707031, 57.60533142089844, 87.40921020507812, 33.21685791015625, 188.3724365234375, 140.066650390625, -75.49903869628906, 138.23077392578125, 7.85517692565918, 105.46266174316406, -56.9693603515625, -39.42231369018555, -34.476104736328125, 100.91191101074219, 180.42726135253906, -15.703025817871094, 162.52049255371094, -75.0389404296875, 15.773246765136719, -55.758331298828125, 37.245948791503906, 221.12319946289062, 29.521011352539062, 230.4964599609375, 91.36923217773438, 52.53575134277344, 78.27525329589844, 99.46344757080078, 159.67550659179688, 28.783798217773438, -80.13626098632812, 21.528793334960938, 44.482852935791016, 177.83502197265625, 123.81852722167969, 34.12962341308594, 118.41059875488281, 169.6110382080078, 3.8711109161376953, 139.37045288085938, 47.99180603027344, 118.17157745361328, 97.45083618164062, 80.09520721435547, 17.596080780029297, -143.36366271972656, 37.1697998046875, 33.689483642578125, 48.302398681640625, -3.4559497833251953, 130.7758026123047, -111.65388488769531, -26.363426208496094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000111.npy"} +{"epoch": 0.16299559471365638, "step": 112, "batch_size": 64, "mean": 34.507667541503906, "std": 59.27382278442383, "min": -100.83062744140625, "p10": -43.837697601318354, "median": 36.62714195251465, "p90": 123.54592361450197, "max": 154.02325439453125, "pos_frac": 0.6875, "sample": [61.9376106262207, 154.02325439453125, 140.89027404785156, -9.756996154785156, -67.25009155273438, 46.08528518676758, 48.49610137939453, 66.08877563476562, 54.54512023925781, -0.17810821533203125, 120.16619873046875, 100.88313293457031, -7.0666961669921875, -1.972381591796875, 128.8880615234375, 69.27853393554688, 50.85773849487305, 66.8026351928711, 54.34498977661133, 53.85394287109375, 59.53564453125, 32.25527572631836, 91.81692504882812, -36.906585693359375, 25.452423095703125, 26.164213180541992, 41.413856506347656, 44.55764389038086, -34.96644592285156, 16.739055633544922, -2.937715530395508, -50.220394134521484, 40.99900817871094, 140.89041137695312, 150.73284912109375, -68.76191711425781, 54.81890869140625, 18.697132110595703, 110.611572265625, -2.9644718170166016, -78.06989288330078, 124.99437713623047, 48.70049285888672, 21.994735717773438, -33.06913757324219, 18.836231231689453, 94.52021789550781, -3.6359481811523438, 18.23200225830078, 139.37779235839844, 49.41699981689453, 12.326358795166016, 53.12860107421875, -100.83062744140625, 25.215234756469727, -54.23125457763672, -46.80817413330078, -31.842987060546875, 98.07587432861328, -0.48798370361328125, 27.632837295532227, 8.698097229003906, -20.05823516845703, 47.53034210205078], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000112.npy"} +{"epoch": 0.1644640234948605, "step": 113, "batch_size": 64, "mean": 35.46612548828125, "std": 58.28358459472656, "min": -72.151123046875, "p10": -28.29474334716797, "median": 30.810964584350586, "p90": 114.34438858032229, "max": 196.04559326171875, "pos_frac": 0.703125, "sample": [-47.89250946044922, 102.77801513671875, -20.58107566833496, 145.27734375, 1.0794830322265625, 63.98173522949219, 18.154212951660156, 40.44428253173828, 1.2984752655029297, -28.235031127929688, 6.3785247802734375, 119.20895385742188, 62.8018798828125, 196.04559326171875, 47.49314880371094, -21.90540885925293, -16.961990356445312, -28.27857208251953, -3.0730819702148438, 70.00404357910156, -38.49470520019531, 67.24285888671875, 116.97996520996094, -72.151123046875, 74.4334945678711, -28.301673889160156, -1.8757190704345703, 149.59124755859375, 75.31507110595703, -70.778076171875, 28.18494415283203, 86.07696533203125, 43.503971099853516, -14.63726806640625, 37.715484619140625, 60.09309387207031, 13.254447937011719, 108.19470977783203, 10.868255615234375, -42.97955322265625, 65.96810913085938, 92.66453552246094, -17.38117790222168, 3.9669361114501953, -28.658355712890625, 49.4179801940918, 48.79395294189453, 59.157073974609375, -10.842056274414062, 22.757503509521484, 175.75192260742188, -8.587574005126953, 38.50748062133789, 26.2540340423584, 140.47976684570312, 106.004150390625, 39.95764923095703, 70.36819458007812, -16.67705726623535, 5.857696533203125, 5.00018310546875, 6.101169586181641, 33.43698501586914, 51.278289794921875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000113.npy"} +{"epoch": 0.16593245227606462, "step": 114, "batch_size": 64, "mean": 58.719337463378906, "std": 90.29364776611328, "min": -81.00814819335938, "p10": -41.70706329345703, "median": 52.88278579711914, "p90": 149.87046813964844, "max": 487.75335693359375, "pos_frac": 0.765625, "sample": [260.45367431640625, 112.79581451416016, 67.00155639648438, 46.40308380126953, 43.18438720703125, 150.09683227539062, 27.82891273498535, 53.572547912597656, -81.00814819335938, 83.85366821289062, -41.17756652832031, 97.08187866210938, 39.011390686035156, 33.15571594238281, -58.26099395751953, 110.92619323730469, 16.76767349243164, -60.82526397705078, 44.590065002441406, 166.72962951660156, 11.265535354614258, 70.00575256347656, -2.52423095703125, 157.599365234375, 40.765724182128906, 71.60186004638672, 59.19950866699219, 28.656673431396484, 63.37324523925781, 71.76960754394531, 79.38274383544922, -10.54067611694336, -14.002685546875, 61.76190948486328, -41.933990478515625, 26.995820999145508, 72.04736328125, 109.74058532714844, 17.922637939453125, 149.34228515625, 71.650146484375, 101.89093017578125, 7.653745651245117, 54.86846160888672, 54.441837310791016, -29.288917541503906, 300.55645751953125, 12.042112350463867, 52.193023681640625, 172.87841796875, -20.20798683166504, 83.46107482910156, -58.558876037597656, -42.296348571777344, 487.75335693359375, 56.95466995239258, -5.883096694946289, 138.58636474609375, 16.449203491210938, -50.53935623168945, 33.90702819824219, 119.65188598632812, -32.92814254760742, 98.19146728515625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000114.npy"} +{"epoch": 0.16740088105726872, "step": 115, "batch_size": 64, "mean": 40.69775390625, "std": 93.62239837646484, "min": -207.2354736328125, "p10": -52.875099182128906, "median": 21.23882293701172, "p90": 177.29527130126962, "max": 309.1070861816406, "pos_frac": 0.703125, "sample": [-11.086410522460938, -2.1706905364990234, 140.6337890625, 103.2174072265625, -97.44821166992188, -6.9002685546875, -62.416358947753906, 7.951181411743164, 19.67489242553711, 16.514602661132812, 37.68242645263672, -0.5974884033203125, 53.435184478759766, 56.98377227783203, 0.5090808868408203, 228.56199645996094, 25.84388542175293, -33.510833740234375, 309.1070861816406, -17.42828941345215, 114.33623504638672, 16.414520263671875, 185.8699951171875, 86.11751556396484, 213.63778686523438, -51.715606689453125, 13.455474853515625, 22.802753448486328, 10.03636360168457, 157.28758239746094, 271.12261962890625, 141.0594482421875, -13.118118286132812, -167.01095581054688, 15.749652862548828, 37.58802795410156, 83.63490295410156, 19.270320892333984, 0.5201568603515625, 58.90187072753906, 30.8509521484375, 85.51229858398438, 26.223182678222656, 39.03794479370117, 200.10214233398438, -70.47979736328125, 24.570167541503906, 38.03834533691406, -13.5299072265625, 18.505508422851562, 23.65213394165039, 105.70637512207031, 0.5317153930664062, -0.6418342590332031, -24.679676055908203, 140.06198120117188, 82.67808532714844, -28.11123275756836, 96.26524353027344, -207.2354736328125, 199.98162841796875, -94.93836212158203, 1.409402847290039, -53.37202453613281], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000115.npy"} +{"epoch": 0.16886930983847284, "step": 116, "batch_size": 64, "mean": 45.62263488769531, "std": 63.77723693847656, "min": -67.45003509521484, "p10": -30.828695106506338, "median": 37.77073669433594, "p90": 126.3193283081055, "max": 260.29583740234375, "pos_frac": 0.765625, "sample": [6.922050476074219, 5.362613677978516, -48.48276901245117, -54.19239044189453, 27.12242317199707, 60.82068634033203, 83.48330688476562, 27.359603881835938, 59.814369201660156, 74.2217025756836, 50.590476989746094, -17.717317581176758, 52.92011260986328, 28.726356506347656, 10.177261352539062, -6.851459503173828, 61.590965270996094, -67.45003509521484, 102.17033386230469, 114.26866149902344, 50.15260314941406, -34.63029479980469, 44.31684875488281, 20.098793029785156, 72.28385925292969, 29.817825317382812, 149.596435546875, 129.33482360839844, 46.56831359863281, -41.63927459716797, 172.67648315429688, 37.504661560058594, 26.36197280883789, 61.45783996582031, -21.958295822143555, -51.132667541503906, -0.9249534606933594, 3.6929473876953125, -6.408273696899414, 135.0150146484375, 7.10943603515625, 196.8397674560547, 38.03681182861328, 132.86927795410156, 119.28317260742188, 54.67337417602539, 60.53314971923828, -50.048492431640625, -11.218038558959961, 115.2839584350586, 20.681774139404297, -6.222745895385742, 260.29583740234375, 86.0496597290039, 5.684211730957031, 64.71145629882812, 86.3587646484375, 84.02925109863281, -16.244722366333008, 17.594863891601562, 0.19606399536132812, 35.5726318359375, 113.42822265625, 111.30946350097656], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000116.npy"} +{"epoch": 0.17033773861967694, "step": 117, "batch_size": 64, "mean": 33.996551513671875, "std": 68.3165512084961, "min": -127.63381958007812, "p10": -50.37383880615233, "median": 28.03285026550293, "p90": 110.6718902587891, "max": 240.06512451171875, "pos_frac": 0.765625, "sample": [4.794208526611328, 60.31140899658203, 75.19061279296875, -17.644573211669922, 24.03651237487793, -10.342903137207031, 21.271108627319336, 61.481056213378906, 22.065185546875, 51.67554473876953, 47.32999038696289, 89.03817749023438, 176.83428955078125, 126.99697875976562, 13.166847229003906, 66.28982543945312, -84.86048889160156, 145.5876922607422, -34.38578796386719, -113.3250732421875, -55.81846618652344, 5.320831298828125, 16.076740264892578, 77.54476928710938, 51.87898635864258, -37.669708251953125, -65.3974609375, 4.410192489624023, 28.68553924560547, -31.705787658691406, 48.26579666137695, 130.35476684570312, 15.547386169433594, 4.331787109375, 190.02560424804688, 12.685314178466797, 240.06512451171875, 17.062362670898438, 42.956024169921875, 86.04669952392578, 21.348445892333984, 27.38016128540039, 10.454652786254883, -23.538482666015625, -127.63381958007812, 41.85204315185547, 94.34423828125, 40.78407287597656, 114.7040786743164, 48.03887176513672, -73.24519348144531, -16.473604202270508, 3.8673667907714844, 65.02668762207031, 89.27943420410156, 97.96507263183594, 1.3550643920898438, 58.204063415527344, 101.2634506225586, 30.281875610351562, 62.99071502685547, -17.760086059570312, -77.96861267089844, 97.08167266845703], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000117.npy"} +{"epoch": 0.17180616740088106, "step": 118, "batch_size": 64, "mean": 42.942481994628906, "std": 61.80585479736328, "min": -81.55848693847656, "p10": -22.52602081298828, "median": 30.525959014892578, "p90": 131.67727050781255, "max": 201.862548828125, "pos_frac": 0.75, "sample": [-11.174217224121094, 21.9022216796875, -4.92451286315918, -49.366851806640625, 21.807889938354492, 15.624515533447266, 60.34677505493164, 49.883277893066406, 50.129859924316406, 149.26075744628906, 118.67002868652344, 26.142181396484375, 177.92501831054688, 35.552215576171875, -18.635902404785156, 60.65721893310547, -37.90865707397461, -39.61372375488281, 44.765201568603516, 188.69314575195312, 136.8531494140625, -10.864553451538086, -81.55848693847656, 2.0359344482421875, -65.31399536132812, 25.390748977661133, 2.8407211303710938, 46.22057342529297, 28.440357208251953, 41.48127746582031, 8.483535766601562, 92.34227752685547, 9.590484619140625, 91.62008666992188, 32.137901306152344, 57.49748229980469, 26.898448944091797, 68.19114685058594, 190.30743408203125, -13.802143096923828, 46.477691650390625, -24.193214416503906, -9.078262329101562, 49.45758056640625, 201.862548828125, 111.84114837646484, 122.05474853515625, 48.20428466796875, 89.15081024169922, 18.86118507385254, -6.178642272949219, 86.4755630493164, 135.80120849609375, 79.37850189208984, 67.54559326171875, 28.914016723632812, 47.677772521972656, 20.401229858398438, -2.0231971740722656, 27.841171264648438, -40.48316192626953, 100.45064544677734, 11.015144348144531, -11.664386749267578], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000118.npy"} +{"epoch": 0.17327459618208516, "step": 119, "batch_size": 64, "mean": 42.652496337890625, "std": 75.85587310791016, "min": -100.17282104492188, "p10": -47.23986587524414, "median": 39.44911575317383, "p90": 131.121696472168, "max": 261.91748046875, "pos_frac": 0.703125, "sample": [16.024080276489258, 79.6358642578125, 54.97431945800781, 211.93682861328125, 31.83404541015625, -59.44206237792969, 1.5464324951171875, 56.682373046875, -5.063739776611328, 52.03790283203125, -22.3680419921875, -30.821990966796875, 261.91748046875, 61.87549591064453, 35.35143280029297, 154.87591552734375, 18.647159576416016, 14.162673950195312, 225.15745544433594, 61.093963623046875, 135.08975219726562, 93.81297302246094, 92.5585708618164, -24.795894622802734, 36.69615936279297, 42.20207214355469, -49.402076721191406, 23.35961151123047, 77.85101318359375, 83.99581909179688, -12.438011169433594, 1.9365386962890625, 119.89848327636719, 50.737422943115234, 222.46795654296875, 4.685920715332031, 85.1801986694336, 121.86289978027344, -1.8965167999267578, 34.44207000732422, -86.89314270019531, 52.736289978027344, -94.79745483398438, 152.11660766601562, 81.78509521484375, 79.38042449951172, -52.93711853027344, -45.48072814941406, 108.86396026611328, 43.18110275268555, -37.94091033935547, -100.17282104492188, -19.39661407470703, 119.73490905761719, 3.5657806396484375, 63.98014450073242, 67.27368927001953, 13.483598709106445, 68.19160461425781, -12.50320816040039, -47.99378204345703, -4.53424072265625, 53.64530944824219, -33.83123016357422], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000119.npy"} +{"epoch": 0.17474302496328928, "step": 120, "batch_size": 64, "mean": 59.69820022583008, "std": 77.45616912841797, "min": -112.33702087402344, "p10": -29.642004013061516, "median": 48.073055267333984, "p90": 160.2033645629883, "max": 260.53094482421875, "pos_frac": 0.8125, "sample": [175.2040557861328, 118.71389770507812, 38.976806640625, 140.35574340820312, 44.0946044921875, 1.4300594329833984, -72.29998779296875, 118.60447692871094, 23.468576431274414, 97.29500579833984, 54.448890686035156, 199.02249145507812, 103.48711395263672, 6.315925598144531, 16.85248374938965, 33.552703857421875, 163.19723510742188, 80.32792663574219, 96.00196075439453, -55.947532653808594, -45.7640380859375, 145.2918243408203, 59.41444396972656, 35.195343017578125, 46.56000518798828, 49.58610534667969, 65.23539733886719, 4.529975891113281, 219.21926879882812, -112.33702087402344, -44.413307189941406, 26.197525024414062, 38.46428680419922, 5.347846984863281, 123.96339416503906, 21.52970314025879, 259.3968200683594, 59.960208892822266, 87.49722290039062, 2.135702133178711, 28.219165802001953, -16.89699935913086, 61.306488037109375, 260.53094482421875, 37.74931335449219, 45.198089599609375, 153.21766662597656, 196.82821655273438, 60.81791687011719, 60.81999969482422, -6.2830352783203125, 113.03862762451172, -82.92510986328125, 19.489234924316406, 76.0860366821289, 69.90158081054688, -7.723049163818359, 140.6480712890625, 103.78401184082031, -21.300838470458984, 88.18006896972656, -33.21678924560547, 45.06797790527344, -1.966064453125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000120.npy"} +{"epoch": 0.1762114537444934, "step": 121, "batch_size": 64, "mean": 66.66979217529297, "std": 85.68122100830078, "min": -188.43362426757812, "p10": -14.246450042724609, "median": 57.776390075683594, "p90": 192.74673309326175, "max": 262.3616638183594, "pos_frac": 0.78125, "sample": [14.860723495483398, -14.371742248535156, -6.16851806640625, 246.89479064941406, -7.522771835327148, 16.403648376464844, 126.23561096191406, -19.557254791259766, 90.33866119384766, 102.25045013427734, -13.71612548828125, 77.07140350341797, 170.71388244628906, 116.56058502197266, 113.42622375488281, 7.942237854003906, 114.03141784667969, 72.62271118164062, 61.99334716796875, -60.00529479980469, 48.03453826904297, 134.00001525878906, -76.8798599243164, 210.8148651123047, 196.57591247558594, 96.63421630859375, 51.24671173095703, 23.21817398071289, 138.9502716064453, 10.10433578491211, 94.65426635742188, -24.86719512939453, 51.99644470214844, 130.70628356933594, 3.980417251586914, 246.6239013671875, 2.4884185791015625, 222.25531005859375, 53.55943298339844, 226.59063720703125, 17.167991638183594, 150.92205810546875, -16.107498168945312, 24.16168212890625, -13.9541015625, 15.573158264160156, 83.95022583007812, 27.31182861328125, -8.920074462890625, 77.54754638671875, 6.982320785522461, 20.917388916015625, 67.92068481445312, 183.81198120117188, 154.70887756347656, 82.21039581298828, -188.43362426757812, 75.6865234375, -6.7989501953125, 94.30155944824219, 262.3616638183594, 20.054859161376953, -3.9998703002929688, 88.79905700683594], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000121.npy"} +{"epoch": 0.1776798825256975, "step": 122, "batch_size": 64, "mean": 79.34767150878906, "std": 80.49031829833984, "min": -108.35588073730469, "p10": -3.502161026000974, "median": 53.736228942871094, "p90": 193.31102294921877, "max": 281.71356201171875, "pos_frac": 0.875, "sample": [181.0094451904297, 31.066162109375, 96.16486358642578, 96.54029846191406, 25.081567764282227, 26.240554809570312, 46.38104248046875, 113.25003814697266, 27.031578063964844, 128.50965881347656, 15.822978973388672, 149.32272338867188, 148.02581787109375, 262.651123046875, 118.46050262451172, 39.311920166015625, 21.54267120361328, 54.96678924560547, 191.15660095214844, 52.50566864013672, 25.805295944213867, 157.39170837402344, 44.951175689697266, -4.5363922119140625, 127.07411193847656, 18.91387939453125, 72.0316162109375, 152.08480834960938, 58.641929626464844, 75.32817840576172, 95.57718658447266, -29.534332275390625, 207.89077758789062, -5.80620002746582, 58.79988098144531, 245.302734375, 64.15947723388672, 30.067283630371094, 193.70449829101562, 185.28794860839844, 16.91762351989746, 44.471832275390625, -37.30329895019531, 281.71356201171875, 17.63977813720703, 42.166439056396484, 25.33141326904297, 50.4028205871582, 16.784996032714844, -108.35588073730469, -14.439323425292969, 38.3450927734375, 37.359588623046875, 69.60984802246094, 248.30023193359375, -5.231037139892578, 140.2948760986328, 62.630531311035156, -1.0889549255371094, 23.567359924316406, 198.651123046875, 116.96961975097656, 22.94226837158203, 192.39291381835938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000122.npy"} +{"epoch": 0.17914831130690162, "step": 123, "batch_size": 64, "mean": 51.75164031982422, "std": 89.51819610595703, "min": -163.98046875, "p10": -40.973716735839844, "median": 24.572436332702637, "p90": 170.44797973632816, "max": 263.4801025390625, "pos_frac": 0.703125, "sample": [103.27355194091797, 88.86949920654297, -0.7667465209960938, 14.046390533447266, 132.06275939941406, 94.4945068359375, 163.83395385742188, 173.28256225585938, 94.68131256103516, 51.28483581542969, -0.6992244720458984, -36.90818786621094, 6.509227752685547, -61.19493865966797, 157.17320251464844, 107.13890075683594, 17.163463592529297, -15.065826416015625, 2.5475616455078125, 96.08584594726562, -13.249588012695312, 143.18914794921875, 82.62158203125, -106.579833984375, 13.512702941894531, 123.876953125, 211.90737915039062, 23.369121551513672, 223.34213256835938, 185.3643798828125, -39.18104553222656, 61.72026062011719, 25.392948150634766, 5.171882629394531, 93.07636260986328, 137.61190795898438, 1.1122207641601562, 53.45765686035156, 210.5652618408203, -28.63818359375, 128.20623779296875, 111.02299499511719, -50.15804672241211, -85.24419403076172, 82.41563415527344, -15.031890869140625, -56.28316879272461, 0.6642360687255859, -18.477394104003906, 263.4801025390625, -37.65814971923828, 94.67678833007812, 72.7322006225586, 261.4290466308594, -25.479171752929688, 23.751924514770508, 141.84170532226562, 35.65008544921875, -163.98046875, -41.74200439453125, 3.2392349243164062, -10.325214385986328, 0.4794483184814453, 1.4392528533935547], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000123.npy"} +{"epoch": 0.18061674008810572, "step": 124, "batch_size": 64, "mean": 77.05169677734375, "std": 115.1351547241211, "min": -178.81492614746094, "p10": -32.88467254638672, "median": 45.904930114746094, "p90": 214.9457702636719, "max": 410.3860778808594, "pos_frac": 0.78125, "sample": [194.3655548095703, -20.871063232421875, 46.48664855957031, 42.3599853515625, 33.222225189208984, 192.0792999267578, -130.4136505126953, -8.580810546875, 247.4287109375, 204.06369018554688, -17.967987060546875, 32.49970245361328, 6.2615203857421875, 82.53446960449219, 104.13994598388672, 233.37939453125, 193.22283935546875, 55.36087417602539, -34.011444091796875, 40.95159149169922, -178.81492614746094, -55.279762268066406, 168.5777130126953, 1.5533332824707031, 246.76025390625, 157.11659240722656, 106.16609191894531, -6.774818420410156, 51.41358947753906, 134.70880126953125, -30.255538940429688, 34.1984748840332, 103.6737060546875, -0.9195919036865234, 16.538070678710938, 396.91656494140625, 121.51629638671875, 217.12298583984375, 40.109153747558594, 209.8656005859375, 145.33883666992188, -78.96492767333984, 17.080402374267578, -27.334365844726562, 45.323211669921875, 153.94448852539062, -134.91415405273438, 113.46524047851562, 19.405736923217773, 76.35952758789062, 42.45759963989258, 117.1644058227539, -76.79075622558594, 329.4136657714844, 38.4718017578125, 410.3860778808594, 85.62039184570312, 194.339599609375, 1.3234710693359375, 41.269630432128906, 67.24943542480469, 95.76204681396484, 17.694801330566406, 6.538196563720703], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000124.npy"} +{"epoch": 0.18208516886930984, "step": 125, "batch_size": 64, "mean": 64.99887084960938, "std": 94.3122329711914, "min": -143.16265869140625, "p10": -36.65236473083496, "median": 43.03496170043945, "p90": 196.36385650634767, "max": 276.5227355957031, "pos_frac": 0.75, "sample": [-4.440912246704102, 173.3876495361328, 109.21900939941406, 32.66337585449219, -23.120277404785156, 11.137195587158203, 146.47877502441406, -39.077598571777344, 168.36419677734375, 9.770811080932617, 10.88675308227539, -35.55669403076172, 15.616022109985352, -12.68133544921875, 114.29674530029297, 139.31927490234375, 116.61622619628906, 46.74805450439453, 22.733625411987305, -5.470554351806641, 109.07568359375, 20.379154205322266, -65.79922485351562, -25.967437744140625, 107.23065948486328, 276.5227355957031, -36.83697509765625, 32.401512145996094, 128.6450653076172, 53.935890197753906, -13.150833129882812, -108.29218292236328, 44.76792907714844, 18.19525909423828, 41.30199432373047, 249.1259307861328, 10.25326156616211, 248.0106201171875, 135.584228515625, -36.22160720825195, 14.541702270507812, 105.8236083984375, 236.7713623046875, 100.62325286865234, 262.30908203125, 108.57968139648438, 57.68327331542969, 55.32325744628906, 198.31585693359375, 51.69458770751953, 23.028793334960938, 149.0008087158203, 54.10186767578125, -27.66102409362793, 141.98487854003906, 27.15073013305664, -65.56816101074219, 191.80918884277344, 32.86833953857422, -143.16265869140625, 235.3356475830078, 179.07925415039062, -48.49383544921875, 32.736305236816406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000125.npy"} +{"epoch": 0.18355359765051396, "step": 126, "batch_size": 64, "mean": 63.75071716308594, "std": 89.08180236816406, "min": -94.59309387207031, "p10": -30.222423934936522, "median": 45.0015983581543, "p90": 192.9256256103516, "max": 346.6085205078125, "pos_frac": 0.8125, "sample": [199.123779296875, 72.99281311035156, 6.228277206420898, 92.12757110595703, 14.3070068359375, 60.75035858154297, -46.37004852294922, 1.9127006530761719, 73.18641662597656, 183.15585327148438, -69.32231140136719, -11.112789154052734, 346.6085205078125, 8.613273620605469, 97.72089385986328, 3.8888397216796875, 152.51486206054688, 47.777748107910156, 156.691650390625, 81.09992980957031, 54.263710021972656, 58.62369155883789, -10.933975219726562, 48.690433502197266, 77.42571258544922, 242.71572875976562, -31.432113647460938, 182.9212188720703, 33.27070617675781, 51.638282775878906, 20.10434341430664, 29.781570434570312, 93.17036437988281, 12.146858215332031, 207.0850830078125, 40.23487854003906, 66.90833282470703, -90.96072387695312, 177.17755126953125, -27.39981460571289, -94.59309387207031, -42.25849914550781, 284.2041320800781, -8.503740310668945, 33.486778259277344, 12.932903289794922, 0.6377830505371094, 22.516132354736328, 147.7225341796875, 140.28196716308594, -72.00017547607422, 204.31822204589844, 99.57131958007812, 42.22544860839844, 41.35639953613281, 120.33885192871094, 34.85835266113281, 73.71167755126953, 18.671607971191406, -12.011054992675781, 14.180862426757812, 103.07817077636719, 10.879478454589844, 197.1126708984375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000126.npy"} +{"epoch": 0.18502202643171806, "step": 127, "batch_size": 64, "mean": 44.8191032409668, "std": 75.1373291015625, "min": -157.6332550048828, "p10": -43.53256912231445, "median": 41.85115051269531, "p90": 136.02871093750002, "max": 219.35935974121094, "pos_frac": 0.765625, "sample": [62.816184997558594, 89.42156982421875, -12.499811172485352, 36.91101837158203, -2.8328857421875, -116.25508117675781, 167.5116424560547, 8.40081787109375, 110.7631607055664, -153.13363647460938, 7.253864288330078, 74.29183197021484, 96.11466979980469, -33.75927734375, 16.56475067138672, 44.4835205078125, 105.54558563232422, 9.104734420776367, -157.6332550048828, 96.64285278320312, 21.038291931152344, 81.21572875976562, 148.25953674316406, 24.465835571289062, 72.35348510742188, 194.58209228515625, 22.695453643798828, 129.28720092773438, -40.35932159423828, 14.736648559570312, 139.23507690429688, 95.9858627319336, 122.38462829589844, 27.689979553222656, 72.06248474121094, 168.29383850097656, -1.5477771759033203, -28.10852813720703, 22.885971069335938, -4.930675506591797, 12.445476531982422, 133.25686645507812, 9.538415908813477, 93.55889129638672, 78.87776184082031, 67.64679718017578, 46.326568603515625, 137.21664428710938, 121.45771026611328, 58.49559783935547, -59.4041748046875, 61.62648010253906, 20.007043838500977, 219.35935974121094, -62.32965087890625, 72.19107055664062, 65.02056121826172, 103.41920471191406, 39.218780517578125, 15.636112213134766, 26.344337463378906, -27.079742431640625, -51.453224182128906, -44.89253234863281], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000127.npy"} +{"epoch": 0.18649045521292218, "step": 128, "batch_size": 64, "mean": 47.74309539794922, "std": 86.2498779296875, "min": -191.74639892578125, "p10": -59.533526611328114, "median": 38.94951248168945, "p90": 173.41941375732424, "max": 278.4764404296875, "pos_frac": 0.6875, "sample": [52.572628021240234, 46.00086975097656, 149.6465301513672, -9.544624328613281, 102.30955505371094, -65.78340148925781, 107.89100646972656, 158.5450439453125, 41.43022918701172, 53.97999572753906, 187.713134765625, 51.467620849609375, 68.84967041015625, 35.9281120300293, -113.29155731201172, 113.84776306152344, 176.67031860351562, 3.6984176635742188, 0.5217266082763672, 78.93794250488281, -78.22428894042969, -1.9758529663085938, 278.4764404296875, -191.74639892578125, 178.93576049804688, 59.677040100097656, 193.91693115234375, 169.9593505859375, 91.94689178466797, 50.18004608154297, -9.752384185791016, -67.91810607910156, 119.19756317138672, -14.361812591552734, -18.893905639648438, 16.721206665039062, -97.49203491210938, 10.157276153564453, 34.87794494628906, 48.2316780090332, 101.82625579833984, -44.95048522949219, 67.39967346191406, -27.084117889404297, -14.882509231567383, 174.9022979736328, 36.46879577636719, 36.14790344238281, 23.102245330810547, 59.46847152709961, 15.346336364746094, -7.482328414916992, 197.8607177734375, -1.7690658569335938, 11.567441940307617, -5.673980712890625, -9.018257141113281, 57.176780700683594, 125.71566772460938, 11.169185638427734, -67.365478515625, 135.3043212890625, 167.13534545898438, -0.11153030395507812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000128.npy"} +{"epoch": 0.18795888399412627, "step": 129, "batch_size": 64, "mean": 48.760475158691406, "std": 105.51097106933594, "min": -240.73297119140625, "p10": -54.59724235534668, "median": 37.46699333190918, "p90": 166.83025054931642, "max": 466.64788818359375, "pos_frac": 0.671875, "sample": [2.1882801055908203, 35.335445404052734, -54.977237701416016, 127.16340637207031, -53.71058654785156, 149.69381713867188, 10.510124206542969, -59.8779296875, 186.01089477539062, 29.374008178710938, -16.118423461914062, 130.75697326660156, 26.285778045654297, 46.81390380859375, 63.37739944458008, 86.53836822509766, 168.53138732910156, -61.590911865234375, -47.571929931640625, 66.81756591796875, 271.18603515625, 93.88143920898438, 254.16110229492188, -112.60774993896484, 111.53494262695312, -142.26315307617188, -47.273040771484375, 38.248416900634766, 466.64788818359375, 162.86093139648438, 28.175914764404297, 25.43408203125, 6.8516082763671875, 79.17491912841797, -29.52972412109375, -24.534069061279297, 56.90826416015625, 143.46600341796875, 36.685569763183594, -14.12664794921875, -85.76972198486328, -9.824705123901367, 67.6072998046875, 93.98121643066406, 160.19720458984375, -240.73297119140625, 18.92024803161621, 19.283645629882812, -7.916545867919922, -6.415645599365234, 155.8871307373047, 193.5847930908203, 46.07342529296875, 55.048919677734375, 82.10159301757812, -25.437339782714844, 40.1158447265625, 62.29869079589844, -41.008628845214844, 100.22626495361328, 174.33676147460938, -42.53680419921875, 71.73860931396484, -1.5220394134521484], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000129.npy"} +{"epoch": 0.1894273127753304, "step": 130, "batch_size": 64, "mean": 73.87776947021484, "std": 98.04134368896484, "min": -125.54405212402344, "p10": -28.31430320739745, "median": 54.842445373535156, "p90": 215.3812438964844, "max": 374.900390625, "pos_frac": 0.796875, "sample": [79.83689880371094, 37.601741790771484, 40.81902313232422, 204.0423583984375, -14.011573791503906, 7.7980804443359375, 242.19369506835938, 81.45275115966797, -77.85150909423828, 1.3904285430908203, 11.655080795288086, 69.73843383789062, 36.45951461791992, 215.86178588867188, 226.9510498046875, 25.49700164794922, 107.03062438964844, -6.638847351074219, 43.11908721923828, 76.14917755126953, -18.879932403564453, 35.364776611328125, 21.525009155273438, 96.80998229980469, 93.49118041992188, -7.642829895019531, 18.02044677734375, 104.68314361572266, 65.0604476928711, 198.00222778320312, 159.1115264892578, 250.6978759765625, -40.25477600097656, 46.29225158691406, 168.47607421875, -125.54405212402344, 32.21910095214844, 34.196250915527344, 32.82648849487305, -32.35760498046875, 66.6794204711914, 57.215389251708984, 80.5527572631836, 282.77825927734375, 173.91192626953125, -8.950735092163086, 374.900390625, -58.59705352783203, -7.322946548461914, 94.91280364990234, 35.928932189941406, 52.46950149536133, 214.25997924804688, -44.934776306152344, 152.3354949951172, -124.80457305908203, 142.01168823242188, 1.3413944244384766, 148.87579345703125, 124.29887390136719, 253.65036010742188, 59.439598083496094, 30.71584701538086, 85.31681823730469], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000130.npy"} +{"epoch": 0.19089574155653452, "step": 131, "batch_size": 64, "mean": 74.26715850830078, "std": 87.16487121582031, "min": -220.72955322265625, "p10": -1.8884180068969725, "median": 56.59763145446777, "p90": 180.85535583496096, "max": 310.0848388671875, "pos_frac": 0.859375, "sample": [57.19074630737305, 50.368263244628906, 54.821510314941406, 35.45721435546875, 310.0848388671875, -220.72955322265625, 72.45654296875, 56.0045166015625, 27.78400421142578, 134.01895141601562, 119.8902587890625, 23.142086029052734, 78.26348876953125, 13.396955490112305, 124.26884460449219, 38.57282257080078, 40.16415786743164, -1.6833438873291016, 172.9332733154297, 81.9002685546875, 51.0252685546875, 171.55776977539062, -17.967811584472656, -11.943344116210938, 59.94975280761719, 46.7755126953125, 88.30615997314453, 7.359685897827148, 227.220458984375, 40.021507263183594, 42.67396926879883, -104.57870483398438, 177.10693359375, 16.027542114257812, 252.31192016601562, 3.6156959533691406, 118.11128997802734, 38.74104309082031, 148.36097717285156, 52.983699798583984, 145.01910400390625, 34.07704162597656, 227.46682739257812, 25.21326446533203, -108.60243225097656, 144.70697021484375, -4.620086669921875, 118.31615447998047, -1.9763069152832031, 27.862293243408203, -1.26214599609375, 145.7984619140625, 0.35228729248046875, 197.40101623535156, 182.46182250976562, 192.73968505859375, 96.13990783691406, 67.57315063476562, 29.95968246459961, 92.89959716796875, 162.22607421875, 102.20679473876953, 103.51034545898438, 97.66337585449219], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000131.npy"} +{"epoch": 0.19236417033773862, "step": 132, "batch_size": 64, "mean": 61.66801452636719, "std": 101.38632202148438, "min": -174.080810546875, "p10": -29.94119873046875, "median": 50.06387710571289, "p90": 157.59001770019532, "max": 438.55279541015625, "pos_frac": 0.78125, "sample": [19.950576782226562, 10.757732391357422, 167.51727294921875, -66.57669830322266, 65.72543334960938, 38.325469970703125, 71.24105834960938, 315.1211242675781, 352.28411865234375, 8.832542419433594, 92.58731079101562, 120.19841766357422, 27.145151138305664, 97.22003173828125, -174.080810546875, -6.912773132324219, 300.8956298828125, -39.72197723388672, 97.8962173461914, -17.34794044494629, 4.301174163818359, -29.893699645996094, 136.24813842773438, 74.1225814819336, 26.34984588623047, -29.96155548095703, 0.33489036560058594, 130.66445922851562, 13.957046508789062, -5.64141845703125, -10.842453002929688, -43.6199951171875, 21.135366439819336, 110.32555389404297, 86.24021911621094, 157.93243408203125, 65.92340087890625, 65.76740264892578, 82.81752014160156, 56.666988372802734, 61.996177673339844, 35.562191009521484, 46.45405960083008, 156.79104614257812, -112.47171020507812, 53.6736946105957, 123.80524444580078, -90.49103546142578, 14.087688446044922, 106.02947998046875, -24.77573585510254, 72.50811004638672, 115.61051940917969, 15.778404235839844, 78.15719604492188, -16.531402587890625, 39.58396911621094, 116.23292541503906, 22.72509765625, 12.408103942871094, 193.60345458984375, 29.817607879638672, 63.759361267089844, 438.55279541015625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000132.npy"} +{"epoch": 0.19383259911894274, "step": 133, "batch_size": 64, "mean": 52.86082077026367, "std": 88.8360366821289, "min": -133.77114868164062, "p10": -39.34584655761717, "median": 41.16757774353027, "p90": 173.30559844970713, "max": 273.11553955078125, "pos_frac": 0.734375, "sample": [7.889923095703125, 70.59762573242188, -23.541030883789062, 146.60662841796875, 3.173084259033203, -57.841827392578125, -2.283203125, 45.70298767089844, -82.61929321289062, 273.11553955078125, 64.47105407714844, 77.9150619506836, -10.115531921386719, 112.3312759399414, -96.06211853027344, 19.80035400390625, 132.22637939453125, -10.182464599609375, 240.9830780029297, 11.179727554321289, 73.46879577636719, 98.00741577148438, -10.997001647949219, 148.16001892089844, -1.9097824096679688, 121.89851379394531, 53.4970703125, 86.84428405761719, 100.69921112060547, 91.83123779296875, 4.2134246826171875, 9.940322875976562, 29.22883415222168, 272.8501281738281, -7.448127746582031, 51.083946228027344, 79.36560821533203, -96.93870544433594, 253.4423828125, 31.843978881835938, 42.321537017822266, 40.01361846923828, 56.162986755371094, 265.85247802734375, -133.77114868164062, -17.948028564453125, 74.15437316894531, 189.07958984375, 25.278594970703125, 49.76547622680664, 10.088996887207031, 142.01345825195312, -70.34983825683594, 184.082275390625, 120.92772674560547, -46.11933898925781, 50.32688903808594, 48.210235595703125, -13.451240539550781, 25.631364822387695, 5.67315673828125, -0.3042316436767578, 11.484725952148438, 11.540306091308594], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000133.npy"} +{"epoch": 0.19530102790014683, "step": 134, "batch_size": 64, "mean": 57.118778228759766, "std": 75.1961898803711, "min": -52.443931579589844, "p10": -37.35862655639648, "median": 48.26272773742676, "p90": 146.62706604003907, "max": 362.429931640625, "pos_frac": 0.78125, "sample": [29.037582397460938, -41.477684020996094, -46.12214660644531, 134.7133331298828, 214.53982543945312, 41.38945770263672, 21.270214080810547, 47.66787338256836, -49.95569610595703, 50.085365295410156, 141.9949493408203, 23.943004608154297, 13.415185928344727, -2.1532974243164062, 46.43401336669922, 66.60554504394531, -28.9381046295166, 63.65924072265625, 88.69477844238281, 31.41607666015625, 5.974720001220703, 14.73681640625, 118.16759490966797, 16.992637634277344, 178.61769104003906, -3.8742218017578125, 130.76898193359375, 100.50933837890625, 63.362918853759766, 19.72553253173828, 48.857582092285156, 148.6122589111328, 3.7134571075439453, 65.0985107421875, 179.0924072265625, 119.15494537353516, 136.91278076171875, -52.443931579589844, 89.76708984375, -22.87485122680664, 96.01724243164062, -40.31462860107422, 72.20899963378906, 9.712469100952148, 362.429931640625, 6.598018646240234, 88.42828369140625, 45.56645965576172, 80.95416259765625, -5.6719207763671875, 79.39736938476562, -30.461288452148438, -46.523338317871094, 190.72244262695312, 56.911903381347656, -1.6212005615234375, 65.69477081298828, 44.13150405883789, 50.17973327636719, 76.72801971435547, -47.98877716064453, 91.06358337402344, 35.721439361572266, 168.624755859375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000134.npy"} +{"epoch": 0.19676945668135096, "step": 135, "batch_size": 64, "mean": 84.63871765136719, "std": 111.38013458251953, "min": -106.69830322265625, "p10": -49.61666717529297, "median": 58.97948455810547, "p90": 249.54578094482423, "max": 403.15081787109375, "pos_frac": 0.78125, "sample": [30.78417205810547, 187.11611938476562, 33.74253463745117, 297.0230407714844, 170.36529541015625, 83.40382385253906, 270.7808837890625, 43.28355026245117, 289.505126953125, 247.1758575439453, 153.27938842773438, 111.65956115722656, 232.67047119140625, 139.28671264648438, 306.1031494140625, 129.25408935546875, 34.28376770019531, 225.11041259765625, 14.025665283203125, -53.792686462402344, -3.881927490234375, 119.59739685058594, 191.63400268554688, 80.09364318847656, 48.048072814941406, 105.14215087890625, -10.840560913085938, 403.15081787109375, -29.42782211303711, 130.23751831054688, -74.63018798828125, 223.53042602539062, 250.56146240234375, -83.58430480957031, 49.198551177978516, -16.859264373779297, 27.476850509643555, 61.65673065185547, 159.66830444335938, -11.563735961914062, -64.27479553222656, 67.22967529296875, 93.73004150390625, 76.05342102050781, 7.952976226806641, 56.30223846435547, 4.565189361572266, -26.878890991210938, 51.627166748046875, 12.765148162841797, 34.286720275878906, 119.34197998046875, 100.34170532226562, 96.998779296875, 15.219131469726562, 130.24642944335938, 52.24712371826172, 283.5729064941406, 14.994697570800781, -106.69830322265625, -46.11744689941406, -51.1163330078125, -71.27772521972656, 1.49664306640625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000135.npy"} +{"epoch": 0.19823788546255505, "step": 136, "batch_size": 64, "mean": 57.255409240722656, "std": 113.35480499267578, "min": -102.4334716796875, "p10": -53.37466201782227, "median": 30.975234985351562, "p90": 142.80155944824222, "max": 506.6776123046875, "pos_frac": 0.65625, "sample": [73.61102294921875, 97.03035736083984, 145.20431518554688, 117.51828002929688, -6.65142822265625, -52.94428634643555, 334.8394775390625, 145.33090209960938, -23.757110595703125, 70.90396118164062, 118.91593933105469, 77.86384582519531, -42.71791076660156, 17.32866668701172, 84.93472290039062, 64.11386108398438, 88.28993225097656, 40.370948791503906, 130.12353515625, 394.2040100097656, 6.1895751953125, 72.83065795898438, 506.6776123046875, -13.738655090332031, 133.63613891601562, -5.002922058105469, 255.7451171875, -12.080623626708984, 360.60797119140625, 26.136932373046875, 11.129539489746094, -7.706634521484375, 63.58507537841797, -84.75418090820312, 59.01393127441406, -91.33256530761719, 20.384628295898438, -24.1693115234375, 102.86968994140625, 92.51194763183594, 11.533666610717773, -20.321727752685547, -18.397125244140625, 67.80752563476562, 14.898178100585938, 4.276189804077148, -29.571186065673828, 35.644676208496094, 111.30819702148438, 137.19512939453125, -60.295204162597656, -69.11600494384766, 38.6907958984375, 97.38863372802734, -102.4334716796875, -11.067741394042969, 98.81211853027344, -25.998241424560547, -53.55910873413086, 26.30579376220703, -1.8193092346191406, 130.7429656982422, 8.143516540527344, -72.86882781982422], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000136.npy"} +{"epoch": 0.19970631424375918, "step": 137, "batch_size": 64, "mean": 89.07611846923828, "std": 133.27589416503906, "min": -190.00259399414062, "p10": -33.5611099243164, "median": 59.005455017089844, "p90": 287.5286102294925, "max": 456.91229248046875, "pos_frac": 0.828125, "sample": [-55.62696838378906, 5.288547515869141, 100.89387512207031, 141.82232666015625, 169.80831909179688, 34.524635314941406, 54.91893005371094, 109.5599365234375, -26.183589935302734, 107.40948486328125, 8.590353012084961, 101.95547485351562, 456.91229248046875, 23.98267936706543, 39.67719268798828, -126.409912109375, 136.26690673828125, 7.260112762451172, 39.502830505371094, 175.2344207763672, 168.2996063232422, 100.5723648071289, 4.335540771484375, 442.98077392578125, 127.63166809082031, 355.61749267578125, 216.37237548828125, 185.13856506347656, 13.674901962280273, 183.73361206054688, -117.60580444335938, 6.487640380859375, 77.8037338256836, 16.51642608642578, 151.66940307617188, 12.327091217041016, 141.35711669921875, 52.47102355957031, 12.181251525878906, 63.87174987792969, 70.71353149414062, 47.72551727294922, -8.073341369628906, -190.00259399414062, -54.276084899902344, -84.92424774169922, 451.167724609375, -36.722904205322266, 48.073936462402344, 374.5330505371094, 63.74115753173828, 146.27096557617188, 1.9660415649414062, 103.76055908203125, -22.13545036315918, 27.568696975708008, 62.707305908203125, 4.196636199951172, -2.425069808959961, 107.78260040283203, 352.37664794921875, 55.30360412597656, 142.69479370117188, 318.0241394042969], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000137.npy"} +{"epoch": 0.2011747430249633, "step": 138, "batch_size": 64, "mean": 68.0129165649414, "std": 85.9329833984375, "min": -135.05657958984375, "p10": -24.693673515319823, "median": 42.57146644592285, "p90": 177.51772613525392, "max": 332.10150146484375, "pos_frac": 0.765625, "sample": [184.18643188476562, -14.19384765625, 80.32243347167969, 158.35256958007812, 4.7956085205078125, 129.00625610351562, -32.86878967285156, -0.7416934967041016, 10.77069091796875, 197.1677703857422, 38.94381332397461, 35.75843811035156, 41.18006896972656, 10.694091796875, 35.42854309082031, -135.05657958984375, 18.938581466674805, -27.444202423095703, -4.496795654296875, -12.006568908691406, 222.57894897460938, 195.34765625, 18.82122039794922, 13.574378967285156, 133.74667358398438, 145.8708038330078, 157.24462890625, -7.754631042480469, 100.40233612060547, 179.5375213623047, 68.244140625, 32.77595520019531, 135.75033569335938, -3.99310302734375, 157.80914306640625, 219.4575958251953, 96.62234497070312, 43.46184158325195, -84.85917663574219, 82.83330535888672, 78.35250854492188, 23.351688385009766, 122.18089294433594, 1.3280696868896484, -6.1317596435546875, -68.37825775146484, 111.89635467529297, 165.05276489257812, 14.705440521240234, 41.68109130859375, 132.78482055664062, 40.740692138671875, 153.92530822753906, -46.530418395996094, -23.348474502563477, 73.21980285644531, -25.270187377929688, 83.42831420898438, 31.3089599609375, 332.10150146484375, 124.58589935302734, 65.5438232421875, 127.28418731689453, 172.80487060546875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000138.npy"} +{"epoch": 0.2026431718061674, "step": 139, "batch_size": 64, "mean": 78.85863494873047, "std": 103.2675552368164, "min": -114.67269897460938, "p10": -39.71513519287109, "median": 64.90960693359375, "p90": 186.8142501831055, "max": 458.9858703613281, "pos_frac": 0.78125, "sample": [-27.405380249023438, 181.6336669921875, 110.75749969482422, 193.5034637451172, -42.42133331298828, 79.3578872680664, 129.81857299804688, -23.22418212890625, 93.33099365234375, 164.29006958007812, -114.67269897460938, 57.166900634765625, -60.11531448364258, 314.7506103515625, 159.3551025390625, 51.22955322265625, 91.83920288085938, 76.46366119384766, 40.00806427001953, 71.34109497070312, 172.18710327148438, 78.90232849121094, -81.88809967041016, 138.30609130859375, 58.478118896484375, -2.2099876403808594, 172.25741577148438, 99.74072265625, 19.92203712463379, 42.735877990722656, 171.30482482910156, 34.137535095214844, 151.2687225341797, 136.58914184570312, 44.51203918457031, 111.61871337890625, -33.400672912597656, -4.657838821411133, 15.169710159301758, 23.504310607910156, 98.0569839477539, -2.8348617553710938, 23.369789123535156, 173.04513549804688, 22.540306091308594, 315.29998779296875, 108.28156280517578, -31.134445190429688, 87.3827133178711, -69.53511810302734, 89.6817398071289, -75.97032165527344, 33.91413879394531, 96.57427978515625, 458.9858703613281, 39.14950942993164, 266.8280029296875, 54.384185791015625, 50.53253936767578, -75.10099792480469, 222.69000244140625, 30.818557739257812, 45.473121643066406, 189.0345001220703], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000139.npy"} +{"epoch": 0.20411160058737152, "step": 140, "batch_size": 64, "mean": 70.27452850341797, "std": 105.53141784667969, "min": -196.7460174560547, "p10": -61.59022750854491, "median": 75.42626571655273, "p90": 194.05391235351564, "max": 404.32452392578125, "pos_frac": 0.75, "sample": [-23.284221649169922, 137.22756958007812, 404.32452392578125, -90.41741180419922, -24.94963836669922, 131.41741943359375, 90.60907745361328, 158.99769592285156, 63.46239471435547, 179.36973571777344, 140.55145263671875, 81.99090576171875, 128.3714141845703, 46.59001922607422, 24.355201721191406, 99.8260726928711, 298.3831481933594, 14.32815170288086, 4.854297637939453, 83.91284942626953, 23.389251708984375, 73.68306732177734, 102.07266235351562, 161.44332885742188, -2.577972412109375, -63.97306823730469, 293.14898681640625, -68.81498718261719, 36.88600158691406, 89.09614562988281, 26.251134872436523, 86.1904525756836, 8.409921646118164, 131.21148681640625, -48.00592041015625, 73.50411987304688, 103.4798583984375, 75.4224624633789, -26.337127685546875, 188.38055419921875, 140.28790283203125, -46.782615661621094, 205.28688049316406, -196.7460174560547, 0.2783355712890625, 67.51411437988281, 75.43006896972656, 100.6342544555664, 216.47494506835938, 27.588272094726562, -9.176023483276367, 181.5579376220703, -16.152496337890625, 13.689239501953125, -111.41621398925781, -71.88510131835938, -85.0656509399414, 86.98695373535156, 199.084228515625, 196.4853515625, -56.03026580810547, 88.2416763305664, 112.24093627929688, 166.26229858398438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000140.npy"} +{"epoch": 0.2055800293685756, "step": 141, "batch_size": 64, "mean": 66.66455078125, "std": 114.71014404296875, "min": -285.0177917480469, "p10": -61.85063858032226, "median": 61.552345275878906, "p90": 204.27646789550786, "max": 352.88836669921875, "pos_frac": 0.75, "sample": [58.427490234375, 42.156455993652344, -12.583614349365234, 119.18148040771484, 64.67720031738281, -39.630462646484375, 149.56219482421875, 81.69985961914062, -1.7059555053710938, -11.463607788085938, 70.66653442382812, 318.9310302734375, -285.0177917480469, 52.533687591552734, -8.266716003417969, 28.063995361328125, -1.061981201171875, -78.01309204101562, 301.00103759765625, -63.877105712890625, -57.122215270996094, 235.3861846923828, 52.542781829833984, 99.193115234375, 159.28536987304688, 88.7227783203125, 133.7786865234375, 4.954566955566406, -129.30160522460938, 74.59234619140625, 116.2518310546875, 93.96128845214844, 37.66761016845703, 189.79263305664062, 283.2092590332031, 29.76116371154785, 124.3261489868164, 26.836637496948242, -18.15460777282715, 24.767620086669922, -37.36742401123047, 58.323646545410156, -75.83882904052734, 210.48382568359375, 0.44005584716796875, 154.5091552734375, 214.57763671875, 148.65451049804688, 11.889442443847656, -221.59213256835938, 18.918495178222656, 108.40638732910156, 72.46856689453125, 97.66624450683594, 136.94427490234375, 167.61862182617188, 352.88836669921875, 142.62823486328125, 2.803628921508789, 178.66940307617188, 89.63287353515625, 133.03952026367188, -67.0099105834961, 12.043899536132812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000141.npy"} +{"epoch": 0.20704845814977973, "step": 142, "batch_size": 64, "mean": 76.46134185791016, "std": 121.94556427001953, "min": -230.20932006835938, "p10": -51.53967056274413, "median": 45.342220306396484, "p90": 266.53167114257815, "max": 315.90338134765625, "pos_frac": 0.75, "sample": [-23.865989685058594, 315.7984924316406, 44.89995574951172, 45.78448486328125, -39.112037658691406, 262.3187255859375, -41.023616790771484, -27.25379753112793, -105.86326599121094, 36.13969039916992, 143.31243896484375, 103.72103881835938, 155.29531860351562, 24.31474494934082, 47.86597442626953, 255.04481506347656, 8.94942855834961, -230.20932006835938, -197.64395141601562, 81.10486602783203, 21.50011444091797, 30.921478271484375, 12.925922393798828, 315.90338134765625, 182.06141662597656, 38.561065673828125, 159.9987335205078, 25.778825759887695, 172.439697265625, -22.742591857910156, 148.3837890625, 140.42562866210938, 300.45672607421875, 90.49491882324219, -0.2000751495361328, 225.01475524902344, 235.2826385498047, 18.821529388427734, 23.230192184448242, 50.95286560058594, -65.54652404785156, -56.04655075073242, -1.868621826171875, 270.31219482421875, 81.49246978759766, 271.3165283203125, 14.905288696289062, 39.24510955810547, 82.95176696777344, -57.95393371582031, 61.983219146728516, -33.135650634765625, 5.721574783325195, -100.12606811523438, 215.0916290283203, 276.0638732910156, 25.733657836914062, 220.68267822265625, 44.34625244140625, -14.34150505065918, 137.56887817382812, 268.33721923828125, 46.31047058105469, 130.69277954101562], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000142.npy"} +{"epoch": 0.20851688693098386, "step": 143, "batch_size": 64, "mean": 49.55633544921875, "std": 108.37023162841797, "min": -189.60507202148438, "p10": -64.37529067993164, "median": 34.24697494506836, "p90": 164.69364013671876, "max": 389.2689514160156, "pos_frac": 0.671875, "sample": [89.79505157470703, -19.883804321289062, 57.654762268066406, 19.74212646484375, 320.1396484375, 99.9334487915039, -63.40788269042969, 107.60196685791016, 112.04721069335938, 28.471818923950195, 159.183349609375, -87.03736877441406, -57.31855010986328, 158.97119140625, 91.91773223876953, -136.21885681152344, -50.95964813232422, -2.4032020568847656, 107.20977783203125, 5.4310302734375, 39.464935302734375, 293.6202697753906, -64.7898941040039, 108.38572692871094, 22.87647247314453, -24.306392669677734, 42.71345520019531, 166.64614868164062, 37.757225036621094, 17.514904022216797, -19.220497131347656, -49.98851776123047, -71.15402221679688, 5.548971176147461, 160.13778686523438, 0.522369384765625, 61.192787170410156, 83.57848358154297, 38.774810791015625, 389.2689514160156, -28.635963439941406, -17.69593620300293, 35.831398010253906, 91.58781433105469, -1.8680133819580078, 57.423553466796875, 175.20315551757812, 145.1751708984375, -5.593528747558594, -71.3089599609375, -111.52362060546875, 353.94720458984375, -40.99988555908203, 96.47881317138672, 65.76033020019531, 32.66255187988281, 122.98469543457031, 13.431755065917969, 198.5455322265625, -189.60507202148438, -14.180610656738281, 48.12335205078125, 25.473472595214844, 10.974288940429688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000143.npy"} +{"epoch": 0.20998531571218795, "step": 144, "batch_size": 64, "mean": 58.62339401245117, "std": 92.73297882080078, "min": -161.66339111328125, "p10": -43.90634498596191, "median": 49.74464797973633, "p90": 194.3160095214844, "max": 266.74188232421875, "pos_frac": 0.71875, "sample": [-49.2916259765625, 128.5361328125, -13.381612777709961, 190.08883666992188, -116.5954360961914, -30.3525390625, 64.78331756591797, 126.62767791748047, 68.50988006591797, 1.9585227966308594, 76.18321990966797, -37.52903366088867, 80.2296142578125, 121.12660217285156, 60.984283447265625, 196.12765502929688, 55.414207458496094, 244.9747314453125, 115.23595428466797, 156.44412231445312, 250.01177978515625, 203.06698608398438, -28.256669998168945, -161.66339111328125, 115.9199447631836, -31.2769775390625, 49.85664367675781, -5.211982727050781, 53.52947235107422, 24.904924392700195, -57.164634704589844, 60.92900085449219, 74.86648559570312, 29.05987548828125, -67.97147369384766, 150.94781494140625, 50.634735107421875, 34.95432662963867, 23.270414352416992, 98.15111541748047, 33.06715393066406, 132.79147338867188, 263.4704895019531, 24.802465438842773, 34.74678039550781, 81.44804382324219, -37.619693756103516, 24.691444396972656, 14.03570556640625, 118.39109802246094, 139.92282104492188, 266.74188232421875, -1.25506591796875, -72.10009765625, -25.1890869140625, -23.068931579589844, 235.87852478027344, 12.496955871582031, 28.45832633972168, -6.296562194824219, 49.632652282714844, 33.447959899902344, 161.3707733154297, -46.600624084472656], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000144.npy"} +{"epoch": 0.21145374449339208, "step": 145, "batch_size": 64, "mean": 63.563446044921875, "std": 93.59578704833984, "min": -159.00363159179688, "p10": -49.69193763732909, "median": 46.917680740356445, "p90": 172.4570022583008, "max": 281.8798828125, "pos_frac": 0.734375, "sample": [27.16875457763672, -56.908294677734375, -106.16935729980469, 281.8798828125, 165.82386779785156, 132.15365600585938, 262.9935302734375, 5.732204437255859, -159.00363159179688, 58.85807418823242, -1.135772705078125, 85.51412963867188, 159.92245483398438, -68.02499389648438, 16.128875732421875, 138.94049072265625, 64.49604034423828, 50.38444519042969, 131.23809814453125, -54.17688751220703, -16.486120223999023, 43.669586181640625, 44.665313720703125, 150.85824584960938, 91.68536376953125, -83.53456115722656, -34.63478088378906, 46.38547134399414, -15.925888061523438, 208.6043701171875, 30.352140426635742, 10.428741455078125, 127.53564453125, -39.227054595947266, -61.88352966308594, 137.85939025878906, 21.175674438476562, 22.85087776184082, 89.25728607177734, -14.376785278320312, 80.2611083984375, -10.42104721069336, 132.761962890625, -26.231555938720703, 127.35757446289062, 220.5365753173828, 104.90763854980469, -29.668930053710938, 35.77579879760742, 134.99484252929688, 37.2463264465332, 47.44989013671875, 156.9104461669922, 175.29977416992188, 63.84829330444336, 21.005260467529297, -3.9777755737304688, 73.28881072998047, 162.216552734375, 134.53646850585938, 277.5804748535156, 35.85079574584961, 10.342491149902344, 211.11395263671875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000145.npy"} +{"epoch": 0.21292217327459617, "step": 146, "batch_size": 64, "mean": 49.569610595703125, "std": 99.00074768066406, "min": -249.55795288085938, "p10": -67.9942024230957, "median": 63.96097946166992, "p90": 180.3398727416993, "max": 275.18798828125, "pos_frac": 0.65625, "sample": [32.51097106933594, -47.29151153564453, 119.81910705566406, -68.3424072265625, 20.9578914642334, -5.222587585449219, 87.94580841064453, 75.90731811523438, 87.79855346679688, 84.92005920410156, 203.84951782226562, 126.28752899169922, 51.742767333984375, -30.533767700195312, 26.590675354003906, -19.31847381591797, 99.69116973876953, -0.0009307861328125, 43.361061096191406, -27.256914138793945, -6.281099319458008, 32.91314697265625, 62.911399841308594, 140.48939514160156, -110.92021179199219, 73.77240753173828, 159.22166442871094, -249.55795288085938, -85.0623779296875, 243.7894287109375, -88.62557220458984, 81.96012878417969, -5.214817047119141, 85.49594116210938, 208.23313903808594, -67.18172454833984, 65.01710510253906, 65.11366271972656, 136.56607055664062, 275.18798828125, -2.5726661682128906, -35.48023986816406, 48.706912994384766, 95.3264389038086, 68.15718078613281, 112.02091217041016, 105.28768157958984, -102.54200744628906, 65.01055908203125, -90.79234313964844, 84.33071899414062, 158.434326171875, 189.39053344726562, 76.07820892333984, 1.9957046508789062, 132.9534912109375, 45.33936309814453, -58.02897644042969, 105.01652526855469, -64.24269104003906, 244.58058166503906, -41.52522277832031, 217.58984375, -63.823272705078125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000146.npy"} +{"epoch": 0.2143906020558003, "step": 147, "batch_size": 64, "mean": 66.63192749023438, "std": 103.06085968017578, "min": -181.2686767578125, "p10": -36.8555923461914, "median": 53.064491271972656, "p90": 217.57971496582033, "max": 316.95086669921875, "pos_frac": 0.75, "sample": [121.35964965820312, -84.41305541992188, 42.392181396484375, 249.07431030273438, -32.24498748779297, -12.079734802246094, 113.21018981933594, 110.81999969482422, 70.63909912109375, -38.831565856933594, 31.574506759643555, 92.3431167602539, 65.35091400146484, 149.320556640625, 30.529590606689453, 220.053955078125, 116.53878784179688, 16.910022735595703, 13.45538330078125, 178.75527954101562, 171.0206756591797, -31.38292694091797, 193.33963012695312, 27.83097267150879, 316.95086669921875, 51.88114929199219, 229.75460815429688, 126.71502685546875, -19.170251846313477, 54.247833251953125, 37.72469711303711, -165.20550537109375, 5.554216384887695, 127.37046813964844, 113.9578857421875, -30.657684326171875, 67.48200988769531, 297.08343505859375, 82.56928253173828, 27.11819839477539, 211.80648803710938, 20.477020263671875, -19.832326889038086, 13.162269592285156, 37.39418029785156, 63.970252990722656, -11.587234497070312, 87.73855590820312, 71.7662124633789, -43.48412322998047, -14.11639404296875, 54.96627426147461, 23.443035125732422, 203.1635284423828, -27.28820037841797, 67.73072814941406, 274.83172607421875, -45.740989685058594, 36.11659240722656, -181.2686767578125, 265.2176513671875, -76.0693359375, 65.33131408691406, 47.77203369140625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000147.npy"} +{"epoch": 0.21585903083700442, "step": 148, "batch_size": 64, "mean": 58.86853790283203, "std": 97.90667724609375, "min": -120.96359252929688, "p10": -37.02766876220703, "median": 40.14466094970703, "p90": 170.70296936035157, "max": 408.5189208984375, "pos_frac": 0.78125, "sample": [-7.884540557861328, 56.968101501464844, 4.774200439453125, 92.81454467773438, -70.25849151611328, 123.7082290649414, 82.42242431640625, 42.18894577026367, -49.937286376953125, 221.76344299316406, 29.66875457763672, 168.1226806640625, 21.20918083190918, -3.9731979370117188, 173.9149169921875, 274.07293701171875, 5.0125274658203125, 35.28826904296875, -54.08672332763672, -78.36856079101562, 145.16543579101562, 139.41592407226562, -16.00359535217285, 117.32388305664062, 100.08629608154297, 48.435340881347656, 66.0116195678711, 54.66594314575195, 115.26924133300781, 0.467864990234375, 171.80880737304688, 19.393951416015625, 38.54821014404297, 85.47703552246094, -36.63904571533203, 50.160640716552734, 41.741111755371094, 16.366226196289062, 126.3956069946289, 72.2081298828125, 127.09619903564453, 30.34600067138672, -87.36843872070312, 53.96138000488281, 44.82225799560547, 20.035362243652344, 220.50311279296875, 54.687721252441406, -10.039390563964844, -37.19422149658203, -120.96359252929688, 29.533889770507812, 12.78477668762207, 3.2172374725341797, 36.37676239013672, 7.134037017822266, 42.80449676513672, 406.67132568359375, 408.5189208984375, 9.964225769042969, -4.018854141235352, -13.442665100097656, 97.85456848144531, 10.582048416137695], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000148.npy"} +{"epoch": 0.2173274596182085, "step": 149, "batch_size": 64, "mean": 70.72378540039062, "std": 90.47725677490234, "min": -189.71263122558594, "p10": -12.054581642150875, "median": 60.057273864746094, "p90": 162.7560760498047, "max": 324.03955078125, "pos_frac": 0.828125, "sample": [125.24588775634766, 74.843505859375, 217.882568359375, 66.58065795898438, 116.2949447631836, 98.58224487304688, 77.7596664428711, -8.917627334594727, -18.160140991210938, -19.060009002685547, 25.83362579345703, -7.9370269775390625, 62.75508499145508, 48.7835693359375, 59.06146240234375, -74.34323120117188, 87.83973693847656, 104.40826416015625, 39.646575927734375, 144.51498413085938, 130.33120727539062, 180.7884063720703, 118.16561126708984, 30.782316207885742, 15.184080123901367, 114.8994140625, 55.96434020996094, 136.73867797851562, 81.50360107421875, 161.00772094726562, 72.00434112548828, -3.2221603393554688, 324.03955078125, 48.8846435546875, 163.50537109375, 26.4273681640625, 261.89996337890625, 61.05308532714844, 31.34477996826172, 128.35821533203125, 27.214588165283203, 71.89230346679688, 11.479852676391602, 46.77940368652344, 0.21079254150390625, 56.710601806640625, 13.75143051147461, 62.891624450683594, -53.549346923828125, -189.71263122558594, 146.50408935546875, 16.156227111816406, -3.402435302734375, 152.74322509765625, 9.773412704467773, 320.4903869628906, 17.261411666870117, 41.566680908203125, 142.39703369140625, 277.94061279296875, 5.061317443847656, -13.398990631103516, 100.47940063476562, -96.19391632080078], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000149.npy"} +{"epoch": 0.21879588839941264, "step": 150, "batch_size": 64, "mean": 70.91671752929688, "std": 100.23312377929688, "min": -94.20390319824219, "p10": -22.700756072998047, "median": 41.41653060913086, "p90": 186.02308197021486, "max": 458.845947265625, "pos_frac": 0.828125, "sample": [308.0948486328125, 305.2271728515625, 36.10193634033203, 37.97694396972656, 65.23101806640625, -41.551979064941406, 195.89334106445312, 43.85852813720703, -49.169288635253906, 92.31433868408203, 458.845947265625, -3.5203380584716797, -23.11334991455078, 21.77165985107422, -19.138900756835938, 180.7746124267578, 26.78656005859375, 49.14807891845703, 45.70599365234375, 99.25763702392578, 40.3476448059082, 77.63594818115234, 161.14932250976562, 28.857559204101562, 117.09126281738281, 95.9452896118164, 207.0961151123047, 145.00169372558594, 16.251480102539062, 63.453922271728516, 19.18407440185547, -57.98771667480469, 81.99169158935547, 186.83717346191406, 119.94036865234375, 7.923471450805664, 184.12353515625, 24.437841415405273, 173.35873413085938, -21.738037109375, 112.88600158691406, -49.54821014404297, -51.972373962402344, 13.102951049804688, 123.50948333740234, 9.505184173583984, 40.557273864746094, 65.59825134277344, 21.90045928955078, 42.275787353515625, 63.04432678222656, 73.83528137207031, 73.98717498779297, 22.483154296875, 27.65416145324707, 352.50836181640625, 14.452985763549805, 17.409713745117188, 9.240167617797852, 33.88780975341797, 117.84966278076172, -9.595085144042969, -94.20390319824219, 6.905132293701172], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000150.npy"} +{"epoch": 0.22026431718061673, "step": 151, "batch_size": 64, "mean": 71.84840393066406, "std": 91.8819351196289, "min": -157.88656616210938, "p10": -34.242719650268555, "median": 60.469743728637695, "p90": 205.8341979980469, "max": 292.3525390625, "pos_frac": 0.84375, "sample": [-157.88656616210938, 234.4275665283203, 59.00004577636719, 6.568046569824219, 55.156105041503906, 62.11280822753906, 70.66650390625, 129.66165161132812, 78.17384338378906, 24.765304565429688, 197.75518798828125, -47.05626678466797, -34.350563049316406, 108.80052185058594, 61.32505798339844, -51.28835678100586, -4.232273101806641, 51.65862274169922, 70.14985656738281, 59.56512451171875, 284.34417724609375, 59.61442947387695, 165.6162872314453, 58.18223571777344, -66.8058853149414, 187.18365478515625, 30.750396728515625, 15.276641845703125, -33.991085052490234, 12.018173217773438, 39.45240020751953, 3.1149654388427734, 19.253864288330078, 6.99705696105957, 138.5413055419922, 198.0469970703125, -71.1878662109375, 67.17078399658203, 104.6224594116211, 125.56376647949219, 5.122243881225586, 65.33704376220703, 129.0001983642578, 79.30047607421875, 1.4966583251953125, 211.22274780273438, 125.13548278808594, 64.42365264892578, 137.62643432617188, -15.772712707519531, 264.447021484375, 4.623634338378906, 6.04149055480957, 155.69598388671875, 0.645751953125, 14.298900604248047, 209.17156982421875, 130.20684814453125, 292.3525390625, 216.98391723632812, -54.930572509765625, 84.34101104736328, 22.86983871459961, 129.9207000732422], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000151.npy"} +{"epoch": 0.22173274596182085, "step": 152, "batch_size": 64, "mean": 85.42166900634766, "std": 106.54341888427734, "min": -95.47952270507812, "p10": -47.73816070556641, "median": 64.07086944580078, "p90": 240.91099700927734, "max": 348.0552978515625, "pos_frac": 0.75, "sample": [23.973495483398438, 75.69377899169922, 173.7902374267578, 196.8739471435547, 78.23243713378906, 226.53448486328125, 65.12167358398438, 182.64324951171875, 348.0552978515625, 41.17015075683594, 165.29510498046875, 136.6562042236328, 36.36641311645508, -68.23979949951172, -5.734169006347656, 241.33966064453125, 150.56829833984375, 59.023597717285156, 74.02348327636719, 77.63142395019531, 57.81471252441406, 58.033653259277344, -48.085479736328125, 83.47430419921875, -10.334686279296875, 341.8592529296875, 143.59780883789062, 245.5234832763672, 48.7117919921875, 31.19432830810547, 63.02006530761719, -40.031982421875, 17.337520599365234, -7.48321533203125, -0.19456100463867188, 60.21624755859375, 18.94306182861328, 217.82504272460938, 29.794174194335938, 53.319671630859375, 331.7992858886719, -36.88642883300781, 57.643531799316406, 246.09121704101562, 70.93879699707031, -65.19766235351562, 260.9537353515625, 39.00628662109375, -82.01337432861328, 105.97976684570312, -1.2974681854248047, 160.17599487304688, 105.90425109863281, 93.49071502685547, 239.91078186035156, -51.32872772216797, 187.27401733398438, -95.47952270507812, -8.334220886230469, 128.26625061035156, -55.534393310546875, -46.92774963378906, 147.560791015625, 91.43665313720703], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000152.npy"} +{"epoch": 0.22320117474302498, "step": 153, "batch_size": 64, "mean": 72.57691955566406, "std": 112.7436752319336, "min": -184.590087890625, "p10": -38.21440277099609, "median": 58.480995178222656, "p90": 216.185954284668, "max": 462.80126953125, "pos_frac": 0.734375, "sample": [91.13644409179688, -12.218971252441406, 11.648193359375, -13.01171875, 17.19301986694336, -14.828643798828125, 25.939117431640625, 209.4254150390625, -46.41291809082031, 199.16212463378906, 102.76380920410156, 6.762672424316406, -52.28892135620117, 87.52816772460938, 251.8614959716797, -74.45211791992188, 77.312744140625, 18.841487884521484, 62.241973876953125, 103.51766204833984, 54.72001647949219, 462.80126953125, 44.576988220214844, 67.91705322265625, 80.2637939453125, 182.58538818359375, -16.29401397705078, -50.52483367919922, 219.0833282470703, 251.2080078125, 145.6996612548828, 43.33856964111328, 277.7945556640625, 3.98370361328125, 1.5813179016113281, 1.4219818115234375, 190.90982055664062, 110.98914337158203, -34.11112594604492, -39.97294998168945, 144.87112426757812, 0.908294677734375, 28.140457153320312, 316.2609558105469, 28.290237426757812, -11.861831665039062, -65.25807189941406, 69.57183837890625, -1.0074920654296875, -184.590087890625, 92.36935424804688, 387.6307373046875, 83.65977478027344, -32.51508712768555, 72.3181381225586, -5.441341400146484, 97.6035385131836, 53.103851318359375, 137.01498413085938, 133.48483276367188, 90.101806640625, -32.02195739746094, 85.54119110107422, 106.65487670898438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000153.npy"} +{"epoch": 0.22466960352422907, "step": 154, "batch_size": 64, "mean": 68.1134262084961, "std": 118.94171905517578, "min": -272.35626220703125, "p10": -57.273268127441405, "median": 54.228044509887695, "p90": 203.07899475097656, "max": 336.5234680175781, "pos_frac": 0.65625, "sample": [-10.36917495727539, 64.31080627441406, -272.35626220703125, 31.800081253051758, 336.5234680175781, 55.13800811767578, -13.518808364868164, 176.35067749023438, 197.76992797851562, -61.018680572509766, 89.6015396118164, 266.19146728515625, -119.88687133789062, 139.5339813232422, 121.46563720703125, 53.31808090209961, 3.145814895629883, 42.091461181640625, 35.546756744384766, -5.422964096069336, 125.83198547363281, 59.69369125366211, -12.402568817138672, 64.80430603027344, -43.36616516113281, 214.25930786132812, -50.882144927978516, 235.25863647460938, 167.10855102539062, -110.51077270507812, -38.40533447265625, 190.04962158203125, -58.741966247558594, 187.94287109375, -52.46788024902344, 201.9702911376953, 321.8714904785156, 193.81280517578125, -18.891632080078125, 187.4613800048828, 203.5541534423828, 122.99777221679688, 25.536792755126953, 88.53170776367188, 195.8675994873047, 1.07244873046875, -26.47821044921875, -4.721345901489258, -97.08226776123047, 127.86454772949219, 167.66078186035156, -51.34565734863281, 5.285497665405273, 38.84295654296875, 8.82928466796875, -79.32560729980469, 112.10200500488281, 144.84918212890625, 140.71954345703125, -53.84630584716797, -5.807647705078125, -0.9012928009033203, 82.14556884765625, 318.29656982421875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000154.npy"} +{"epoch": 0.2261380323054332, "step": 155, "batch_size": 64, "mean": 66.88587951660156, "std": 104.13064575195312, "min": -219.4533233642578, "p10": -23.759150314331052, "median": 66.3926773071289, "p90": 172.47914581298832, "max": 368.68389892578125, "pos_frac": 0.71875, "sample": [-11.343902587890625, 105.12935638427734, 59.068885803222656, -8.273162841796875, 110.675048828125, 12.836814880371094, 82.12893676757812, -31.848251342773438, 77.54228210449219, 22.39365005493164, -15.795120239257812, 61.65132141113281, 148.6934814453125, -143.15435791015625, 114.48560333251953, 81.77892303466797, -17.216327667236328, 52.408668518066406, 70.98867797851562, -24.994232177734375, 74.77005004882812, 79.82259368896484, 55.3521728515625, 368.68389892578125, 199.60252380371094, 8.493696212768555, 147.75875854492188, 105.9539566040039, -2.6768112182617188, -142.59291076660156, 25.962440490722656, -4.7821044921875, 23.054931640625, 107.8800048828125, 210.8035888671875, -219.4533233642578, 307.19781494140625, 44.012977600097656, 126.81814575195312, -18.748336791992188, 5.580913543701172, 106.19554138183594, 160.47108459472656, 61.79667663574219, 301.1513977050781, 104.68258666992188, 288.9624938964844, 164.28126525878906, -93.0604248046875, 120.08352661132812, -14.823287963867188, 175.99252319335938, -20.87729263305664, 117.0765609741211, 87.18557739257812, -73.40837860107422, 57.23271942138672, 134.881103515625, 125.9040756225586, 13.613285064697266, 147.22410583496094, -4.449960708618164, -15.830253601074219, 85.76042938232422], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000155.npy"} +{"epoch": 0.2276064610866373, "step": 156, "batch_size": 64, "mean": 39.98707962036133, "std": 115.67086029052734, "min": -295.817626953125, "p10": -95.76607360839843, "median": 28.476831436157227, "p90": 147.3846862792969, "max": 309.60040283203125, "pos_frac": 0.71875, "sample": [135.43862915039062, 62.57721710205078, 309.60040283203125, 256.7996826171875, 177.018310546875, 121.66293334960938, -216.35824584960938, -1.04888916015625, -84.51153564453125, 18.95703887939453, -248.32640075683594, 146.83392333984375, 109.11595916748047, -28.521968841552734, -68.48457336425781, -129.9998779296875, 47.13282012939453, 119.46197509765625, 275.1890869140625, 77.74421691894531, 37.84033203125, 131.85360717773438, 92.10955810546875, 26.55539321899414, 7.824251174926758, -19.166908264160156, 14.867036819458008, 13.102920532226562, 265.4142761230469, -17.228050231933594, 261.57977294921875, 147.6207275390625, 120.84510040283203, 68.26642608642578, 9.634651184082031, 7.81934928894043, 86.88539123535156, -100.58944702148438, -295.817626953125, -80.3125991821289, 85.25055694580078, -60.761314392089844, -16.846054077148438, 130.0766143798828, 30.398269653320312, -106.52761840820312, 88.13018035888672, 133.5238037109375, 75.40773010253906, 5.030557632446289, -3.9788150787353516, 79.40261840820312, 80.24060821533203, 62.64641571044922, -128.96331787109375, -63.09904479980469, 22.577255249023438, 25.767181396484375, 9.61004638671875, 2.7404098510742188, 23.126426696777344, 79.4603271484375, 4.026470184326172, 142.54885864257812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000156.npy"} +{"epoch": 0.2290748898678414, "step": 157, "batch_size": 64, "mean": 86.78329467773438, "std": 109.5932846069336, "min": -285.78472900390625, "p10": -27.11414852142334, "median": 83.70547103881836, "p90": 209.67184753417973, "max": 409.3968505859375, "pos_frac": 0.8125, "sample": [172.7188720703125, 174.86627197265625, 100.58948516845703, 198.3383331298828, 160.958740234375, 12.389144897460938, 84.77272033691406, 161.95254516601562, -0.5147781372070312, 183.7059326171875, 63.442325592041016, 226.36322021484375, 50.19709777832031, 85.18234252929688, 230.916748046875, 214.52906799316406, 164.8115234375, 101.35399627685547, 35.85377502441406, 161.40615844726562, 9.873893737792969, 47.56201171875, 83.20545959472656, -33.87540054321289, 128.7659454345703, 252.4246063232422, -4.638458251953125, 34.52675247192383, 3.46514892578125, 110.70170593261719, -285.78472900390625, -24.32164192199707, 26.498214721679688, 40.308921813964844, 133.59812927246094, 84.20548248291016, 259.4585266113281, 150.20162963867188, 100.93949890136719, -31.435361862182617, 34.3209114074707, 298.23101806640625, 35.496910095214844, 69.89891052246094, 0.6964149475097656, 183.47023010253906, 126.04432678222656, -26.625551223754883, 196.34140014648438, 122.67117309570312, 189.3214111328125, 51.614349365234375, -54.63688278198242, 25.753067016601562, 174.98336791992188, 171.72344970703125, 409.3968505859375, 66.4437484741211, 39.30359649658203, -150.78054809570312, -0.744537353515625, -63.648223876953125, 12.66496467590332, -27.32354736328125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000157.npy"} +{"epoch": 0.2305433186490455, "step": 158, "batch_size": 64, "mean": 93.98252868652344, "std": 142.2838897705078, "min": -179.637451171875, "p10": -51.27795524597167, "median": 55.45370292663574, "p90": 310.2966339111329, "max": 526.4447021484375, "pos_frac": 0.734375, "sample": [15.792621612548828, 11.856639862060547, 55.575103759765625, 61.195526123046875, 48.8299560546875, -28.757850646972656, 33.55072784423828, 102.03914642333984, 41.10103988647461, -65.57041931152344, 226.39776611328125, -123.645263671875, 265.791015625, 86.7323989868164, -27.088623046875, -59.983192443847656, -17.768030166625977, 134.66058349609375, -9.869977951049805, 17.456588745117188, 245.42059326171875, -38.704078674316406, 327.454833984375, -179.637451171875, 320.84051513671875, 81.00850677490234, 144.0145721435547, 22.794816970825195, 38.41978454589844, 13.256755828857422, 55.33230209350586, 106.55864715576172, -23.48291778564453, 154.53488159179688, -67.32112121582031, -11.913589477539062, 370.84063720703125, 108.30213165283203, -117.09815216064453, 360.0482177734375, 434.92791748046875, 0.4372711181640625, 68.8500747680664, 526.4447021484375, 20.79547119140625, 10.399818420410156, -56.6667594909668, 129.98074340820312, 267.2029113769531, 126.4057846069336, 385.35162353515625, 181.744873046875, 96.63825988769531, 185.56895446777344, -13.766698837280273, 193.240234375, 126.48356628417969, -8.920795440673828, 30.115821838378906, 112.84516143798828, 285.6942443847656, 52.47429656982422, 197.543701171875, -17.875120162963867], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000158.npy"} +{"epoch": 0.23201174743024963, "step": 159, "batch_size": 64, "mean": 62.26060485839844, "std": 113.30496978759766, "min": -218.28578186035156, "p10": -62.99391403198241, "median": 64.19441986083984, "p90": 202.60093078613286, "max": 327.46673583984375, "pos_frac": 0.734375, "sample": [6.752593994140625, 54.95764923095703, 116.11927795410156, -49.84416198730469, 261.1108093261719, 125.02981567382812, -218.28578186035156, -40.7411003112793, 127.4906005859375, 179.47616577148438, 96.63542175292969, -66.24256896972656, 14.336259841918945, -51.56829833984375, 117.84544372558594, -80.76941680908203, -134.95156860351562, 74.95829772949219, 185.12075805664062, 156.05506896972656, 327.46673583984375, 8.95047378540039, -50.34657287597656, 22.778114318847656, 36.713294982910156, -104.31230163574219, 12.9718017578125, -36.868896484375, 105.998779296875, 57.84080505371094, -30.045547485351562, 208.23992919921875, 73.39208221435547, 16.445629119873047, 131.15951538085938, -24.341428756713867, 60.61553955078125, -108.77650451660156, -0.535888671875, 244.41949462890625, 177.18252563476562, 236.84616088867188, 64.25743103027344, 84.23624420166016, -1.3000564575195312, 275.7982482910156, 10.427999496459961, 1.2943477630615234, 84.12584686279297, 64.13140869140625, 142.93292236328125, 42.322059631347656, -55.413719177246094, -203.629150390625, 77.5597915649414, 113.82139587402344, 105.77843475341797, 80.18923950195312, 31.584352493286133, 327.2614440917969, 189.44326782226562, 83.06326293945312, 83.94061279296875, 143.5742645263672], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000159.npy"} +{"epoch": 0.23348017621145375, "step": 160, "batch_size": 64, "mean": 85.69292449951172, "std": 135.52590942382812, "min": -150.4984893798828, "p10": -75.14172286987305, "median": 66.60890579223633, "p90": 255.34508972167973, "max": 478.4013671875, "pos_frac": 0.65625, "sample": [109.76705169677734, -11.407903671264648, 211.36245727539062, 165.0965118408203, -8.53079605102539, -84.645263671875, 225.58685302734375, 169.4380340576172, 178.40771484375, 12.521060943603516, -84.67460632324219, -8.69439697265625, 146.95297241210938, 244.97396850585938, 119.4476547241211, -72.10145568847656, 54.03933334350586, -0.2743988037109375, -150.4984893798828, 259.78985595703125, 279.80206298828125, -134.03309631347656, 236.58285522460938, -76.44469451904297, 143.3607177734375, 57.152862548828125, 12.389747619628906, -24.367965698242188, 478.4013671875, 69.40023803710938, 13.287818908691406, 63.81757354736328, -29.0413818359375, 418.79315185546875, -1.9888916015625, 79.85960388183594, 111.93376922607422, 187.66043090820312, 386.14898681640625, 303.2125244140625, 25.040695190429688, 216.54229736328125, -140.96458435058594, 223.22702026367188, 98.94985961914062, -0.41497802734375, -22.996232986450195, 15.191450119018555, 130.49887084960938, 130.7512969970703, -47.90575408935547, 40.93321990966797, 132.4355926513672, -69.3083724975586, 135.38177490234375, 90.97959899902344, -10.94755744934082, -0.5500411987304688, -103.20841217041016, 187.2947998046875, 48.506195068359375, 266.3015441894531, -32.90740966796875, 119.03240966796875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000160.npy"} +{"epoch": 0.23494860499265785, "step": 161, "batch_size": 64, "mean": 76.04591369628906, "std": 111.08191680908203, "min": -141.28701782226562, "p10": -54.91069946289062, "median": 53.177968978881836, "p90": 254.57139739990242, "max": 344.8037109375, "pos_frac": 0.765625, "sample": [83.29553985595703, 197.990234375, -57.47892761230469, 146.2046356201172, 340.9365539550781, -141.28701782226562, -66.02883911132812, -110.93276977539062, 112.17716979980469, -41.17608642578125, 45.354156494140625, 120.99507141113281, 84.03077697753906, -81.35948181152344, 6.025974273681641, 137.10899353027344, 344.8037109375, 94.21383666992188, 150.39569091796875, 289.7706298828125, 31.367027282714844, 51.258419036865234, -2.28826904296875, -36.247413635253906, 40.28504943847656, -12.410015106201172, 131.916015625, 46.056602478027344, 295.289794921875, 293.40472412109375, 134.50540161132812, -26.5783634185791, 14.01321029663086, -28.044979095458984, 263.54376220703125, 108.74517059326172, 55.09751892089844, -48.91816711425781, 15.3055419921875, 77.75456237792969, -14.873580932617188, 36.00708770751953, 14.272529602050781, 37.85377502441406, 19.959129333496094, 35.808197021484375, 220.24127197265625, 73.03734588623047, 17.579124450683594, 163.2149658203125, 233.63587951660156, -88.91351318359375, 103.12107849121094, 190.72885131835938, 33.35150146484375, 48.053741455078125, 41.38283157348633, 107.49380493164062, 99.99641418457031, -90.45736694335938, 58.88325119018555, 267.7762451171875, 121.08067321777344, 78.60973358154297], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000161.npy"} +{"epoch": 0.23641703377386197, "step": 162, "batch_size": 64, "mean": 73.67124938964844, "std": 120.9752426147461, "min": -200.3194122314453, "p10": -58.9878189086914, "median": 65.53447341918945, "p90": 224.95506896972665, "max": 479.3146057128906, "pos_frac": 0.734375, "sample": [5.192878723144531, 41.9821891784668, 146.20571899414062, 263.71026611328125, 95.494140625, 233.9268798828125, 31.643083572387695, 149.6634979248047, 44.806297302246094, -11.440927505493164, -27.510574340820312, 204.02084350585938, 73.12298583984375, -124.5478744506836, -47.849365234375, -13.138668060302734, 167.4368133544922, 168.41561889648438, -21.6221923828125, 145.90087890625, 77.92089080810547, 34.8189697265625, 61.74809265136719, 3.8327102661132812, 24.569448471069336, 240.82968139648438, 132.87257385253906, -64.11662292480469, -200.3194122314453, 144.78477478027344, 105.25321197509766, -136.67591857910156, -14.714441299438477, 16.466917037963867, 168.64068603515625, 74.6185302734375, 245.90878295898438, 479.3146057128906, 66.95179748535156, 109.4672622680664, 92.7939224243164, 103.72140502929688, 132.26527404785156, 169.16859436035156, 157.33099365234375, -15.756759643554688, -60.87774658203125, -9.653541564941406, 115.52259826660156, 45.65918731689453, -54.57798767089844, 182.13259887695312, 3.8365325927734375, -102.13101959228516, 426.98223876953125, 30.453163146972656, -109.80377960205078, 34.87297058105469, 83.74905395507812, 87.19632720947266, 64.11714935302734, -46.848419189453125, 44.72935485839844, 242.4931640625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000162.npy"} +{"epoch": 0.23788546255506607, "step": 163, "batch_size": 64, "mean": 72.52832794189453, "std": 116.85908508300781, "min": -179.22708129882812, "p10": -62.01061477661133, "median": 67.79509735107422, "p90": 276.6533081054688, "max": 357.753662109375, "pos_frac": 0.703125, "sample": [11.190299987792969, -15.001136779785156, 281.584228515625, 357.753662109375, -29.859222412109375, -132.6845245361328, 93.55884552001953, 1.869110107421875, -15.770706176757812, 83.62251281738281, 148.99911499023438, -33.799461364746094, 102.37093353271484, 338.40496826171875, 114.44831085205078, 49.05565643310547, 118.00946807861328, -8.194122314453125, 14.881690979003906, -66.6810531616211, 57.85620880126953, 185.16439819335938, 105.33624267578125, -38.176185607910156, 110.24400329589844, 0.7350654602050781, 125.59451293945312, -179.22708129882812, -66.37728881835938, 120.39864349365234, 285.95123291015625, 16.094608306884766, 156.36590576171875, 10.60944938659668, 74.11587524414062, -13.911148071289062, -67.35649108886719, 320.63018798828125, 133.37918090820312, 83.35853576660156, -59.90312194824219, 41.95781707763672, -31.61199951171875, 265.1478271484375, 56.278045654296875, 107.0893783569336, -62.91382598876953, 296.03863525390625, 131.5068817138672, 30.212888717651367, 64.21609497070312, 117.32791137695312, 71.37409973144531, 203.94189453125, 81.4713363647461, -26.918792724609375, 71.68098449707031, 15.339401245117188, -99.54989624023438, -2.7564945220947266, 290.9808044433594, 88.467041015625, 177.03799438476562, -19.14657211303711], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000163.npy"} +{"epoch": 0.2393538913362702, "step": 164, "batch_size": 64, "mean": 87.4451904296875, "std": 110.57695770263672, "min": -160.79660034179688, "p10": -25.306178092956536, "median": 73.25679016113281, "p90": 217.67321777343756, "max": 443.67596435546875, "pos_frac": 0.828125, "sample": [-156.55809020996094, 190.03268432617188, 165.7974853515625, 165.51104736328125, 226.41595458984375, 50.534828186035156, 83.85965728759766, 147.32821655273438, 78.09152221679688, 64.58222961425781, 223.2183837890625, 56.01246643066406, 19.994218826293945, 51.895469665527344, -78.80012512207031, 138.84388732910156, 71.5948486328125, 68.90330505371094, -27.548112869262695, 79.12189483642578, 10.259626388549805, 147.0, 81.90721893310547, 204.7344970703125, 34.1676025390625, 153.2573699951172, -30.387039184570312, -102.28467559814453, 160.48455810546875, 17.282316207885742, 46.13163757324219, -19.279052734375, 163.50868225097656, 193.21981811523438, 155.5562744140625, 123.55160522460938, 23.47223663330078, 116.90025329589844, 21.48082733154297, 175.3371124267578, 48.40422058105469, 91.2989501953125, 71.38069915771484, 41.321044921875, 44.76573181152344, 5.556549072265625, 74.91873168945312, 55.30296325683594, -160.79660034179688, 144.7203826904297, 78.3268051147461, 55.0616455078125, 333.9414367675781, 6.866943359375, 361.86529541015625, 258.466552734375, -6.3770599365234375, 241.27194213867188, 103.82008361816406, -77.07353210449219, -18.713714599609375, 123.4298095703125, 443.67596435546875, -20.074996948242188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000164.npy"} +{"epoch": 0.24082232011747431, "step": 165, "batch_size": 64, "mean": 58.802772521972656, "std": 108.18260192871094, "min": -155.85704040527344, "p10": -79.08893585205078, "median": 41.996009826660156, "p90": 212.00552368164065, "max": 313.34466552734375, "pos_frac": 0.703125, "sample": [107.6056900024414, 92.91057586669922, -88.9080810546875, 23.526206970214844, -79.67315673828125, -5.978452682495117, 84.47916412353516, 125.98135375976562, -28.167648315429688, -97.35257720947266, 140.72039794921875, -151.21499633789062, -103.70619201660156, 35.38750457763672, 31.211837768554688, -98.31529998779297, -17.10605239868164, -66.87550354003906, 162.7135009765625, 159.57156372070312, 86.20376586914062, 22.807533264160156, 15.42776107788086, 313.34466552734375, -155.85704040527344, 23.7401123046875, -5.213804244995117, -50.63079071044922, -10.78509521484375, 150.99609375, -69.1084213256836, 24.184743881225586, 233.59869384765625, 206.04885864257812, 75.72854614257812, -24.027259826660156, 76.01551055908203, 66.03271484375, 86.86028289794922, 1.892709732055664, 45.53272247314453, 68.01534271240234, 190.0487518310547, 76.63108825683594, 214.55838012695312, -1.3305511474609375, 105.18119812011719, 57.38026428222656, 7.625923156738281, -77.72575378417969, 37.076324462890625, 256.32684326171875, 38.45929718017578, -44.258155822753906, 67.57426452636719, 279.0926513671875, 21.72557830810547, 287.22528076171875, 240.9892578125, 133.03262329101562, 122.00932312011719, 18.18842315673828, 134.61831665039062, 191.33047485351562], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000165.npy"} +{"epoch": 0.2422907488986784, "step": 166, "batch_size": 64, "mean": 106.9185562133789, "std": 152.06436157226562, "min": -131.01083374023438, "p10": -31.513973999023438, "median": 69.9689712524414, "p90": 295.04386901855474, "max": 596.9814453125, "pos_frac": 0.78125, "sample": [503.0106201171875, 334.06365966796875, 32.5580940246582, 521.9105834960938, -30.980140686035156, 8.468334197998047, 21.79684829711914, 73.71746826171875, 189.44338989257812, 82.80108642578125, 158.7393341064453, -15.661911010742188, 16.104209899902344, 172.27529907226562, -91.80455017089844, 73.5234375, 177.83837890625, 11.331161499023438, -8.396114349365234, 218.178955078125, -10.534215927124023, 596.9814453125, 49.83658981323242, -49.867244720458984, -53.635658264160156, -59.18449020385742, 52.66698455810547, 169.02383422851562, -131.01083374023438, 282.7764587402344, 300.30133056640625, 517.7705078125, 123.96459197998047, 63.081966400146484, 31.033889770507812, 379.17926025390625, 18.397666931152344, 217.04901123046875, 34.4627685546875, 173.4623565673828, -26.416004180908203, 91.37983703613281, 25.182777404785156, 6.923583984375, 191.72325134277344, 140.31561279296875, 220.0642852783203, -12.635501861572266, 78.94508361816406, 210.67953491210938, 37.74747085571289, 89.88102722167969, -73.70445251464844, 66.41450500488281, -6.510223388671875, -31.742759704589844, 119.08011627197266, 7.2846527099609375, 124.37387084960938, 0.214691162109375, 106.87728881835938, 20.399259567260742, 200.4344482421875, 101.20081329345703], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000166.npy"} +{"epoch": 0.24375917767988253, "step": 167, "batch_size": 64, "mean": 111.38298034667969, "std": 122.61681365966797, "min": -144.94427490234375, "p10": -21.548593139648435, "median": 91.24752426147461, "p90": 280.11407470703136, "max": 426.68621826171875, "pos_frac": 0.828125, "sample": [105.73466491699219, 68.47489929199219, 138.93402099609375, 235.5368194580078, 137.56845092773438, 144.3356170654297, 124.02423095703125, 67.13211822509766, 108.59939575195312, 56.74979782104492, 412.03594970703125, 216.55172729492188, 77.91337585449219, -23.966049194335938, 28.2796630859375, 68.39840698242188, 81.02716827392578, -66.16039276123047, 147.3802032470703, 203.55734252929688, 29.878999710083008, 39.873931884765625, 76.06355285644531, 68.13998413085938, 114.12260437011719, -114.96572875976562, 290.14019775390625, 227.2958984375, -14.939849853515625, 173.93975830078125, -14.187416076660156, 118.82247924804688, 73.84193420410156, 70.56884002685547, 398.50250244140625, 186.781005859375, 301.3725891113281, 85.16419219970703, 219.35072326660156, -144.94427490234375, 256.71978759765625, 300.6501159667969, 340.4641418457031, 146.19906616210938, 426.68621826171875, -18.21246337890625, 203.1021728515625, 149.7099609375, 213.60198974609375, 194.5478057861328, 97.33085632324219, 19.522232055664062, -11.066610336303711, 38.23012161254883, 70.33768463134766, 150.10565185546875, 109.02997589111328, 64.8460464477539, 17.761642456054688, -116.52239227294922, 22.836254119873047, 17.957529067993164, -59.27836608886719, -22.978363037109375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000167.npy"} +{"epoch": 0.24522760646108663, "step": 168, "batch_size": 64, "mean": 81.37892150878906, "std": 113.91191101074219, "min": -94.78436279296875, "p10": -61.410269546508786, "median": 56.45004653930664, "p90": 222.4830261230469, "max": 428.4840087890625, "pos_frac": 0.796875, "sample": [24.012008666992188, 129.45516967773438, 29.436901092529297, 22.069637298583984, 5.871648788452148, 37.63357162475586, 141.9815673828125, -58.26044845581055, 2.084918975830078, 9.82403564453125, -94.78436279296875, 202.80894470214844, 428.4840087890625, 108.1611328125, -62.76019287109375, 213.411865234375, 189.4034423828125, 252.4266815185547, 226.37066650390625, 58.161277770996094, 237.62155151367188, 281.5250244140625, 197.77273559570312, 197.66531372070312, -48.34345245361328, -56.541175842285156, -21.695960998535156, 134.7836456298828, 164.96566772460938, 147.5316162109375, 101.51533508300781, 36.26068115234375, -75.76891326904297, 107.11239624023438, 41.62480163574219, 130.69859313964844, -92.39363098144531, -12.435901641845703, -19.14346694946289, 44.15357971191406, 30.955625534057617, 96.81599426269531, 73.4801025390625, 118.70712280273438, 143.3429718017578, 69.4850082397461, 27.229482650756836, -68.69464874267578, 312.4305419921875, 69.8139419555664, 136.5122528076172, 18.961240768432617, 19.55009651184082, 33.59617614746094, 134.321533203125, 54.73881530761719, 26.415708541870117, -94.23987579345703, 51.227237701416016, -66.5316162109375, 406.7461242675781, 18.097877502441406, 88.53408813476562, 144.05435180664062], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000168.npy"} +{"epoch": 0.24669603524229075, "step": 169, "batch_size": 64, "mean": 73.04991912841797, "std": 145.65780639648438, "min": -315.373779296875, "p10": -83.02437057495115, "median": 43.82565498352051, "p90": 260.683740234375, "max": 599.1041259765625, "pos_frac": 0.734375, "sample": [102.41486358642578, 99.62240600585938, 7.064311981201172, 18.646827697753906, 265.43701171875, 121.94368743896484, 15.087112426757812, 186.89918518066406, 107.05585479736328, 35.9112663269043, 10.592754364013672, 30.567466735839844, -315.373779296875, -0.4527740478515625, 62.25590133666992, -37.501190185546875, 28.147705078125, -105.76936340332031, 92.39802551269531, 183.4039306640625, 162.91624450683594, -64.4923095703125, 99.04232788085938, -94.3875732421875, 234.77130126953125, 269.0517272949219, 67.59744262695312, 11.391616821289062, -27.045181274414062, 33.17137908935547, 233.57601928710938, -29.673988342285156, 24.45341682434082, 73.9986343383789, 104.87449645996094, 142.04566955566406, 249.5927734375, 30.661569595336914, 70.07722473144531, 306.2320556640625, -0.3254833221435547, 416.26611328125, -147.82586669921875, 102.60874938964844, 3.8052291870117188, 66.03578186035156, 43.359893798828125, -90.96668243408203, 381.4700622558594, -17.78058624267578, 599.1041259765625, -13.0517578125, 104.22557830810547, 17.214195251464844, 311.6944885253906, -20.928314208984375, 99.13764953613281, 4.5981292724609375, 44.29141616821289, -41.7974853515625, -142.330078125, 101.9658203125, 208.56947326660156, -160.3516845703125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000169.npy"} +{"epoch": 0.24816446402349487, "step": 170, "batch_size": 64, "mean": 83.38722229003906, "std": 119.77348327636719, "min": -286.2685852050781, "p10": -33.584522247314446, "median": 70.38172912597656, "p90": 234.87453002929686, "max": 376.8966979980469, "pos_frac": 0.8125, "sample": [205.29791259765625, -157.1317138671875, 180.67947387695312, 87.46468353271484, 241.5595703125, 84.58834838867188, 134.0853729248047, 20.253585815429688, 152.83131408691406, -2.1400909423828125, 101.45350646972656, -14.918766021728516, 71.6285171508789, -29.187118530273438, 10.044857025146484, -58.58332824707031, 122.5881118774414, 280.92584228515625, 4.15362548828125, 323.9654235839844, 5.649293899536133, -28.757299423217773, 171.58383178710938, 50.100284576416016, 5.0742950439453125, 22.370826721191406, -286.2685852050781, -35.46912384033203, 70.74765014648438, 212.271728515625, 70.59832763671875, 133.99159240722656, 56.59687423706055, 75.88192749023438, 190.20382690429688, -76.98070526123047, 309.3785400390625, 6.528205871582031, 8.743764877319336, 376.8966979980469, 72.6333999633789, -124.42349243164062, 70.16513061523438, 116.63520812988281, 223.07830810546875, -41.47875213623047, 26.44268226623535, 4.127450942993164, 53.29816436767578, 66.90364074707031, 106.01641082763672, 39.66773223876953, 264.32666015625, 35.83211898803711, 141.85455322265625, 62.487457275390625, 186.86009216308594, 234.97027587890625, -19.932613372802734, 217.26116943359375, 234.651123046875, 19.07354164123535, 225.5531005859375, 22.077632904052734], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000170.npy"} +{"epoch": 0.24963289280469897, "step": 171, "batch_size": 64, "mean": 71.26010131835938, "std": 142.85208129882812, "min": -264.6181640625, "p10": -96.5367042541504, "median": 61.56766700744629, "p90": 240.61990509033205, "max": 628.7731323242188, "pos_frac": 0.71875, "sample": [-94.33303833007812, 243.7047882080078, 34.734535217285156, 109.68203735351562, -117.3177719116211, -65.16935729980469, 4.964164733886719, 127.53317260742188, -82.6571044921875, 155.56689453125, 13.09488296508789, 185.25070190429688, 99.95208740234375, 107.44071960449219, 56.26054382324219, -63.54168701171875, 107.97700500488281, -125.31986999511719, 109.92442321777344, -5.878944396972656, 233.42184448242188, 61.77805709838867, 54.17546081542969, 98.04532623291016, 321.7919616699219, 96.634521484375, -97.48113250732422, 628.7731323242188, -110.0821762084961, 53.80751037597656, 68.92124938964844, -264.6181640625, 418.51873779296875, 168.7059326171875, 110.46356201171875, -132.83526611328125, 222.70062255859375, 112.58108520507812, 20.035751342773438, 325.5035400390625, 23.762542724609375, 72.12702178955078, 246.10263061523438, -33.698509216308594, -106.77413940429688, 7.0364990234375, 61.357276916503906, 121.03477478027344, -67.0699462890625, 123.22744750976562, -31.218955993652344, -60.08642578125, 162.86526489257812, 48.577842712402344, 98.13214111328125, -1.351175308227539, 62.66632080078125, 174.40274047851562, -21.546852111816406, 6.2860107421875, 112.9692611694336, 15.039247512817383, 300.2015075683594, 53.89470291137695], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000171.npy"} +{"epoch": 0.2511013215859031, "step": 172, "batch_size": 64, "mean": 72.91912841796875, "std": 123.60163116455078, "min": -218.680419921875, "p10": -75.85497589111327, "median": 82.26471328735352, "p90": 219.62178955078127, "max": 477.93341064453125, "pos_frac": 0.6875, "sample": [-20.418212890625, 103.71072387695312, -124.08909606933594, 96.21673583984375, 50.042938232421875, -61.957313537597656, 159.31317138671875, 106.29777526855469, 105.54467010498047, -90.990966796875, -138.6022186279297, 53.4515495300293, 86.28958892822266, -67.90412902832031, 175.30661010742188, 84.53396606445312, 26.482219696044922, 80.75680541992188, -68.06466674804688, 78.73506927490234, -48.62428283691406, 97.19735717773438, 221.69989013671875, 160.04022216796875, 30.27355194091797, 82.13947296142578, 253.8564453125, 214.77288818359375, -113.42471313476562, 250.5399169921875, -36.247718811035156, 120.93376922607422, 118.61629486083984, 95.32476806640625, 145.46737670898438, 325.8829040527344, 477.93341064453125, 53.73478698730469, 130.108154296875, 25.871692657470703, -15.464412689208984, -40.87652587890625, 82.38995361328125, 77.33196258544922, 199.09329223632812, -28.490066528320312, -38.75696563720703, 25.09053611755371, -44.8773193359375, 185.0198974609375, 172.5510711669922, -8.2255859375, -102.50442504882812, 117.27719116210938, 116.47803497314453, -218.680419921875, 61.80256271362305, 124.567138671875, 241.04022216796875, 122.2049560546875, -17.20178985595703, -79.19367980957031, 287.89385986328125, 207.6036376953125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000172.npy"} +{"epoch": 0.2525697503671072, "step": 173, "batch_size": 64, "mean": 72.93345642089844, "std": 107.1478271484375, "min": -202.35015869140625, "p10": -32.523614120483394, "median": 54.441823959350586, "p90": 237.11280212402343, "max": 376.60504150390625, "pos_frac": 0.8125, "sample": [38.03215026855469, 82.34513092041016, 35.51471710205078, -7.608757019042969, 3.495716094970703, 42.06156921386719, 0.16426467895507812, 157.06646728515625, 138.40599060058594, 12.070907592773438, -17.501678466796875, -70.78562927246094, 16.005464553833008, 10.748085021972656, 97.29872131347656, 264.69244384765625, 87.9435043334961, 69.18223571777344, 74.89762115478516, 239.32363891601562, 131.83375549316406, 107.18084716796875, 81.88870239257812, -80.75261688232422, 55.01573944091797, 26.687156677246094, 375.9169921875, 33.095096588134766, 169.76568603515625, 237.85748291015625, 77.24361419677734, 15.832555770874023, -69.74673461914062, 118.13966369628906, 102.18536376953125, -31.181930541992188, 132.97607421875, 9.16217041015625, -2.4508819580078125, 235.37521362304688, 77.36514282226562, 221.19668579101562, -33.0986213684082, 51.050437927246094, -16.789146423339844, 15.288936614990234, 245.258544921875, 119.91966247558594, -48.107574462890625, 8.8436279296875, 97.72547149658203, 53.8679084777832, 145.31451416015625, -87.14108276367188, -202.35015869140625, 376.60504150390625, 34.851417541503906, 6.391298294067383, 95.63925170898438, 27.08908462524414, 307.1037292480469, 71.85807800292969, 63.51084518432617, 36.97137451171875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000173.npy"} +{"epoch": 0.2540381791483113, "step": 174, "batch_size": 64, "mean": 88.55628967285156, "std": 129.46075439453125, "min": -181.3538055419922, "p10": -59.93233489990234, "median": 65.61456680297852, "p90": 270.6243759155274, "max": 481.3488464355469, "pos_frac": 0.8125, "sample": [20.409835815429688, 6.7804412841796875, -56.128562927246094, -122.78448486328125, 11.617385864257812, 43.33251190185547, -18.418704986572266, 6.402334213256836, 112.3564224243164, 225.9495086669922, 253.5518035888672, 7.644157409667969, 75.02767181396484, 20.362699508666992, 56.075523376464844, -48.775115966796875, 134.5655517578125, 186.6568145751953, 138.77589416503906, 277.9411926269531, 56.20146179199219, 34.593624114990234, 81.8880844116211, -18.8345890045166, 361.1807556152344, -82.13775634765625, 153.08343505859375, 316.81781005859375, 52.90147399902344, 98.37594604492188, 14.332014083862305, -13.640766143798828, 84.80440521240234, -88.72274780273438, 174.3375244140625, -61.562522888183594, 249.94454956054688, 123.1839828491211, -126.23373413085938, 318.21417236328125, 340.77874755859375, 343.5862121582031, 45.207244873046875, 150.61505126953125, 15.294961929321289, -181.3538055419922, 2.0731544494628906, 131.6436767578125, 114.35780334472656, 162.67108154296875, 76.63996887207031, 48.320159912109375, 14.422218322753906, 155.0596923828125, -103.64850616455078, 187.3676300048828, 171.30015563964844, 97.95523071289062, 50.44403839111328, 13.921745300292969, 481.3488464355469, 143.12696838378906, 113.47355651855469, 32.926780700683594], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000174.npy"} +{"epoch": 0.2555066079295154, "step": 175, "batch_size": 64, "mean": 116.53382873535156, "std": 118.18341064453125, "min": -136.80764770507812, "p10": -12.509245872497559, "median": 96.27736282348633, "p90": 257.97295837402345, "max": 434.91253662109375, "pos_frac": 0.828125, "sample": [25.964872360229492, 40.24542236328125, 63.61481475830078, -136.80764770507812, 95.67604064941406, 207.53054809570312, 247.71585083007812, 101.6581802368164, 22.63524055480957, 165.56211853027344, 332.42169189453125, -89.92924499511719, -24.174041748046875, 332.33306884765625, 146.10760498046875, 40.95440673828125, 236.713623046875, 258.2967529296875, 257.2174377441406, 175.65151977539062, 198.6768798828125, -82.95734405517578, 205.77133178710938, 434.91253662109375, 60.29431915283203, 145.0791015625, 330.9752197265625, 46.53285217285156, 142.33291625976562, 94.19561004638672, 2.0306930541992188, 66.67839813232422, 318.20220947265625, 14.517391204833984, -0.36666107177734375, 37.45533752441406, -4.132450103759766, 299.7471008300781, 4.776313781738281, 95.37337493896484, 139.73129272460938, 126.98274230957031, 137.12466430664062, 234.490478515625, -3.1599063873291016, 239.975830078125, 254.87078857421875, 205.47242736816406, -35.832157135009766, 104.89313507080078, 187.29132080078125, 96.8786849975586, 92.45054626464844, 162.64492797851562, 48.19501495361328, 78.64752197265625, 160.2295379638672, -12.480119705200195, 39.23064422607422, 220.685546875, -56.531776428222656, 84.54704284667969, -12.521728515625, 54.86528015136719], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000175.npy"} +{"epoch": 0.25697503671071953, "step": 176, "batch_size": 64, "mean": 101.51274108886719, "std": 132.17630004882812, "min": -80.47569274902344, "p10": -40.32006149291992, "median": 60.6603889465332, "p90": 255.21897277832036, "max": 565.835205078125, "pos_frac": 0.8125, "sample": [18.34564208984375, 34.30491638183594, 105.43209838867188, 269.3408203125, 69.85932922363281, 30.45950698852539, 8.511077880859375, 361.9445495605469, 17.177827835083008, -31.479568481445312, -41.346534729003906, 481.6135559082031, 12.96745491027832, -80.47569274902344, 260.57122802734375, 74.9114990234375, 46.617095947265625, 241.72410583496094, 195.44049072265625, -4.369834899902344, 7.6390838623046875, 60.27959442138672, 164.1421356201172, -46.03596496582031, 242.73037719726562, 2.7431697845458984, 31.5699405670166, 225.32518005371094, 209.91070556640625, 19.04444122314453, 133.16099548339844, -55.582061767578125, 174.2496337890625, -58.60234832763672, 53.45719909667969, 195.54681396484375, -56.71152877807617, 118.70958709716797, 199.93280029296875, 146.19862365722656, 218.6165771484375, 1.1203289031982422, -55.16209411621094, -3.9742469787597656, 153.78305053710938, 150.86557006835938, 140.8131866455078, 81.83676147460938, 134.47271728515625, 311.924560546875, 34.03935241699219, -37.924957275390625, 31.3758544921875, 17.584606170654297, 151.32034301757812, 61.04118347167969, -12.121532440185547, 1.5353813171386719, 136.2624053955078, 1.6985740661621094, 177.97100830078125, 565.835205078125, 22.268112182617188, 372.37554931640625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000176.npy"} +{"epoch": 0.25844346549192365, "step": 177, "batch_size": 64, "mean": 101.632568359375, "std": 130.6456298828125, "min": -147.1535186767578, "p10": -21.81004199981689, "median": 70.69011306762695, "p90": 265.9800994873047, "max": 609.20947265625, "pos_frac": 0.734375, "sample": [201.57662963867188, 55.973480224609375, 260.13116455078125, 143.09255981445312, 64.40512084960938, 133.05384826660156, -147.1535186767578, 113.85868835449219, 208.5921630859375, 189.6007080078125, 288.0478515625, 48.38166809082031, 89.73919677734375, 302.7166442871094, 145.9109649658203, 96.66307067871094, 27.99970245361328, 406.6389465332031, 37.71385192871094, 225.6806640625, 71.61386108398438, 182.35084533691406, -89.14689636230469, 166.90078735351562, -1.0782546997070312, 46.08911895751953, 214.31591796875, -11.740966796875, -5.5203704833984375, -3.3335113525390625, 69.76636505126953, 303.1348876953125, 120.81248474121094, -34.51927185058594, 14.216339111328125, 122.57987976074219, -17.9971923828125, 268.4867858886719, 1.1135120391845703, -26.09661293029785, -86.86125183105469, 188.12176513671875, -4.576986312866211, 55.30499267578125, -77.91232299804688, 167.9546356201172, 134.2793731689453, 34.66489791870117, -12.433570861816406, -5.945892333984375, 222.46876525878906, 15.57512092590332, 76.42295837402344, -12.275588989257812, 228.07757568359375, 43.036922454833984, 43.97453308105469, -23.444120407104492, 196.54519653320312, 52.223114013671875, 609.20947265625, 313.24981689453125, 74.89604949951172, -12.64178466796875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000177.npy"} +{"epoch": 0.2599118942731278, "step": 178, "batch_size": 64, "mean": 123.82139587402344, "std": 156.37478637695312, "min": -141.23056030273438, "p10": -16.3814416885376, "median": 115.44565200805664, "p90": 361.15510253906257, "max": 595.0059814453125, "pos_frac": 0.75, "sample": [60.98225021362305, 13.355331420898438, 163.297607421875, -3.773651123046875, 172.04388427734375, 95.57279205322266, 112.85492706298828, 47.05104064941406, 17.674671173095703, 297.4652099609375, -5.049201965332031, 119.85139465332031, 347.0068054199219, 119.88885498046875, 52.164100646972656, 161.00930786132812, 415.9053039550781, 138.25148010253906, 595.0059814453125, -88.43440246582031, 222.3529510498047, 64.3680191040039, 3.744781494140625, 58.521751403808594, -3.9032764434814453, 191.10977172851562, 420.5353698730469, -60.65709686279297, 61.135986328125, -10.553169250488281, -112.42216491699219, 126.22567749023438, 224.88226318359375, 22.008403778076172, 168.89443969726562, 199.6071319580078, 157.98162841796875, -111.9381103515625, 65.82160186767578, 298.0080261230469, 31.859310150146484, 118.036376953125, -141.23056030273438, 465.5491027832031, -5.695636749267578, 175.80308532714844, 126.58135986328125, 163.89846801757812, -16.495229721069336, 494.0638732910156, 40.56559753417969, 154.27737426757812, 367.2186584472656, -0.2421588897705078, -10.1241455078125, 251.90174865722656, 125.80767822265625, -16.115936279296875, 37.40312194824219, 254.22152709960938, 397.30181884765625, -103.43072509765625, 201.16207885742188, -5.595176696777344], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000178.npy"} +{"epoch": 0.26138032305433184, "step": 179, "batch_size": 64, "mean": 86.20681762695312, "std": 154.88780212402344, "min": -281.56451416015625, "p10": -95.23835220336915, "median": 90.79534530639648, "p90": 321.46614074707037, "max": 416.2389831542969, "pos_frac": 0.6875, "sample": [131.47064208984375, 173.92430114746094, 195.26351928710938, 416.2389831542969, 100.89341735839844, -102.63627624511719, 202.02589416503906, 21.132122039794922, -25.40399932861328, 10.748220443725586, 124.15545654296875, 68.32415008544922, 58.08122253417969, 108.73188018798828, -16.66162872314453, 102.22697448730469, -95.50790405273438, 335.2160949707031, -62.27231979370117, -90.3243408203125, 76.7969970703125, -106.48468017578125, 400.0874938964844, 25.57310676574707, 229.6325225830078, 143.55624389648438, 169.44097900390625, 328.09368896484375, 180.6729736328125, 39.75309753417969, 96.77995300292969, 306.0018615722656, 386.87310791015625, 104.11180114746094, 84.81073760986328, 146.7783203125, -38.22025680541992, -30.552539825439453, -8.183815002441406, 97.96367645263672, 105.55477142333984, -42.811279296875, 80.80087280273438, -117.66447448730469, 121.1597900390625, 383.7364196777344, -94.6093978881836, 277.88421630859375, 110.87096405029297, -154.30899047851562, -60.30475616455078, 266.731201171875, 28.140518188476562, 271.9095764160156, 334.9598693847656, 229.31968688964844, -189.37673950195312, 134.29660034179688, -75.80274200439453, -70.65064239501953, -281.56451416015625, 26.277618408203125, 24.58791160583496, -81.01168060302734], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000179.npy"} +{"epoch": 0.26284875183553597, "step": 180, "batch_size": 64, "mean": 114.30379486083984, "std": 138.55111694335938, "min": -212.0823211669922, "p10": -51.88935089111327, "median": 103.78190231323242, "p90": 278.81434020996096, "max": 579.3773803710938, "pos_frac": 0.796875, "sample": [112.7789535522461, 186.51089477539062, -63.70881652832031, 10.011219024658203, -73.4395980834961, 579.3773803710938, -33.23093795776367, 94.83798217773438, 112.89057922363281, 68.57878112792969, 3.224822998046875, 261.8760986328125, -212.0823211669922, 158.27529907226562, 190.3650665283203, 142.74267578125, 103.16854858398438, 89.8084716796875, 104.39525604248047, 113.25654602050781, 180.218505859375, -176.83291625976562, 82.87550354003906, 315.94183349609375, 69.56778717041016, 253.43496704101562, 167.0618896484375, -4.9133453369140625, 39.426414489746094, 62.92775344848633, 289.56951904296875, -0.8031349182128906, 256.3949890136719, 172.67349243164062, 81.68016815185547, -1.0461406707763672, 78.00208282470703, 219.27792358398438, -56.413818359375, 210.75576782226562, 95.97930145263672, 25.83972930908203, 257.10003662109375, 67.05436706542969, -134.66424560546875, 109.61491394042969, 6.06402587890625, 294.64837646484375, 259.73834228515625, 116.15860748291016, 162.8946990966797, 279.8892517089844, -41.33226013183594, 358.745361328125, 91.20011138916016, -32.12104797363281, 222.26123046875, 192.1514434814453, 28.821495056152344, -82.13956451416016, 276.30621337890625, 331.14007568359375, 36.420875549316406, 204.2352294921875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000180.npy"} +{"epoch": 0.2643171806167401, "step": 181, "batch_size": 64, "mean": 67.79975128173828, "std": 128.6459503173828, "min": -234.97799682617188, "p10": -65.20425949096679, "median": 50.156349182128906, "p90": 227.01525115966803, "max": 398.72113037109375, "pos_frac": 0.765625, "sample": [153.328369140625, -63.951568603515625, 14.947393417358398, 51.07561111450195, 118.58454132080078, -3.239004135131836, 37.48714828491211, 32.263343811035156, 98.93282318115234, 141.05445861816406, -11.739002227783203, 18.228479385375977, 4.200109481811523, 13.448692321777344, -188.40023803710938, 240.80001831054688, 139.80271911621094, -33.353736877441406, 84.3970947265625, -11.20553970336914, 49.23708724975586, -65.74112701416016, -24.158309936523438, 243.2423858642578, -80.91941833496094, 92.21388244628906, 122.81173706054688, 314.9019775390625, 69.07808685302734, 53.118736267089844, 128.40386962890625, 87.07694244384766, -227.30189514160156, 48.144981384277344, 16.255783081054688, 89.90977478027344, 17.084774017333984, 204.9333953857422, 136.43238830566406, -78.10847473144531, 11.244789123535156, -234.97799682617188, 234.50653076171875, -22.98202896118164, 398.72113037109375, 187.78627014160156, -205.59458923339844, 66.28633880615234, -55.70307922363281, 149.7167205810547, 209.5355987548828, 397.02490234375, 160.96334838867188, 31.485454559326172, 133.34127807617188, 5.671833038330078, 5.112604141235352, 11.191383361816406, 309.2489013671875, 54.28176498413086, 32.86298370361328, 208.5863037109375, 169.51239013671875, 48.083091735839844], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000181.npy"} +{"epoch": 0.2657856093979442, "step": 182, "batch_size": 64, "mean": 124.02388000488281, "std": 145.47674560546875, "min": -233.61288452148438, "p10": -58.23430671691892, "median": 109.54048156738281, "p90": 309.23144531250006, "max": 416.3148498535156, "pos_frac": 0.84375, "sample": [317.9324645996094, 90.16316223144531, 346.8059997558594, 225.72158813476562, 92.03103637695312, 251.70787048339844, 260.8251953125, 285.88348388671875, 359.798828125, 84.64837646484375, 178.3543243408203, 49.20772171020508, 30.974138259887695, -24.658016204833984, 291.9760437011719, -233.61288452148438, 265.7052307128906, 72.94650268554688, 5.394649505615234, 64.16326904296875, 411.00286865234375, 5.044229507446289, 264.44610595703125, 120.45652770996094, 169.59573364257812, -69.03793334960938, 282.5620422363281, 98.62443542480469, 21.976123809814453, 77.32573699951172, 172.239013671875, 288.5437927246094, -121.7188491821289, -139.15744018554688, 187.12014770507812, 250.47354125976562, 43.421173095703125, 152.75942993164062, 240.59674072265625, 221.1697235107422, -89.75035858154297, 77.30562591552734, 27.71635627746582, 135.93861389160156, 202.70249938964844, 191.2952880859375, 203.97312927246094, 126.52674865722656, -108.51768493652344, 6.140872955322266, 416.3148498535156, 151.94821166992188, 46.74945068359375, 8.401971817016602, -31.591110229492188, 9.851369857788086, -131.77349853515625, 86.67786407470703, 316.6266174316406, 21.80730438232422, 381.664306640625, -33.02584457397461, 56.81703186035156, 170.3164825439453], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000182.npy"} +{"epoch": 0.26725403817914833, "step": 183, "batch_size": 64, "mean": 64.02972412109375, "std": 118.47893524169922, "min": -190.13597106933594, "p10": -92.89681015014648, "median": 66.81543731689453, "p90": 207.207748413086, "max": 373.399169921875, "pos_frac": 0.75, "sample": [16.817459106445312, 84.68704986572266, -14.786558151245117, 73.11693572998047, 15.91445541381836, 169.52064514160156, 142.2489776611328, 152.931396484375, 49.03826904296875, 63.046730041503906, -89.72962951660156, 147.7247314453125, 67.171875, 23.027406692504883, 38.768585205078125, 99.88260650634766, 373.399169921875, 233.37643432617188, 152.45924377441406, 35.47687530517578, 71.14643096923828, 130.7149658203125, 308.35479736328125, 109.84202575683594, -17.761024475097656, 5.010646820068359, 77.68902587890625, 66.45899963378906, -6.9273681640625, -190.13597106933594, 187.1007080078125, 82.67884063720703, -58.35301971435547, 15.597156524658203, 191.005615234375, 79.75796508789062, 42.75701141357422, 123.12166595458984, 146.71548461914062, 259.2823486328125, -158.9371337890625, -39.005821228027344, 340.96038818359375, -86.99849700927734, 31.215919494628906, -39.499267578125, 73.96116638183594, -106.31736755371094, -122.68819427490234, -117.19845581054688, 175.055908203125, 131.21868896484375, 225.76100158691406, 214.15151977539062, 93.43177795410156, 49.590919494628906, 27.79779052734375, -94.2541732788086, 162.7701416015625, 95.83924865722656, -162.25906372070312, -82.87928771972656, 21.024932861328125, 7.0115509033203125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000183.npy"} +{"epoch": 0.2687224669603524, "step": 184, "batch_size": 64, "mean": 68.87787628173828, "std": 125.69293212890625, "min": -188.4129638671875, "p10": -70.48152122497557, "median": 37.929887771606445, "p90": 229.72815246582033, "max": 383.8316345214844, "pos_frac": 0.734375, "sample": [29.912139892578125, -133.4888916015625, 81.79796600341797, 87.29119110107422, 200.4324188232422, 219.90509033203125, -11.952484130859375, 13.038040161132812, 170.96481323242188, -20.753768920898438, 339.1295471191406, 383.8316345214844, 71.33364868164062, 233.56033325195312, -77.19010162353516, 226.58120727539062, 6.726837158203125, 225.08346557617188, 111.82471466064453, 309.44635009765625, -15.559532165527344, 331.6009826660156, -108.13046264648438, 27.374099731445312, 42.34384536743164, 39.74567413330078, 38.98893737792969, 9.523582458496094, 90.51103210449219, 0.6761455535888672, 42.98164367675781, 147.2275390625, 160.28823852539062, 216.3512725830078, -100.49003601074219, 231.07684326171875, 12.720954895019531, -14.717613220214844, -48.496334075927734, 23.131732940673828, 73.63875579833984, -29.9647216796875, 36.8708381652832, 16.066362380981445, 25.594749450683594, 214.9825897216797, 74.75537109375, 57.64529800415039, -78.63639831542969, -153.48599243164062, -54.82816696166992, 19.50600242614746, -7.368961334228516, -8.751380920410156, -188.4129638671875, 33.37092590332031, -46.764434814453125, 184.21261596679688, 41.887794494628906, 127.04073333740234, 14.320381164550781, 1.936676025390625, 101.95315551757812, 357.9920959472656], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000184.npy"} +{"epoch": 0.2701908957415565, "step": 185, "batch_size": 64, "mean": 52.9513053894043, "std": 132.62489318847656, "min": -202.38690185546875, "p10": -70.14452972412109, "median": 26.676910400390625, "p90": 228.2518173217774, "max": 461.5385437011719, "pos_frac": 0.59375, "sample": [-175.603271484375, -68.46900939941406, -22.898073196411133, 89.52389526367188, 73.55641174316406, 14.308320999145508, -8.470331192016602, 79.97689819335938, 449.1899719238281, -12.216453552246094, 27.626934051513672, 122.92778778076172, 112.74772644042969, -7.782917022705078, 294.1100769042969, 40.706756591796875, -10.636589050292969, 30.77252960205078, 144.6269073486328, -112.69387817382812, -8.356821060180664, 36.67277526855469, 25.726886749267578, 97.29203796386719, 69.24984741210938, -16.627525329589844, -134.0389404296875, 14.62591552734375, 88.9658432006836, -137.78036499023438, 15.65118408203125, 22.386043548583984, 92.28715515136719, 108.11961364746094, 38.959808349609375, 380.9720458984375, -57.53722381591797, 28.43592071533203, 212.53778076171875, -70.86260986328125, 38.0046501159668, -6.753824234008789, -27.51160430908203, -9.278274536132812, -24.51884651184082, -12.152624130249023, 344.307373046875, 139.1739501953125, 149.96844482421875, -2.163778305053711, 461.5385437011719, -39.553489685058594, 23.33562469482422, 75.89263916015625, -23.0076904296875, 234.9864044189453, 54.44976806640625, 126.24374389648438, -202.38690185546875, 298.8709411621094, -85.56502532958984, 100.68799591064453, -26.364791870117188, -67.302734375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000185.npy"} +{"epoch": 0.27165932452276065, "step": 186, "batch_size": 64, "mean": 104.33164978027344, "std": 127.11091613769531, "min": -207.13262939453125, "p10": -27.928759765625, "median": 86.02955627441406, "p90": 257.20805053710944, "max": 480.9671630859375, "pos_frac": 0.84375, "sample": [-207.13262939453125, 140.1988983154297, 315.7630920410156, 173.5971221923828, 172.72305297851562, 18.206085205078125, 76.28359985351562, 46.34000778198242, 223.11111450195312, 147.35203552246094, 79.09890747070312, 237.77215576171875, 1.5440139770507812, 92.960205078125, 44.47758102416992, -17.010135650634766, 107.0175552368164, -118.77806091308594, 45.282508850097656, 62.636749267578125, 327.30487060546875, 55.641178131103516, 1.5187454223632812, 197.79550170898438, 44.94036102294922, 77.96990966796875, 97.2095947265625, 163.98016357421875, 361.7618408203125, 134.50209045410156, 194.14865112304688, -38.51361083984375, 164.6004638671875, 75.88536071777344, 25.391252517700195, 132.2405548095703, 436.29888916015625, 265.5377197265625, 44.200748443603516, 8.258045196533203, 173.4710693359375, 25.01984977722168, 72.19807434082031, 102.57821655273438, -69.906005859375, 413.1590576171875, -52.31288146972656, 65.31407928466797, -24.241180419921875, -56.62864685058594, 480.9671630859375, 162.897705078125, 71.30601501464844, 158.17767333984375, 12.168243408203125, 153.080810546875, 99.94926452636719, -27.992759704589844, 1.3201141357421875, 130.3970947265625, 151.3414306640625, 139.32264709472656, -27.77942657470703, 111.30145263671875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000186.npy"} +{"epoch": 0.27312775330396477, "step": 187, "batch_size": 64, "mean": 88.75601959228516, "std": 116.54401397705078, "min": -174.64846801757812, "p10": -37.8664852142334, "median": 69.07188034057617, "p90": 227.15955810546876, "max": 404.25592041015625, "pos_frac": 0.765625, "sample": [-174.64846801757812, 213.34072875976562, -20.32874298095703, 60.55925369262695, -81.39689636230469, 271.3152160644531, 92.90447998046875, 198.61083984375, 59.36260223388672, 40.02853012084961, 81.94638061523438, 74.1592025756836, 360.31512451171875, 83.09383392333984, 159.16358947753906, 219.28079223632812, 176.95018005371094, 84.72990417480469, 133.86260986328125, -40.5673942565918, 63.98455810546875, 36.95597839355469, 404.25592041015625, -34.9400634765625, 183.00352478027344, 48.0499267578125, 227.52734375, -38.1406364440918, 211.49964904785156, 55.2733154296875, -66.35137939453125, 79.04936981201172, 24.45562744140625, -66.32249450683594, -37.22679901123047, 173.8985595703125, -34.47911834716797, 8.366086959838867, -6.029563903808594, 229.16940307617188, 164.32069396972656, 369.01788330078125, 113.09496307373047, 61.84648895263672, 57.26971435546875, 39.600486755371094, 84.27651977539062, 47.26832580566406, -140.67227172851562, 48.12786102294922, 124.87345123291016, -15.531623840332031, 187.8223876953125, 102.56222534179688, 81.92818450927734, 220.1092987060547, 96.61520385742188, -8.267494201660156, 60.740966796875, 268.2914123535156, 226.3013916015625, 38.707481384277344, -21.6919002532959, 19.092878341674805], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000187.npy"} +{"epoch": 0.2745961820851689, "step": 188, "batch_size": 64, "mean": 107.2442626953125, "std": 133.30661010742188, "min": -167.40847778320312, "p10": -25.637125587463366, "median": 82.1508560180664, "p90": 305.19841613769535, "max": 534.2213134765625, "pos_frac": 0.796875, "sample": [339.75860595703125, 149.368408203125, 277.49176025390625, 87.50689697265625, 79.92438507080078, -14.440048217773438, -31.070587158203125, -149.19113159179688, 326.70751953125, 78.76180267333984, 58.21521759033203, -30.79521942138672, 132.59359741210938, 81.45069885253906, 311.43280029296875, 266.52423095703125, 113.19556427001953, -30.888954162597656, 67.93134307861328, 7.582246780395508, -5.6444854736328125, 28.961402893066406, 21.276092529296875, 150.95159912109375, 87.1958999633789, 30.68071746826172, 5.3505859375, -13.293231964111328, 356.8020324707031, 76.78089904785156, 413.94012451171875, -167.40847778320312, 290.6515197753906, 123.59400939941406, 156.10296630859375, 127.33613586425781, 72.844970703125, 205.05433654785156, -13.24195671081543, 218.34890747070312, 187.2849578857422, 12.171600341796875, -103.189697265625, -30.43587303161621, 215.32752990722656, 45.13806915283203, 224.71966552734375, 70.54338073730469, 26.447433471679688, 348.76025390625, 96.12848663330078, 183.477783203125, 58.49921798706055, -9.83072280883789, 191.41514587402344, 97.33265686035156, 105.21390533447266, 534.2213134765625, 102.58892822265625, 82.85101318359375, 109.638671875, -10.745012283325195, 18.61853790283203, 19.112342834472656], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000188.npy"} +{"epoch": 0.27606461086637296, "step": 189, "batch_size": 64, "mean": 88.35538482666016, "std": 149.54371643066406, "min": -199.92648315429688, "p10": -86.85411300659179, "median": 70.72547912597656, "p90": 225.03657836914067, "max": 618.8902587890625, "pos_frac": 0.78125, "sample": [52.23041534423828, 189.4145965576172, -140.0286865234375, 367.659912109375, -90.82658386230469, -77.58501434326172, 44.732208251953125, 11.654243469238281, 7.880376815795898, 32.723358154296875, 156.30503845214844, 169.35269165039062, 163.98291015625, -30.303131103515625, 177.79660034179688, 56.970306396484375, 215.00637817382812, 50.15164566040039, -192.46875, 5.401638031005859, -102.0151138305664, 28.862924575805664, 96.64424133300781, 209.54849243164062, 169.2825927734375, 98.80245971679688, -43.39714050292969, 66.30531311035156, 38.84526443481445, 206.79708862304688, 0.3971824645996094, 30.15895652770996, 103.83473205566406, 546.3587646484375, 85.0020751953125, -31.237548828125, 31.18383026123047, 254.49806213378906, 88.89117431640625, 12.714363098144531, -186.0149383544922, 136.89688110351562, -62.77471923828125, 166.8011474609375, 67.82281494140625, 365.0697021484375, 168.2716522216797, 618.8902587890625, -14.732887268066406, 133.8428497314453, 179.33729553222656, 32.4821662902832, 91.4140625, 169.04013061523438, 124.83049011230469, 229.33523559570312, 290.4132080078125, 101.37803649902344, -9.244918823242188, -199.92648315429688, 73.62814331054688, -95.02411651611328, 33.5361328125, 177.94442749023438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000189.npy"} +{"epoch": 0.2775330396475771, "step": 190, "batch_size": 64, "mean": 104.86445617675781, "std": 154.56910705566406, "min": -225.5298309326172, "p10": -74.81624984741211, "median": 72.21779251098633, "p90": 341.7156616210938, "max": 481.2259826660156, "pos_frac": 0.796875, "sample": [-2.040374755859375, 120.51595306396484, 219.2001953125, 190.43560791015625, 18.822864532470703, -93.48870086669922, 311.7527770996094, 40.326942443847656, 16.17792510986328, 398.39788818359375, 56.506195068359375, 77.0810775756836, 139.49862670898438, 7.579383850097656, 392.18798828125, 152.681640625, 116.19100952148438, 81.94056701660156, 215.48245239257812, 11.693161010742188, 3.4765243530273438, 298.43341064453125, -225.5298309326172, 41.565696716308594, 279.20947265625, -24.419021606445312, 91.66192626953125, 345.10491943359375, 143.4248504638672, 64.61250305175781, -11.517776489257812, 343.343505859375, 30.164085388183594, -3.7424182891845703, 156.78042602539062, 90.258544921875, -97.98888397216797, -75.98060607910156, -145.82305908203125, 9.201828002929688, 120.6915054321289, -72.09941864013672, 110.96092987060547, 67.35450744628906, -36.76902770996094, -119.52597045898438, 123.94850158691406, 337.9173583984375, 61.191070556640625, 58.50128936767578, -206.4551544189453, 427.9117431640625, 140.68719482421875, 19.4733829498291, 213.25852966308594, 113.78609466552734, 67.28005981445312, 419.5372314453125, 5.530437469482422, 481.2259826660156, 255.2957763671875, 241.56370544433594, 51.67267990112305, 45.20782470703125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000190.npy"} +{"epoch": 0.2790014684287812, "step": 191, "batch_size": 64, "mean": 150.44508361816406, "std": 166.256103515625, "min": -171.82046508789062, "p10": -22.267062377929683, "median": 127.4138298034668, "p90": 344.23927001953126, "max": 618.817626953125, "pos_frac": 0.859375, "sample": [546.436279296875, 154.46673583984375, 618.817626953125, 41.692138671875, 309.117431640625, 79.47438049316406, 158.15859985351562, 297.17816162109375, 24.803442001342773, 376.44866943359375, 101.04293823242188, 181.99276733398438, -16.176177978515625, 307.5865783691406, 313.5963134765625, 53.825904846191406, 39.30145263671875, 86.15666961669922, 40.40708923339844, 81.3498764038086, 288.44549560546875, 66.29598999023438, 63.3099365234375, 333.0574951171875, 126.2496109008789, 4.5647125244140625, 9.332355499267578, 568.220703125, 230.57337951660156, -37.25788116455078, 245.80926513671875, -85.23036193847656, 18.012588500976562, -61.39296340942383, 165.04244995117188, 179.65435791015625, 222.25564575195312, 158.98065185546875, 342.4676513671875, 28.504194259643555, 14.287210464477539, 272.9967956542969, 254.02267456054688, -171.82046508789062, 78.19659423828125, 545.6143798828125, 26.130258560180664, -84.13728332519531, 128.5780487060547, 109.51420593261719, 207.7053985595703, 344.99853515625, 15.441688537597656, -11.409870147705078, 135.557861328125, 201.288330078125, -75.8356704711914, 138.40443420410156, -24.87744140625, 139.896728515625, 389.1717529296875, 38.46984100341797, 98.21424865722656, 195.50482177734375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000191.npy"} +{"epoch": 0.28046989720998533, "step": 192, "batch_size": 64, "mean": 99.0456771850586, "std": 160.1370086669922, "min": -252.70956420898438, "p10": -62.53718948364258, "median": 75.05850219726562, "p90": 338.2466674804688, "max": 605.1001586914062, "pos_frac": 0.734375, "sample": [-61.950767517089844, -154.62461853027344, 45.313865661621094, 190.56394958496094, 151.52455139160156, -41.96625518798828, -66.21880340576172, -2.035306930541992, 37.00856018066406, -252.70956420898438, 30.899169921875, 118.93910217285156, -42.37391662597656, 220.9925994873047, 41.46458435058594, 46.90516662597656, -62.78851318359375, 108.96626281738281, -43.0203857421875, 200.6414031982422, -127.64727020263672, 448.3812561035156, -44.58388900756836, 74.770751953125, -46.32844543457031, 340.2440185546875, 376.26171875, 395.7997741699219, 190.29107666015625, 416.57037353515625, 114.03021240234375, 81.31065368652344, 605.1001586914062, 121.58328247070312, 189.70220947265625, 28.495912551879883, 125.92750549316406, 243.5644989013672, 285.3694763183594, 75.34625244140625, 33.32820129394531, 333.586181640625, 15.118783950805664, 34.514373779296875, 144.74508666992188, 61.286521911621094, 60.64582061767578, -188.83033752441406, 220.40151977539062, 88.04346466064453, 90.06364440917969, -24.5994873046875, -57.385826110839844, -82.22815704345703, 14.153018951416016, 230.6390380859375, 71.22744750976562, -35.64942932128906, 51.597564697265625, 227.79425048828125, 145.54318237304688, 118.34490966796875, 77.34327697753906, 349.5199279785156], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000192.npy"} +{"epoch": 0.28193832599118945, "step": 193, "batch_size": 64, "mean": 61.21249771118164, "std": 156.23678588867188, "min": -453.80279541015625, "p10": -79.26501007080078, "median": 58.86513900756836, "p90": 266.8486328125, "max": 382.3663330078125, "pos_frac": 0.6875, "sample": [118.79603576660156, -11.233428955078125, 28.52832794189453, 206.5533447265625, -453.80279541015625, 100.16305541992188, 80.74813079833984, 60.240318298339844, -425.05267333984375, 353.9803161621094, 62.266937255859375, -6.107503890991211, 233.2176513671875, -31.604427337646484, -38.036407470703125, 109.4923095703125, 100.03195190429688, 143.64846801757812, 2.5683517456054688, 335.8323974609375, 2.168153762817383, -23.227745056152344, 24.498920440673828, 188.1429443359375, 27.045766830444336, 138.59912109375, -56.498714447021484, -61.51276397705078, 12.278350830078125, -72.30807495117188, -16.540790557861328, 263.921142578125, -79.33688354492188, -109.9801254272461, 268.103271484375, -134.40536499023438, 186.32000732421875, -108.8069076538086, 8.182962417602539, 57.489959716796875, 15.003433227539062, 185.443359375, 298.64996337890625, 191.33477783203125, 124.82913208007812, -141.29510498046875, 353.7724609375, 97.0594253540039, 130.61569213867188, 122.0564956665039, -69.24575805664062, 105.41375732421875, 11.875782012939453, -79.09730529785156, -72.325439453125, 139.88720703125, 382.3663330078125, 132.38540649414062, -63.79478073120117, 13.276199340820312, 310.43115234375, 99.87824249267578, 24.1533203125, 120.5621109008789], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000193.npy"} +{"epoch": 0.2834067547723935, "step": 194, "batch_size": 64, "mean": 92.10917663574219, "std": 111.39064025878906, "min": -132.51730346679688, "p10": -53.421557617187496, "median": 75.19519805908203, "p90": 263.03357543945316, "max": 320.6788330078125, "pos_frac": 0.8125, "sample": [158.2933349609375, 18.67469024658203, 39.76123046875, 13.074172973632812, 142.20484924316406, 280.1697998046875, 109.17277526855469, 170.81080627441406, -27.067886352539062, 28.232139587402344, 36.74943542480469, 273.58160400390625, -123.58262634277344, -78.25543212890625, 64.40711212158203, 75.07742309570312, 224.77069091796875, 64.73759460449219, 74.57583618164062, 139.85504150390625, 29.740962982177734, 85.48526000976562, 30.967548370361328, 8.367923736572266, 127.6697769165039, -88.94456481933594, -1.3910980224609375, 231.26007080078125, 70.35834503173828, 81.09762573242188, 265.80731201171875, 142.70716857910156, 270.02630615234375, 188.42333984375, 30.502182006835938, 313.304443359375, 72.49186706542969, 34.371620178222656, 163.1952667236328, 48.82200622558594, 196.82691955566406, -117.63888549804688, 46.75849914550781, -5.547035217285156, 106.99055480957031, 170.14334106445312, -17.875362396240234, -132.51730346679688, 40.90118408203125, -56.4765625, 110.79899597167969, -75.99474334716797, 59.58140563964844, 256.5615234375, 97.075927734375, 316.7276916503906, 141.4410400390625, 75.31297302246094, 320.6788330078125, 91.88723754882812, 183.34347534179688, 205.92141723632812, 136.87387084960938, -46.293212890625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000194.npy"} +{"epoch": 0.28487518355359764, "step": 195, "batch_size": 64, "mean": 102.76541900634766, "std": 126.4919662475586, "min": -114.46044158935547, "p10": -45.087635803222646, "median": 91.86490249633789, "p90": 266.3662139892578, "max": 430.4010925292969, "pos_frac": 0.796875, "sample": [91.64221954345703, -37.21307373046875, 232.83396911621094, 32.38790512084961, 79.33514404296875, 430.4010925292969, 77.61173248291016, 344.41217041015625, 233.1947021484375, -14.355674743652344, -1.055328369140625, 56.386474609375, 8.917137145996094, -90.38795471191406, 261.20135498046875, 124.92416381835938, 62.440826416015625, 245.0864715576172, -24.523277282714844, 350.87933349609375, -28.491985321044922, 110.84672546386719, -113.72589111328125, 51.148468017578125, 126.68963623046875, 118.86054229736328, 264.8210144042969, 314.6324462890625, 152.23548889160156, 130.60128784179688, 169.85629272460938, -79.56928253173828, 106.5959701538086, 196.263916015625, 21.53460693359375, 171.92918395996094, -33.70579528808594, 5.215293884277344, 194.63552856445312, 230.3665008544922, -114.46044158935547, 287.86944580078125, 38.718387603759766, 93.9457778930664, 410.1690979003906, 149.846435546875, 92.86211395263672, 59.84989929199219, -60.65593719482422, 7.807493209838867, 183.55667114257812, -111.49884033203125, 267.0284423828125, 109.21661376953125, 29.411056518554688, 92.08758544921875, 33.07061004638672, 45.025550842285156, 18.059965133666992, 84.51067352294922, -48.46244812011719, 63.681400299072266, 151.7658233642578, 118.72178649902344], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000195.npy"} +{"epoch": 0.28634361233480177, "step": 196, "batch_size": 64, "mean": 114.748291015625, "std": 155.8814239501953, "min": -244.244140625, "p10": -47.437404251098634, "median": 76.84928512573242, "p90": 335.2352844238282, "max": 493.5089111328125, "pos_frac": 0.765625, "sample": [219.50079345703125, 213.9188232421875, 341.4269104003906, 237.4072723388672, -47.64893341064453, 342.381591796875, 321.1572265625, 471.2186279296875, 253.01104736328125, 67.37042236328125, 5.485950469970703, 51.01612091064453, 309.7813720703125, 194.08135986328125, 55.041778564453125, 11.84050178527832, -16.828815460205078, 103.33280944824219, -15.332061767578125, 44.56123352050781, 492.2105712890625, -15.426742553710938, 59.18006896972656, 61.561954498291016, 141.21591186523438, 151.83273315429688, 128.4464569091797, 154.004150390625, 55.50029754638672, -11.106502532958984, 93.32577514648438, 71.14585876464844, 456.86199951171875, 493.5089111328125, -29.628814697265625, 195.28941345214844, 98.33187866210938, 67.53474426269531, 116.76214599609375, 102.91879272460938, 66.9013900756836, 47.551910400390625, 249.55615234375, -3.8842620849609375, 147.30535888671875, 48.617618560791016, 78.21802520751953, -160.57846069335938, 75.48054504394531, -244.244140625, -59.736572265625, 148.32830810546875, 99.81449890136719, -46.9438362121582, 341.26873779296875, -54.561729431152344, -142.08456420898438, 67.16294860839844, -4.6208343505859375, 137.47216796875, 284.3099670410156, 320.4368896484375, -103.378662109375, 5.305767059326172], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000196.npy"} +{"epoch": 0.2878120411160059, "step": 197, "batch_size": 64, "mean": 85.97522735595703, "std": 124.32783508300781, "min": -128.7225799560547, "p10": -52.610140991210926, "median": 49.36842727661133, "p90": 246.08966522216798, "max": 467.46954345703125, "pos_frac": 0.78125, "sample": [57.40428924560547, 329.0471496582031, 188.31251525878906, -44.88557434082031, 166.6036834716797, 9.558738708496094, -18.183212280273438, -78.150390625, 69.2956771850586, 467.46954345703125, 402.5940246582031, 248.77879333496094, 72.03372955322266, 133.7310791015625, -114.83589935302734, 10.234161376953125, 138.63836669921875, 289.1414794921875, 26.382423400878906, 34.6233024597168, 104.24409484863281, 219.4975128173828, 36.17448043823242, -5.59796142578125, 50.852088928222656, 4.025108337402344, -103.79361724853516, 143.7056884765625, 77.8583755493164, 98.89546203613281, 272.9336853027344, 145.17637634277344, 41.588539123535156, 239.81503295898438, 0.094512939453125, 201.43922424316406, -55.92066955566406, -3.6789703369140625, -63.179779052734375, -128.7225799560547, 38.94041442871094, 126.25399017333984, -1.4032821655273438, 1.7701797485351562, 23.42091178894043, 100.29238891601562, 325.5601806640625, 222.038330078125, 15.412506103515625, 220.27613830566406, -14.214761734008789, 47.384586334228516, 61.42029571533203, 16.311439514160156, 17.00499725341797, 131.94989013671875, 191.71566772460938, 99.02693176269531, 223.52676391601562, -26.227920532226562, -68.54742431640625, 19.764869689941406, 19.651824951171875, 47.884765625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000197.npy"} +{"epoch": 0.28928046989721, "step": 198, "batch_size": 64, "mean": 113.95219421386719, "std": 153.2172088623047, "min": -244.38531494140625, "p10": -36.1695442199707, "median": 97.26653671264648, "p90": 254.08613739013674, "max": 641.1304931640625, "pos_frac": 0.828125, "sample": [-134.19235229492188, 106.01863098144531, -32.69152069091797, 531.079345703125, 123.85537719726562, 205.87713623046875, 374.4460144042969, 73.62258911132812, 167.11863708496094, 44.78630828857422, 114.91973876953125, 200.05014038085938, -140.79287719726562, 25.58061981201172, -1.735809326171875, 251.2617950439453, 160.98541259765625, 136.97940063476562, 96.52072143554688, 96.73493957519531, -37.660125732421875, -53.48869323730469, 38.80644989013672, 201.95204162597656, 14.62354850769043, 97.79813385009766, -65.65262603759766, 206.67117309570312, 83.34820556640625, 92.21011352539062, 88.41875457763672, 127.3683090209961, 154.66806030273438, 83.44013977050781, 141.47190856933594, 5.895298004150391, 91.36796569824219, 168.83346557617188, 172.58547973632812, 255.29656982421875, -244.38531494140625, 571.05517578125, 108.5486068725586, 65.23892974853516, 1.3236122131347656, 18.857589721679688, 258.5244445800781, -41.20841979980469, 3.4686756134033203, 79.56510925292969, 45.19623947143555, 111.41546630859375, 105.59310913085938, 149.2850341796875, -18.61551856994629, 138.32745361328125, 168.8471221923828, 220.36773681640625, 25.10930824279785, 455.22186279296875, -21.745758056640625, 641.1304931640625, 45.479698181152344, 137.96173095703125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000198.npy"} +{"epoch": 0.2907488986784141, "step": 199, "batch_size": 64, "mean": 112.59829711914062, "std": 152.4805908203125, "min": -208.51266479492188, "p10": -55.11785087585449, "median": 95.36407852172852, "p90": 319.92273254394536, "max": 502.9893798828125, "pos_frac": 0.796875, "sample": [232.492431640625, 178.17991638183594, -208.51266479492188, -7.59796142578125, 289.77435302734375, 91.87660217285156, -58.329437255859375, 215.32264709472656, -190.87637329101562, 106.36946868896484, 7.783348083496094, -76.62905883789062, -194.50271606445312, 198.5885467529297, 225.32791137695312, 65.54344940185547, 24.883140563964844, 142.40341186523438, 421.15234375, 43.632301330566406, 92.39410400390625, 188.35801696777344, 377.59539794921875, 326.8572998046875, -80.55206298828125, 38.62147521972656, -32.882118225097656, 125.39545440673828, 221.46243286132812, 27.435134887695312, 36.3228759765625, 285.3888854980469, 502.9893798828125, 95.46234130859375, 95.55791473388672, 236.41241455078125, -17.277801513671875, 134.72763061523438, 4.13507080078125, 112.274169921875, 92.29788208007812, 311.0058288574219, 131.97314453125, 33.89101791381836, 323.7442626953125, 141.19732666015625, 80.15625, -17.636842727661133, -47.624149322509766, -164.1359100341797, 288.0155334472656, 93.88744354248047, 120.57583618164062, 78.8359603881836, 384.441650390625, 183.15443420410156, 9.251863479614258, 95.26581573486328, 72.26354217529297, 97.65675354003906, 48.78251647949219, 156.12313842773438, 450.03082275390625, -34.42291259765625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000199.npy"} +{"epoch": 0.2922173274596182, "step": 200, "batch_size": 64, "mean": 114.9112548828125, "std": 176.0589141845703, "min": -319.58477783203125, "p10": -54.677150726318345, "median": 88.48546981811523, "p90": 373.31447143554686, "max": 588.8513793945312, "pos_frac": 0.78125, "sample": [247.13973999023438, 118.27777099609375, 63.40278244018555, 457.2017517089844, 48.079471588134766, 165.01177978515625, 15.658599853515625, -3.673004150390625, 485.54656982421875, -88.56147766113281, 372.8968811035156, 272.2334289550781, 120.5500259399414, 27.522197723388672, 87.49031066894531, 370.1070251464844, 241.8203582763672, 419.1102294921875, -5.5948028564453125, 373.4934387207031, 75.0849609375, -9.568819046020508, 171.6697998046875, 244.5955352783203, -33.3121452331543, 66.81321716308594, 184.48269653320312, -238.05746459960938, 588.8513793945312, 112.55879974365234, 123.53443908691406, -36.467655181884766, 2.2077178955078125, 78.32022094726562, 56.335784912109375, -29.29670524597168, -319.58477783203125, 37.84375, 97.16761779785156, 387.1054992675781, 0.8818264007568359, 98.4874496459961, 68.3096923828125, 1.1055221557617188, 6.701259613037109, -196.368896484375, 89.48062896728516, -60.57817077636719, 325.0871887207031, -40.908103942871094, 7.508583068847656, 465.752197265625, 51.042572021484375, 18.39440155029297, 112.92851257324219, 299.829345703125, 186.2855224609375, 183.6160888671875, 107.89173889160156, 202.81942749023438, -127.56254577636719, 149.95689392089844, -82.16072082519531, 137.82315063476562], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000200.npy"} +{"epoch": 0.2936857562408223, "step": 201, "batch_size": 64, "mean": 113.26232147216797, "std": 160.2694549560547, "min": -322.4629821777344, "p10": -49.13734588623046, "median": 89.63496017456055, "p90": 302.9373809814453, "max": 528.8757934570312, "pos_frac": 0.796875, "sample": [281.8493957519531, 4.866941452026367, 237.8782196044922, -229.51565551757812, 61.68346405029297, -120.97706604003906, 6.578332901000977, 16.969762802124023, -25.064422607421875, 135.24118041992188, 6.139923095703125, 38.67535400390625, 53.18426513671875, -3.7138824462890625, 513.18017578125, 115.62834930419922, 62.22613525390625, 94.2801513671875, 30.689247131347656, 268.3446044921875, 406.53302001953125, 124.98043060302734, -1.8793792724609375, 116.54243469238281, 227.93020629882812, 299.453125, -85.70634460449219, 251.17909240722656, 209.172119140625, -22.600141525268555, 82.11514282226562, 142.43222045898438, 147.80538940429688, 120.47323608398438, -50.749786376953125, -78.58438110351562, 191.43008422851562, 80.91817474365234, 67.64254760742188, 528.8757934570312, 152.37652587890625, -39.03295135498047, 184.48318481445312, 87.32633972167969, 72.8021240234375, 184.67115783691406, 171.38720703125, 275.3048095703125, 413.3324890136719, 75.92303466796875, 91.9435806274414, -86.18476867675781, 163.7754364013672, 62.763641357421875, -322.4629821777344, 326.793701171875, 478.9062805175781, 151.7911834716797, 9.788690567016602, -45.37498474121094, 304.4306335449219, 67.67772674560547, 3.8223724365234375, 156.43658447265625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000201.npy"} +{"epoch": 0.29515418502202645, "step": 202, "batch_size": 64, "mean": 79.567626953125, "std": 158.5449981689453, "min": -229.9878692626953, "p10": -108.99865188598629, "median": 66.9672622680664, "p90": 285.6071685791016, "max": 584.6563110351562, "pos_frac": 0.703125, "sample": [584.6563110351562, -54.56391525268555, 145.80517578125, 348.62591552734375, 4.373254776000977, -65.97100067138672, 218.56427001953125, -60.22395324707031, 133.21011352539062, 99.5665512084961, 52.102684020996094, -229.9878692626953, 198.19729614257812, 257.8448181152344, 247.68313598632812, 51.5753173828125, -210.62139892578125, 69.98834228515625, 125.33895874023438, 389.17108154296875, 127.22203826904297, 310.2708740234375, -145.38192749023438, 195.96353149414062, 23.667076110839844, 38.99012756347656, 430.39666748046875, -22.521146774291992, 114.88395690917969, 160.1004638671875, 101.47245788574219, 118.30104064941406, 279.14910888671875, -39.49662780761719, -45.74871826171875, -181.91175842285156, 86.17835998535156, 62.98736572265625, 2.1967086791992188, 251.90460205078125, 112.34835815429688, -1.18890380859375, 19.22635269165039, 288.3749084472656, 191.33013916015625, 125.80360412597656, 90.22013092041016, -228.32339477539062, 63.94618225097656, 109.65692138671875, 17.700634002685547, 51.699119567871094, -37.03819274902344, -77.1779556274414, 43.3128662109375, -122.63609313964844, 98.87775421142578, -77.1065902709961, -36.13252639770508, 54.93476867675781, 294.17095947265625, -134.53233337402344, 103.37167358398438, -32.46938705444336], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000202.npy"} +{"epoch": 0.2966226138032305, "step": 203, "batch_size": 64, "mean": 96.51522827148438, "std": 135.33206176757812, "min": -197.151611328125, "p10": -64.71426239013671, "median": 77.49422073364258, "p90": 292.363427734375, "max": 485.77020263671875, "pos_frac": 0.765625, "sample": [20.713581085205078, 147.38851928710938, 166.78350830078125, 68.33641815185547, -34.2070198059082, 298.4517517089844, -87.65475463867188, 165.4903564453125, 34.6708984375, 135.82687377929688, -9.345016479492188, 113.23725128173828, 97.11824798583984, 193.03958129882812, -117.56553649902344, 199.80140686035156, 186.4351043701172, -32.57426452636719, 202.36532592773438, 141.94602966308594, 291.6210021972656, 429.101806640625, -75.88671875, 127.02621459960938, 141.382080078125, 33.640380859375, 485.77020263671875, 17.868446350097656, -101.43026733398438, 161.3188934326172, -1.0730972290039062, 185.17352294921875, 181.65185546875, 302.2782897949219, 29.473876953125, 186.0570831298828, 82.36038970947266, 113.95796966552734, 95.82278442382812, 122.87207794189453, 52.66716003417969, 69.71732330322266, 47.224327087402344, 292.6816101074219, -35.982337951660156, -197.151611328125, -58.67869567871094, 71.84809875488281, -41.968177795410156, 10.474651336669922, 181.70889282226562, 29.105873107910156, 12.292484283447266, 47.61961364746094, -67.30093383789062, -128.28387451171875, 166.89865112304688, 72.6280517578125, 307.7018737792969, 374.8561096191406, -23.148134231567383, 249.3810272216797, 25.274179458618164, 18.163299560546875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000203.npy"} +{"epoch": 0.29809104258443464, "step": 204, "batch_size": 64, "mean": 87.84043884277344, "std": 165.13986206054688, "min": -297.27734375, "p10": -89.86890258789059, "median": 74.82231903076172, "p90": 337.3122955322268, "max": 530.3018188476562, "pos_frac": 0.71875, "sample": [239.0449676513672, 80.36822509765625, 57.77732467651367, 37.833885192871094, 109.15473175048828, -32.57814025878906, 246.78143310546875, -117.2163314819336, 82.75133514404297, 18.0499210357666, -25.828510284423828, 21.092084884643555, 18.403274536132812, 96.86063385009766, 55.60231018066406, -27.405662536621094, -297.27734375, 208.4176025390625, 71.20036315917969, 9.421892166137695, 253.4452362060547, -155.32272338867188, -33.80555725097656, 0.5805168151855469, 78.44427490234375, -15.426151275634766, 89.01849365234375, 165.81976318359375, 160.5464324951172, -102.99285888671875, 426.64892578125, -30.91211700439453, 97.06874084472656, 39.91236114501953, -56.08110046386719, 154.28598022460938, 54.92796325683594, 151.72265625, 488.0220947265625, -258.57147216796875, 434.74249267578125, 180.00454711914062, 195.0380401611328, 80.54576110839844, 91.244384765625, -2.7876148223876953, 47.07421875, 269.78436279296875, 366.2528381347656, -35.496307373046875, 82.17314147949219, 178.63919067382812, -136.56076049804688, 60.48474884033203, 530.3018188476562, 402.77362060546875, -44.85148620605469, 158.02752685546875, 27.790254592895508, 384.2336730957031, -136.38330078125, 80.65791320800781, 107.5596923828125, -59.246337890625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000204.npy"} +{"epoch": 0.29955947136563876, "step": 205, "batch_size": 64, "mean": 115.23167419433594, "std": 155.49978637695312, "min": -271.9974670410156, "p10": -78.42567596435546, "median": 121.28291702270508, "p90": 282.7608215332031, "max": 452.1946716308594, "pos_frac": 0.765625, "sample": [40.51374816894531, 141.2757568359375, 235.06765747070312, 382.1746520996094, 130.22320556640625, 18.15690040588379, -10.030311584472656, 38.9256591796875, -2.4035797119140625, 242.91506958007812, 281.25054931640625, -170.5091552734375, -111.55772399902344, 110.43643951416016, -23.491676330566406, 96.680419921875, 16.884807586669922, 400.0318298339844, 261.9892578125, 33.95321273803711, 109.21421813964844, 100.34371948242188, -155.64215087890625, 222.8120574951172, 342.4694519042969, 261.4033508300781, 222.6871337890625, -10.347797393798828, -80.11075592041016, 213.18612670898438, -53.41679763793945, 432.1239318847656, 127.45155334472656, 82.51302337646484, 39.48310089111328, 153.99855041503906, 452.1946716308594, 267.59942626953125, 127.81858825683594, 132.65065002441406, -246.33847045898438, 164.83311462402344, 132.39540100097656, 238.65223693847656, 173.93301391601562, -76.46490478515625, 147.5033416748047, 108.11436462402344, 272.96868896484375, 40.705345153808594, 283.4080810546875, 203.8950958251953, 187.9185333251953, 79.28693389892578, 168.01792907714844, 381.0239562988281, 115.1142807006836, -2.6060562133789062, 25.319076538085938, -271.9974670410156, -79.26600646972656, 224.94683837890625, -37.41845703125, 39.9637451171875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000205.npy"} +{"epoch": 0.3010279001468429, "step": 206, "batch_size": 64, "mean": 125.78593444824219, "std": 136.25328063964844, "min": -254.62460327148438, "p10": -38.877788162231425, "median": 107.30575180053711, "p90": 289.9626159667969, "max": 436.672119140625, "pos_frac": 0.84375, "sample": [52.085052490234375, 184.95953369140625, 106.73774719238281, 126.82734680175781, 436.672119140625, 342.29400634765625, 79.77244567871094, 68.49297332763672, 220.57798767089844, 267.64227294921875, 65.98257446289062, 166.7141876220703, 32.182456970214844, 100.95455169677734, -254.62460327148438, 51.06460952758789, 85.77081298828125, 159.9831085205078, 62.34941101074219, -7.002981185913086, 246.16827392578125, -58.408302307128906, 276.8215026855469, 373.0720520019531, 105.62730407714844, 190.07144165039062, 26.05322265625, 201.53260803222656, -67.74929809570312, -118.67279815673828, 187.93678283691406, 333.31561279296875, 210.9111328125, 259.25140380859375, 17.918855667114258, 283.934814453125, 66.04251861572266, -46.87362289428711, 155.91439819335938, -152.90887451171875, 53.78247833251953, 221.92840576171875, -5.067104339599609, 160.84628295898438, 88.42832946777344, -20.220840454101562, 149.1783447265625, 97.69280242919922, 217.0471649169922, 89.44415283203125, -70.87599182128906, 21.50310516357422, 129.68057250976562, 195.355712890625, 416.71954345703125, 292.54595947265625, 0.8944377899169922, 143.36019897460938, 107.8737564086914, 283.9291687011719, 78.14818572998047, 10.552438735961914, 333.03961181640625, 215.11831665039062], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000206.npy"} +{"epoch": 0.302496328928047, "step": 207, "batch_size": 64, "mean": 141.111328125, "std": 161.1201629638672, "min": -126.29772186279297, "p10": -29.498224639892577, "median": 105.85905838012695, "p90": 326.0705444335938, "max": 595.3803100585938, "pos_frac": 0.8125, "sample": [19.625408172607422, -126.29772186279297, 60.16857147216797, 113.35931396484375, 178.89959716796875, 55.813140869140625, 317.1396484375, -30.14300537109375, -17.008529663085938, -5.256866455078125, 100.65179443359375, 108.17192840576172, 329.8980712890625, -20.066299438476562, 148.45706176757812, 177.25808715820312, 46.846710205078125, 116.92094421386719, 28.621841430664062, 48.954994201660156, 63.754085540771484, 183.65829467773438, 33.30278778076172, 260.84857177734375, 31.30120849609375, -61.440643310546875, 299.02288818359375, 81.84219360351562, 26.49727439880371, -56.98324966430664, 300.0556335449219, 288.5684814453125, -50.24864959716797, -3.69427490234375, 32.64349365234375, 282.04925537109375, 43.91386413574219, 234.8863067626953, 160.8236846923828, 249.54661560058594, 270.7590026855469, 139.17318725585938, 103.6141357421875, 126.5638427734375, 232.576171875, 259.73455810546875, 108.1039810180664, 410.51458740234375, 563.9306640625, 292.33404541015625, 40.38581085205078, 470.3856201171875, -54.258262634277344, 54.02136993408203, 595.3803100585938, 44.627777099609375, 83.97401428222656, 113.27871704101562, 88.06478881835938, 536.9479370117188, -82.05228424072266, 181.07540893554688, 427.6212463378906, -27.993736267089844], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000207.npy"} +{"epoch": 0.3039647577092511, "step": 208, "batch_size": 64, "mean": 115.3888168334961, "std": 171.09194946289062, "min": -382.09039306640625, "p10": -35.48091201782226, "median": 100.04993057250977, "p90": 361.731268310547, "max": 472.6363525390625, "pos_frac": 0.796875, "sample": [-233.6992645263672, 168.59230041503906, -164.36985778808594, 298.07159423828125, 18.212860107421875, -13.247455596923828, 413.745361328125, 38.19517517089844, 133.9781036376953, 278.1883544921875, 239.0946502685547, 100.32279205322266, 67.7882080078125, 159.2100830078125, 232.78240966796875, 54.99559783935547, 472.6363525390625, 14.104827880859375, 470.83721923828125, 122.520751953125, 7.32672119140625, 33.199485778808594, 274.17608642578125, 182.48028564453125, 99.77706909179688, 15.377815246582031, 85.92037963867188, 19.400543212890625, -151.23316955566406, 251.13693237304688, -154.9383087158203, -4.924533843994141, 63.71939468383789, 134.26315307617188, 146.99118041992188, 322.83636474609375, 289.7458190917969, 45.88309860229492, -20.744956970214844, 238.93185424804688, -28.606231689453125, 54.93241882324219, 17.016952514648438, 381.9903564453125, 180.0515594482422, 378.4005126953125, 109.97264099121094, -382.09039306640625, -35.69043731689453, 179.32049560546875, 24.320297241210938, 127.56806945800781, 402.77227783203125, 120.44049072265625, 62.77058410644531, -153.33612060546875, 20.967483520507812, 418.0662536621094, 169.043701171875, 310.30645751953125, 259.163330078125, 82.46629333496094, -34.99201965332031, -31.2559814453125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000208.npy"} +{"epoch": 0.3054331864904552, "step": 209, "batch_size": 64, "mean": 100.72296142578125, "std": 162.14146423339844, "min": -357.60986328125, "p10": -80.93580322265625, "median": 90.4087028503418, "p90": 332.4194549560547, "max": 456.00201416015625, "pos_frac": 0.765625, "sample": [370.88311767578125, 87.99319458007812, 187.7848358154297, -61.331443786621094, 406.6119384765625, 456.00201416015625, 323.3616027832031, 113.48072814941406, 146.35137939453125, 102.39263916015625, -82.16102600097656, 114.33455657958984, 208.8328857421875, 46.092247009277344, 174.91615295410156, 140.59854125976562, 165.5260467529297, -148.96304321289062, -78.00932312011719, 84.92362213134766, 281.7765808105469, 242.40606689453125, 0.2864093780517578, -85.00192260742188, 55.396209716796875, -39.08680725097656, 8.243276596069336, 8.683082580566406, 97.98554992675781, 142.3347930908203, 128.54364013671875, 92.82421112060547, -170.80630493164062, 22.284114837646484, 81.61724090576172, 59.39227294921875, -78.07695007324219, -39.474178314208984, 408.531494140625, 308.81243896484375, 59.60955810546875, 440.1358642578125, 7.3270263671875, -42.66808319091797, -33.9830322265625, 60.810707092285156, 63.51451110839844, 78.33432006835938, 246.2838592529297, 142.5556182861328, 127.80624389648438, -48.5263671875, -165.22518920898438, -357.60986328125, 181.60049438476562, 106.63345336914062, 59.42778396606445, 57.316856384277344, 154.65823364257812, -162.69635009765625, 336.3013916015625, 346.47271728515625, 223.36550903320312, 278.5323791503906], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000209.npy"} +{"epoch": 0.3069016152716593, "step": 210, "batch_size": 64, "mean": 115.35027313232422, "std": 152.33839416503906, "min": -274.33013916015625, "p10": -32.972557067871094, "median": 92.74259185791016, "p90": 326.32098083496095, "max": 487.18145751953125, "pos_frac": 0.765625, "sample": [-32.255157470703125, 20.0789737701416, 135.78717041015625, 98.36495971679688, -4.5953826904296875, -54.15671157836914, 189.4395751953125, -22.2342586517334, 128.7503662109375, -29.72679901123047, 198.09808349609375, 38.124610900878906, 17.93465805053711, 154.98324584960938, 297.59002685546875, 159.2169189453125, 4.7329864501953125, 67.86407470703125, 217.69479370117188, 35.35464859008789, 284.569091796875, 122.15277099609375, 21.71134376525879, 265.116943359375, 325.0049133300781, -7.314374923706055, 12.244049072265625, 299.8542785644531, 226.12106323242188, 385.18060302734375, 126.83951568603516, 226.20591735839844, 116.59394073486328, 326.885009765625, -107.5379638671875, -44.40132141113281, -93.44100952148438, -5.3643341064453125, 51.149314880371094, 39.074275970458984, 148.5176544189453, 221.295166015625, 338.66888427734375, 102.20181274414062, -1.588369369506836, 12.153167724609375, 449.6833190917969, -33.28001403808594, 120.35954284667969, 154.4659423828125, 317.97674560546875, 487.18145751953125, 309.30218505859375, 70.7686538696289, -88.87110137939453, 87.12022399902344, -30.34502410888672, 19.066844940185547, 14.238872528076172, 327.8115234375, 12.874542236328125, 19.448936462402344, -274.33013916015625, 406.00579833984375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000210.npy"} +{"epoch": 0.30837004405286345, "step": 211, "batch_size": 64, "mean": 100.20367431640625, "std": 177.7362823486328, "min": -244.78607177734375, "p10": -68.40710525512695, "median": 65.48396301269531, "p90": 270.82285461425784, "max": 664.21435546875, "pos_frac": 0.734375, "sample": [114.09436798095703, -22.682828903198242, 270.90386962890625, 82.91497039794922, 65.24276733398438, 178.7620086669922, -70.28492736816406, 94.69266510009766, 573.4480590820312, -244.78607177734375, -48.17095184326172, 52.23651885986328, 28.51706314086914, 0.94891357421875, 71.98199462890625, 240.8106231689453, 179.9924774169922, 163.31817626953125, 141.2073974609375, -3.8686065673828125, 17.03575897216797, -36.468353271484375, 236.263916015625, 118.4967041015625, -58.54816436767578, 25.648269653320312, -64.02552032470703, 562.5338134765625, 44.74580001831055, 664.21435546875, 528.1312255859375, -62.46593475341797, -104.48760986328125, 167.20310974121094, 195.75726318359375, 346.2234802246094, -144.93914794921875, 143.8473358154297, 48.53410339355469, -7.9037933349609375, 125.63346862792969, 449.48468017578125, -33.934730529785156, 79.27630615234375, 16.776243209838867, 60.97660827636719, 66.23786926269531, -100.06452941894531, 26.84231948852539, 246.71957397460938, -161.76734924316406, 65.72515869140625, 222.8512725830078, 41.56199645996094, 166.14012145996094, 270.6338195800781, 85.23270416259766, 264.8951721191406, 20.591156005859375, 139.43695068359375, 61.221187591552734, 22.226318359375, -42.33857727050781, -170.39743041992188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000211.npy"} +{"epoch": 0.30983847283406757, "step": 212, "batch_size": 64, "mean": 140.22247314453125, "std": 183.53509521484375, "min": -249.16175842285156, "p10": -52.058607482910155, "median": 142.3883514404297, "p90": 326.20615234375003, "max": 646.9205322265625, "pos_frac": 0.75, "sample": [-249.16175842285156, -105.75211334228516, 363.3981628417969, 247.5980224609375, 63.78003692626953, 248.76327514648438, 96.67303466796875, -46.110713958740234, -49.94099426269531, -4.9731292724609375, 269.73736572265625, 200.51473999023438, -225.96339416503906, -52.966156005859375, -19.987438201904297, 3.5506973266601562, 327.80828857421875, 229.44778442382812, 465.35870361328125, 278.6951904296875, 103.31217956542969, -105.55335235595703, 25.484577178955078, 160.1409454345703, 143.45858764648438, 170.80349731445312, 243.2779541015625, -91.4766845703125, 279.3453369140625, -14.215097427368164, 135.4971923828125, 194.8725128173828, 646.9205322265625, 192.5043487548828, 47.02625274658203, -239.73561096191406, 315.9053955078125, -21.719772338867188, 141.318115234375, 628.5801391601562, 316.352783203125, 102.8902587890625, 44.38964080810547, 246.81005859375, 10.264312744140625, 122.29949951171875, -44.59524917602539, 50.482364654541016, 297.7353210449219, 31.735462188720703, 158.46734619140625, 401.427001953125, 420.0850830078125, 255.67623901367188, 320.2906494140625, -16.711612701416016, 267.6805419921875, 57.82871627807617, 182.263916015625, 172.67723083496094, 66.35884094238281, 322.46783447265625, -40.52783203125, 231.673583984375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000212.npy"} +{"epoch": 0.31130690161527164, "step": 213, "batch_size": 64, "mean": 138.56756591796875, "std": 162.76654052734375, "min": -213.27188110351562, "p10": -55.50384521484374, "median": 140.44947052001953, "p90": 359.7804168701172, "max": 483.1709899902344, "pos_frac": 0.828125, "sample": [322.0447692871094, -67.45022583007812, 40.791404724121094, -110.64083099365234, 235.98753356933594, 12.226318359375, 46.392799377441406, 140.35511779785156, -213.27188110351562, -46.155487060546875, 10.65434455871582, 345.06536865234375, 350.7823791503906, -23.92894172668457, 103.5309066772461, 186.8848114013672, 1.7229862213134766, 63.1343994140625, 176.80059814453125, 331.8663330078125, 468.57550048828125, 346.9226379394531, 39.14588165283203, 447.4244384765625, 159.87306213378906, 299.351318359375, 407.84112548828125, 10.775875091552734, -19.338233947753906, 44.62107467651367, 195.13998413085938, 140.5438232421875, 191.8470458984375, 142.68630981445312, -59.510284423828125, -8.711181640625, 164.17694091796875, 380.9950866699219, 314.7790832519531, 179.04751586914062, 9.139331817626953, 218.31761169433594, 369.0147399902344, -81.55838012695312, 233.6864776611328, 52.3643798828125, 191.49359130859375, 483.1709899902344, -147.7799072265625, 236.2353515625, 268.0892639160156, 317.15228271484375, 363.63671875, 31.785663604736328, 35.99659729003906, 4.755109786987305, -107.52046966552734, 65.31741333007812, 105.77923583984375, 67.86505889892578, 22.12714385986328, 35.945953369140625, 198.3126220703125, 142.01763916015625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000213.npy"} +{"epoch": 0.31277533039647576, "step": 214, "batch_size": 64, "mean": 123.68592071533203, "std": 161.8976287841797, "min": -157.09262084960938, "p10": -50.01912307739258, "median": 93.59940719604492, "p90": 324.3116333007813, "max": 598.76953125, "pos_frac": 0.765625, "sample": [50.998085021972656, 441.7620544433594, 33.430538177490234, 204.97613525390625, -7.70521354675293, -9.58714485168457, -109.59440612792969, -157.09262084960938, 308.8550109863281, -83.7474365234375, 183.838623046875, 20.298240661621094, 120.80475616455078, -0.9977741241455078, 48.304534912109375, 86.59872436523438, 218.2027587890625, -28.300378799438477, 64.74241638183594, 213.99813842773438, 66.58380126953125, 100.60009002685547, 126.63648223876953, 275.9643859863281, 273.0730285644531, 72.75538635253906, 8.682136535644531, -17.45164680480957, 130.1011962890625, -46.23069763183594, 104.89358520507812, 78.2092056274414, 6.293693542480469, 359.5804748535156, -126.44327545166016, 598.76953125, 347.4910583496094, 26.255813598632812, 50.37745666503906, 425.3111267089844, 320.1197509765625, 241.48529052734375, 293.8182373046875, 319.72137451171875, -143.4246368408203, 549.359375, 37.952735900878906, 191.67138671875, 225.16693115234375, 109.6899642944336, -72.28556060791016, 171.60560607910156, -49.802215576171875, 129.15982055664062, -48.36016845703125, 267.12646484375, 72.65003967285156, 326.108154296875, 63.802886962890625, 189.43032836914062, 118.83927917480469, 117.00785827636719, -50.112083435058594, 73.93038940429688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000214.npy"} +{"epoch": 0.3142437591776799, "step": 215, "batch_size": 64, "mean": 124.7989730834961, "std": 151.31007385253906, "min": -186.00454711914062, "p10": -73.38396682739257, "median": 103.26169967651367, "p90": 314.5077362060547, "max": 559.4746704101562, "pos_frac": 0.8125, "sample": [52.00771713256836, 347.4083557128906, 129.93089294433594, 285.3324890136719, -67.54544067382812, -83.52876281738281, 56.430442810058594, 130.4471435546875, 172.8570098876953, 101.67259216308594, 89.36561584472656, 41.15950012207031, 30.05747413635254, -75.88619232177734, 468.623046875, 301.4438781738281, 255.39163208007812, 315.1441650390625, 559.4746704101562, 173.15005493164062, 86.33082580566406, 422.96697998046875, 104.66519927978516, -122.18739318847656, 82.09016418457031, 215.64602661132812, -80.28775024414062, 86.3645248413086, 179.38673400878906, 237.43185424804688, 44.85252380371094, 96.27101135253906, 86.83010864257812, -23.957387924194336, 196.60659790039062, 101.85820007324219, -186.00454711914062, 146.37034606933594, 233.40725708007812, 302.796875, 381.59619140625, 121.18009185791016, 198.2314453125, -15.406326293945312, 177.78729248046875, 66.56855773925781, 55.363487243652344, -50.06788635253906, 36.48611068725586, 231.623779296875, -155.2815399169922, 151.94406127929688, 400.6107177734375, 33.982421875, 54.06660461425781, 112.89263916015625, 313.0227355957031, -0.2218475341796875, 186.48260498046875, 37.46409606933594, 117.98654174804688, 11.23149299621582, -101.48236083984375, 126.6991195678711], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000215.npy"} +{"epoch": 0.315712187958884, "step": 216, "batch_size": 64, "mean": 124.37261199951172, "std": 180.4086456298828, "min": -347.3656005859375, "p10": -85.40098571777344, "median": 90.51053237915039, "p90": 374.9455017089844, "max": 561.5778198242188, "pos_frac": 0.75, "sample": [229.93988037109375, 423.857421875, -101.5009994506836, 141.306884765625, 209.52426147460938, 54.558937072753906, -164.47836303710938, 186.44708251953125, 234.63941955566406, 26.20233917236328, 561.5778198242188, 125.54338073730469, 124.30012512207031, -112.10362243652344, -18.98086929321289, 14.803070068359375, -98.02555847167969, 250.73614501953125, 41.01346969604492, -228.85696411132812, 17.33416748046875, 366.73284912109375, -6.6361236572265625, -4.443389892578125, 97.74955749511719, 185.31944274902344, 38.654998779296875, 182.31259155273438, 70.04013061523438, -13.189104080200195, 101.11676025390625, 194.37460327148438, 46.416175842285156, -47.503746032714844, 357.5691223144531, 434.20965576171875, -88.33598327636719, 292.10406494140625, 47.015045166015625, 83.2715072631836, 79.22126007080078, 305.3619384765625, -25.78072166442871, -347.3656005859375, 60.860618591308594, 202.27999877929688, 244.82666015625, 414.28118896484375, 70.396728515625, -60.01221466064453, 316.1626281738281, 51.539794921875, 113.44165802001953, 175.37408447265625, -52.960838317871094, 378.4652099609375, 243.79678344726562, 350.53155517578125, 433.8448486328125, 403.37835693359375, 27.831741333007812, -78.55265808105469, 37.17521667480469, 361.13275146484375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000216.npy"} +{"epoch": 0.31718061674008813, "step": 217, "batch_size": 64, "mean": 152.53384399414062, "std": 179.43218994140625, "min": -218.2184295654297, "p10": -37.0833641052246, "median": 113.14550018310547, "p90": 429.16619262695326, "max": 595.250732421875, "pos_frac": 0.8125, "sample": [163.10986328125, 160.28973388671875, 104.08684539794922, 142.56016540527344, 72.72503662109375, 58.139495849609375, 69.10197448730469, 248.16114807128906, 82.36498260498047, 137.781005859375, 89.39743041992188, 444.08392333984375, 168.09402465820312, 394.358154296875, -108.55670928955078, -19.132911682128906, 22.403976440429688, -23.662811279296875, 206.8751220703125, 595.250732421875, -4.263103485107422, 85.61935424804688, 340.766845703125, 458.1397399902344, 120.03441619873047, 201.0531005859375, -48.84136199951172, 77.33045959472656, -218.2184295654297, 119.37837219238281, 575.8228759765625, 211.0484619140625, 28.835569381713867, 80.4056396484375, 279.9531555175781, 1.31884765625, 278.2373352050781, 299.2286376953125, -17.523231506347656, 560.5569458007812, 516.7907104492188, -105.9970932006836, 390.669189453125, 324.91778564453125, 134.17811584472656, 515.2301025390625, 14.807083129882812, 106.91262817382812, 84.47087860107422, 69.61609649658203, 60.39332580566406, 185.64151000976562, 296.271484375, -8.35017204284668, 146.6964569091797, 67.18411254882812, -45.017555236816406, -42.83502960205078, 31.207809448242188, -90.34809875488281, 136.20260620117188, 247.1064910888672, 275.548095703125, 14.555246353149414], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000217.npy"} +{"epoch": 0.3186490455212922, "step": 218, "batch_size": 64, "mean": 117.06441497802734, "std": 180.24244689941406, "min": -265.8999938964844, "p10": -100.52924423217773, "median": 85.68612289428711, "p90": 384.6962219238282, "max": 637.8726196289062, "pos_frac": 0.734375, "sample": [82.06074523925781, 88.7724380493164, 176.90640258789062, 255.82545471191406, 162.66519165039062, 23.57059097290039, 392.3443908691406, 80.64939880371094, 16.8234920501709, 52.91423797607422, 160.5876922607422, 66.90972900390625, -35.72026824951172, 140.26600646972656, 447.8631591796875, 215.67799377441406, 126.11943054199219, -28.028732299804688, 140.3115234375, 201.9658203125, 51.880706787109375, 67.7420883178711, -10.593799591064453, 258.1201477050781, -80.36495971679688, -265.8999938964844, 324.6361389160156, 109.68551635742188, 203.91102600097656, 221.77197265625, 32.74243927001953, 486.01177978515625, 428.31494140625, 247.44113159179688, 263.7232666015625, 63.19120788574219, -35.52381896972656, -35.376708984375, 89.79421997070312, -8.793338775634766, -68.44725799560547, -115.08321380615234, 161.78189086914062, -107.27018737792969, -112.2486343383789, -41.976173400878906, 637.8726196289062, 368.5186767578125, 219.3803253173828, -95.74919891357422, -152.329345703125, 260.39776611328125, 543.1705932617188, 15.667509078979492, 56.64920425415039, 82.59980773925781, 194.9083709716797, 144.42431640625, 27.93979835510254, -203.65530395507812, 81.16434478759766, -102.57783508300781, 391.62945556640625, 124.4564208984375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000218.npy"} +{"epoch": 0.3201174743024963, "step": 219, "batch_size": 64, "mean": 98.41822814941406, "std": 194.242431640625, "min": -504.0370788574219, "p10": -104.328076171875, "median": 68.05377960205078, "p90": 381.597918701172, "max": 580.7791137695312, "pos_frac": 0.78125, "sample": [463.505859375, 141.2962646484375, 12.364187240600586, 351.3106384277344, 5.662614822387695, 435.48797607421875, -504.0370788574219, 25.245952606201172, 56.32696533203125, 358.9255065917969, -10.475723266601562, 109.05207824707031, 215.08953857421875, 580.7791137695312, 531.1400756835938, 176.75393676757812, 53.35218048095703, 249.81747436523438, 123.54983520507812, 107.98786926269531, 315.9608154296875, 28.734878540039062, -105.32901763916016, 13.84552001953125, -101.99254608154297, -79.82750701904297, 391.3146667480469, -13.793067932128906, 419.1827697753906, 414.2729797363281, 56.332054138183594, 338.1000061035156, 20.302017211914062, 23.89063835144043, 12.830276489257812, 228.4222412109375, 115.98070526123047, 17.624900817871094, 114.88079833984375, 57.174072265625, 269.7831115722656, 127.65928649902344, 138.05264282226562, -220.34613037109375, 71.36006164550781, 173.5305633544922, -138.73194885253906, 5.402120590209961, 3.613199234008789, 195.73086547851562, 31.37127685546875, -105.97017669677734, 0.35767364501953125, -81.65840148925781, -270.151123046875, 110.3220443725586, 158.560302734375, 85.5042495727539, -162.7823944091797, -76.38505554199219, 175.24600219726562, 64.74749755859375, 83.24717712402344, -90.7384262084961], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000219.npy"} +{"epoch": 0.32158590308370044, "step": 220, "batch_size": 64, "mean": 81.49143981933594, "std": 136.95655822753906, "min": -289.30035400390625, "p10": -85.44246978759764, "median": 83.3426284790039, "p90": 226.42507324218752, "max": 424.2499694824219, "pos_frac": 0.765625, "sample": [206.77081298828125, 40.28068542480469, 74.73321533203125, -3.2885055541992188, 33.31810760498047, 324.07464599609375, 382.0289001464844, 7.199527740478516, 144.6057586669922, 132.4527130126953, 103.42398834228516, 304.7715759277344, 157.88616943359375, 163.45840454101562, -198.11744689941406, 145.32357788085938, 204.50460815429688, 12.475767135620117, 130.35903930664062, 221.174560546875, 63.62593078613281, 12.399938583374023, -5.9370880126953125, -289.30035400390625, -68.36612701416016, 190.0896759033203, -49.182106018066406, 61.525177001953125, 228.67529296875, 85.54922485351562, -104.31942749023438, 164.13720703125, 138.80967712402344, -44.27452087402344, 263.91455078125, 112.69335174560547, 424.2499694824219, 147.39341735839844, 30.104942321777344, 130.00045776367188, 16.478120803833008, 51.48464584350586, 9.302667617797852, 72.98451232910156, -92.76090240478516, 128.41607666015625, 82.55087280273438, 140.02650451660156, -164.7267608642578, 71.82583618164062, 84.13438415527344, 23.00786590576172, 183.19003295898438, -55.13639831542969, 78.04666137695312, 154.40345764160156, 186.90879821777344, -55.95989227294922, 251.12100219726562, 204.16372680664062, -31.51955795288086, 166.19467163085938, -200.62625122070312, -167.28302001953125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000220.npy"} +{"epoch": 0.32305433186490456, "step": 221, "batch_size": 64, "mean": 180.90054321289062, "std": 175.87786865234375, "min": -247.724853515625, "p10": -7.732260894775388, "median": 182.35997772216797, "p90": 416.23149414062505, "max": 617.6778564453125, "pos_frac": 0.84375, "sample": [80.05066680908203, 97.20529174804688, 236.9838104248047, -8.659271240234375, 105.27078247070312, 617.6778564453125, -5.569236755371094, 418.12322998046875, 221.77658081054688, 266.1681213378906, 293.2618103027344, 14.915958404541016, 313.29052734375, 256.0884094238281, 143.75070190429688, 286.6949768066406, 3.39129638671875, 473.6499328613281, 15.861478805541992, 128.63168334960938, 232.55374145507812, 354.177490234375, 411.89306640625, 470.2801513671875, 479.2254638671875, 259.0830383300781, 203.82765197753906, -247.724853515625, 88.81758880615234, 98.60124969482422, 173.0535888671875, 184.2815399169922, -37.177520751953125, 81.51155853271484, 86.2371826171875, 180.43841552734375, 97.65828704833984, -137.1920928955078, 269.12359619140625, 418.0908203125, -1.1321029663085938, 332.84344482421875, 241.29164123535156, 313.0456237792969, 117.10054016113281, 242.99447631835938, 43.565338134765625, 402.5911560058594, 316.79144287109375, 233.622314453125, -21.159862518310547, -89.96804809570312, 1.6144027709960938, 253.28451538085938, 12.262100219726562, -86.59088134765625, -1.6532649993896484, 119.6433334350586, 38.86967849731445, 233.68731689453125, 316.325439453125, 109.16790771484375, 617.4475708007812, 206.66709899902344], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000221.npy"} +{"epoch": 0.3245227606461087, "step": 222, "batch_size": 64, "mean": 137.98849487304688, "std": 171.9808349609375, "min": -212.4915771484375, "p10": -68.26344680786131, "median": 127.6115608215332, "p90": 384.70843200683606, "max": 658.06396484375, "pos_frac": 0.796875, "sample": [-36.63037872314453, 457.3437805175781, -32.86305236816406, -89.41523742675781, -7.198997497558594, -80.24065399169922, 18.161651611328125, 129.6144561767578, -51.45379638671875, 223.92312622070312, 113.07909393310547, 115.06427764892578, 445.31170654296875, 78.71823120117188, 136.83648681640625, 235.44570922851562, -202.35488891601562, 158.06988525390625, 308.4139709472656, 166.76446533203125, 165.09323120117188, 133.70059204101562, -212.4915771484375, 326.7745666503906, -123.97107696533203, 246.51637268066406, 495.12518310546875, 97.92408752441406, 26.07300567626953, 21.87701416015625, 130.18389892578125, 24.221435546875, 187.9481964111328, -75.46758270263672, 215.54689025878906, 251.03257751464844, 45.13047790527344, 177.71450805664062, 7.767406463623047, 250.235595703125, -30.590248107910156, 101.94377136230469, 79.13013458251953, 107.79035949707031, 360.5910949707031, 279.03900146484375, 125.6086654663086, 287.858154296875, 182.14413452148438, 48.195556640625, -36.15171813964844, 145.82595825195312, 395.04443359375, 72.03619384765625, 439.4132995605469, 253.76254272460938, 152.7247772216797, -108.67289733886719, 212.03683471679688, 32.413597106933594, 658.06396484375, 72.503173828125, 434.4765625, 88.55076599121094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000222.npy"} +{"epoch": 0.32599118942731276, "step": 223, "batch_size": 64, "mean": 115.79170227050781, "std": 194.60809326171875, "min": -361.97027587890625, "p10": -127.13704299926756, "median": 115.31296157836914, "p90": 344.3697662353517, "max": 668.9717407226562, "pos_frac": 0.734375, "sample": [93.46501159667969, -77.54381561279297, 257.8994140625, 153.19454956054688, 135.60089111328125, 438.47186279296875, 206.23223876953125, 117.44969940185547, 203.67332458496094, 168.979736328125, -158.1422119140625, 103.6041259765625, 69.83695983886719, 252.96145629882812, 312.60833740234375, 287.5020446777344, 140.6185302734375, 101.12916564941406, 585.5743408203125, -17.692218780517578, 116.1689224243164, -68.47672271728516, 91.96381378173828, -79.64494323730469, -60.105072021484375, 321.0095520019531, -167.32167053222656, 407.51611328125, 111.62834930419922, -132.79647827148438, 5.6530303955078125, 11.814743041992188, 303.3763427734375, -361.97027587890625, 114.45700073242188, 147.59719848632812, -214.21397399902344, 227.45660400390625, 75.94282531738281, 668.9717407226562, 225.5585479736328, -113.93169403076172, 198.9429473876953, 293.703125, -202.06832885742188, -103.83450317382812, 354.38128662109375, 389.45050048828125, 158.2440185546875, -19.864280700683594, 52.292503356933594, 73.36388397216797, 105.2486572265625, 466.25244140625, 17.447425842285156, -6.785453796386719, -12.089590072631836, 6.970357894897461, 158.24270629882812, 125.0140380859375, 117.18067169189453, 185.17361450195312, -239.2066650390625, 286.5323486328125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000223.npy"} +{"epoch": 0.3274596182085169, "step": 224, "batch_size": 64, "mean": 134.4194793701172, "std": 203.33038330078125, "min": -383.9190368652344, "p10": -82.9271026611328, "median": 86.79823303222656, "p90": 417.28309631347673, "max": 735.936279296875, "pos_frac": 0.796875, "sample": [282.6805419921875, 15.61529541015625, 153.4086456298828, 13.375511169433594, -77.65213012695312, 628.8536987304688, 241.51609802246094, 50.744606018066406, 93.54357147216797, -383.9190368652344, 50.640289306640625, 162.6835479736328, 147.18092346191406, 172.4016571044922, 478.47137451171875, 91.72029113769531, 24.19182586669922, 196.3788299560547, 200.03724670410156, 735.936279296875, 311.00714111328125, 131.61038208007812, 343.9367980957031, 280.3259582519531, -113.58583068847656, -73.98214721679688, 306.5758361816406, 53.36178207397461, 267.2346496582031, -8.187454223632812, 41.51893615722656, 42.659088134765625, 44.886680603027344, 721.246337890625, 278.68914794921875, 254.00637817382812, 113.86905670166016, -118.5540542602539, 24.760251998901367, 35.902156829833984, 261.37542724609375, -2.64312744140625, 78.58686828613281, -22.17630386352539, 102.47201538085938, -100.86743927001953, -85.18780517578125, -6.3977203369140625, -97.53559875488281, 37.4068717956543, 181.81431579589844, 158.41693115234375, 436.955810546875, 10.330690383911133, 3.640289306640625, 86.80068969726562, 3.84564208984375, 371.3800964355469, 40.734649658203125, -89.08599853515625, 86.7957763671875, 443.75, 473.4549255371094, 13.889522552490234], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000224.npy"} +{"epoch": 0.328928046989721, "step": 225, "batch_size": 64, "mean": 158.4615020751953, "std": 211.44895935058594, "min": -287.3526916503906, "p10": -95.51998062133788, "median": 160.3989715576172, "p90": 422.9043273925782, "max": 863.830810546875, "pos_frac": 0.71875, "sample": [221.205322265625, 192.71771240234375, 444.67816162109375, 394.45849609375, 33.447181701660156, 124.43805694580078, 27.9549560546875, -79.93365478515625, -176.11138916015625, 426.27960205078125, 415.0286865234375, 308.70037841796875, -166.5316925048828, -98.29857635498047, 308.5279541015625, -39.388301849365234, -139.1551055908203, 17.02467918395996, -38.83414840698242, 96.60282897949219, 98.1865234375, 136.15628051757812, 276.7367248535156, 187.306396484375, -16.05943489074707, 190.44622802734375, 27.551116943359375, 184.5532684326172, -49.51748275756836, 238.68392944335938, 388.24591064453125, 174.32241821289062, 232.5803985595703, 95.5907211303711, -89.03659057617188, 546.251953125, 464.45635986328125, -18.32947540283203, 266.8115234375, 448.859375, 114.76687622070312, 402.9888916015625, 225.05618286132812, -44.90898895263672, 169.595947265625, 154.48587036132812, 863.830810546875, 336.7324523925781, 166.31207275390625, 73.53822326660156, 516.0847778320312, -21.15325927734375, 304.8962707519531, -101.3341064453125, -137.72967529296875, 413.30438232421875, 347.51861572265625, 198.02072143554688, 188.80625915527344, -287.3526916503906, 120.07332611083984, 145.94735717773438, -55.62873077392578, -8.9224853515625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000225.npy"} +{"epoch": 0.3303964757709251, "step": 226, "batch_size": 64, "mean": 128.18907165527344, "std": 202.84584045410156, "min": -279.3595886230469, "p10": -55.58008346557615, "median": 75.19646453857422, "p90": 366.2356140136719, "max": 686.8114013671875, "pos_frac": 0.734375, "sample": [650.8798217773438, -181.89114379882812, -38.80556869506836, 6.023773193359375, -18.145225524902344, 190.25457763671875, -13.398773193359375, 69.48619079589844, 215.44924926757812, -62.769161224365234, 139.322265625, 11.806137084960938, -27.883071899414062, 24.9300537109375, -219.53945922851562, 80.90673828125, -147.8545684814453, 368.9830017089844, 359.26177978515625, 23.86473846435547, 359.8250427246094, 358.365478515625, 57.798606872558594, 143.61602783203125, 195.99888610839844, 406.882080078125, 2.3631515502929688, -31.246612548828125, 152.46893310546875, 555.1201171875, 358.77215576171875, -90.38624572753906, 210.85028076171875, 87.90553283691406, 17.36876106262207, 686.8114013671875, 275.0145263671875, 605.0092163085938, 312.7406921386719, -11.483207702636719, -16.995407104492188, 167.59857177734375, -279.3595886230469, 192.27700805664062, 21.12302017211914, 131.2399444580078, 12.092964172363281, -1.225433349609375, 18.322778701782227, 98.1794662475586, -5.577972412109375, 286.8271484375, 128.86361694335938, 210.04708862304688, 37.52910614013672, 158.7521209716797, -101.24609375, 6.151798248291016, 541.548583984375, 45.438072204589844, 59.040321350097656, 179.84912109375, -34.089202880859375, 263.037353515625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000226.npy"} +{"epoch": 0.33186490455212925, "step": 227, "batch_size": 64, "mean": 161.3520050048828, "std": 204.45578002929688, "min": -420.8387145996094, "p10": -39.82483539581297, "median": 133.2384490966797, "p90": 421.1007965087891, "max": 791.6057739257812, "pos_frac": 0.828125, "sample": [336.2776794433594, 120.46749877929688, 187.29721069335938, 456.31207275390625, 219.78152465820312, 46.89165496826172, -12.197067260742188, 65.27167510986328, 333.45599365234375, 2.182708740234375, 137.5113525390625, 141.5552520751953, 185.65341186523438, 39.95719909667969, -47.65861892700195, 43.833213806152344, 188.923095703125, 89.66050720214844, 260.927001953125, 791.6057739257812, 167.43392944335938, 120.60865783691406, 388.4803771972656, 301.2958984375, 48.715476989746094, 344.899169921875, 165.4285888671875, 280.08502197265625, 128.96554565429688, 104.27650451660156, 178.29473876953125, 85.03959655761719, 359.548095703125, 6.248506546020508, -103.08892822265625, -103.42533874511719, 223.46441650390625, 230.8460693359375, 641.3995361328125, 113.21102905273438, 227.20327758789062, 495.8253173828125, -115.52166748046875, 639.8268432617188, -21.54600715637207, 18.944725036621094, 198.6324920654297, 29.050453186035156, 238.30947875976562, 41.70955276489258, 154.85296630859375, 413.5447998046875, 90.30059814453125, -9.438369750976562, 2.945352554321289, -71.74183654785156, 424.3390808105469, -420.8387145996094, -13.977584838867188, 116.282958984375, -105.55028533935547, 18.410104751586914, 163.85769653320312, 541.6712646484375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000227.npy"} +{"epoch": 0.3333333333333333, "step": 228, "batch_size": 64, "mean": 108.98725128173828, "std": 198.84368896484375, "min": -373.4171447753906, "p10": -150.848046875, "median": 104.68772888183594, "p90": 353.7975708007813, "max": 656.1574096679688, "pos_frac": 0.71875, "sample": [-180.14578247070312, -350.17626953125, -373.4171447753906, 271.0236511230469, 151.05331420898438, 369.70184326171875, -253.72763061523438, 215.22799682617188, 211.87652587890625, -184.218994140625, 41.75323486328125, 357.6258850097656, 656.1574096679688, 344.8648376464844, 526.736572265625, 340.49713134765625, 414.56781005859375, 199.26109313964844, 131.27955627441406, -58.75086975097656, -64.52058410644531, 297.6492919921875, 157.09237670898438, 361.36309814453125, 196.11810302734375, 173.13963317871094, 159.2324676513672, 20.112300872802734, -119.99860382080078, 104.58323669433594, 38.08717346191406, 202.17405700683594, 11.037975311279297, 204.92269897460938, 228.448486328125, 67.264404296875, 149.54818725585938, 78.37747192382812, -8.641311645507812, 45.562286376953125, 129.4688262939453, 261.102783203125, -156.03170776367188, -6.7252655029296875, 104.79222106933594, 103.51720428466797, 471.0732727050781, 23.147201538085938, 87.26455688476562, 68.31668853759766, 303.289794921875, -42.49626541137695, -18.230884552001953, 181.21170043945312, 253.6818084716797, 176.55267333984375, 248.84910583496094, 100.6501693725586, -138.75283813476562, -77.55221557617188, -32.923255920410156, 57.4896240234375, -186.9454345703125, -68.30852508544922], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000228.npy"} +{"epoch": 0.33480176211453744, "step": 229, "batch_size": 64, "mean": 113.79228210449219, "std": 187.8021240234375, "min": -454.5443115234375, "p10": -83.37683639526367, "median": 102.53955459594727, "p90": 372.5904083251953, "max": 628.1451416015625, "pos_frac": 0.765625, "sample": [-27.052322387695312, 18.695863723754883, -172.88983154296875, 470.077880859375, 50.9933967590332, -212.24227905273438, 115.83637237548828, 182.66976928710938, 94.46221923828125, 235.27938842773438, 276.9856262207031, 400.546875, 111.36265563964844, 450.0630187988281, 108.92317199707031, 96.92955780029297, 316.8605041503906, 102.62406158447266, 6.748249053955078, 367.1715393066406, -85.32350158691406, 61.342071533203125, 13.726360321044922, 234.67823791503906, 147.6675567626953, 25.199689865112305, 628.1451416015625, -238.76104736328125, 57.91859436035156, 290.7256774902344, 129.6793212890625, 247.54788208007812, 203.04400634765625, 374.91278076171875, 345.5724182128906, 17.94046974182129, 435.51165771484375, 23.442970275878906, 35.00665283203125, -55.544769287109375, 31.398841857910156, 146.01214599609375, 48.490501403808594, 160.8184051513672, -78.8346176147461, 209.88192749023438, 177.23443603515625, 230.7034454345703, -45.758697509765625, -144.93544006347656, 120.94820404052734, 462.4810791015625, 39.341064453125, 102.45504760742188, 58.87787628173828, 127.83030700683594, -13.940635681152344, 216.44674682617188, -41.23152160644531, 147.90823364257812, -454.5443115234375, -0.670166015625, -19.230636596679688, -85.4540786743164], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000229.npy"} +{"epoch": 0.33627019089574156, "step": 230, "batch_size": 64, "mean": 179.509765625, "std": 179.3465576171875, "min": -302.31707763671875, "p10": -29.327760887145995, "median": 164.14273834228516, "p90": 441.8723937988282, "max": 588.0833740234375, "pos_frac": 0.828125, "sample": [62.275604248046875, 393.67584228515625, -32.65513610839844, 321.88616943359375, 215.91937255859375, 83.68817901611328, -27.328004837036133, 288.1944580078125, 100.7425537109375, 165.42572021484375, 267.7710266113281, 162.10122680664062, 42.387535095214844, 301.7476806640625, -11.85919189453125, 165.94338989257812, -55.46208190917969, 103.90591430664062, 151.92318725585938, 158.88388061523438, 14.0736083984375, -118.07437133789062, -30.184799194335938, 220.68753051757812, 146.30117797851562, 164.93655395507812, 241.08860778808594, 387.84625244140625, 378.1023254394531, 143.15115356445312, 74.1908187866211, 34.082332611083984, 411.4871826171875, 509.4494323730469, 163.3489227294922, -147.38427734375, 138.2566680908203, 177.47048950195312, 254.54025268554688, 472.790771484375, -19.958484649658203, -9.227462768554688, -302.31707763671875, -37.489097595214844, 79.83253479003906, 32.6951904296875, 456.1781311035156, 127.22930908203125, 383.9976501464844, 450.45928955078125, 179.54991149902344, 121.77110290527344, 78.66691589355469, 22.99135398864746, 465.86260986328125, 341.7205810546875, 511.5307922363281, 212.05909729003906, 421.8363037109375, 203.6791229248047, 233.17323303222656, 183.31857299804688, 267.6536865234375, 588.0833740234375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000230.npy"} +{"epoch": 0.3377386196769457, "step": 231, "batch_size": 64, "mean": 119.82038879394531, "std": 169.72195434570312, "min": -156.57125854492188, "p10": -56.94446830749511, "median": 78.06023788452148, "p90": 392.56340942382815, "max": 692.89208984375, "pos_frac": 0.75, "sample": [458.13531494140625, 77.03062438964844, -40.693565368652344, 330.27001953125, 21.44849395751953, 75.16468048095703, 396.8149719238281, 19.289154052734375, 67.11553192138672, 79.08985137939453, 113.27702331542969, 382.6430969238281, -102.87857055664062, 73.8478775024414, 533.2176513671875, 168.86627197265625, 196.7950439453125, 37.50716018676758, 131.34030151367188, 218.474853515625, 405.548828125, 68.62382507324219, 108.42511749267578, -25.950454711914062, 61.57087707519531, -108.10205841064453, 397.75201416015625, -50.32217788696289, 265.2186279296875, 692.89208984375, 490.3955383300781, 295.878173828125, 135.65899658203125, 35.29804229736328, -156.57125854492188, 60.88981246948242, 120.38201904296875, 177.64358520507812, 79.56103515625, -63.37103271484375, -14.203643798828125, 30.68314552307129, 244.40924072265625, 62.06468200683594, -40.459068298339844, 131.82159423828125, -59.7825927734375, -102.56953430175781, 49.78644943237305, 107.17984008789062, 303.35064697265625, 153.93597412109375, 104.74744415283203, -14.872806549072266, -44.17154312133789, 151.59286499023438, -69.19153594970703, 131.16078186035156, 29.166290283203125, -8.285797119140625, -1.9107666015625, 165.0305938720703, 115.32025146484375, 15.524398803710938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000231.npy"} +{"epoch": 0.3392070484581498, "step": 232, "batch_size": 64, "mean": 93.8593978881836, "std": 168.92898559570312, "min": -173.1400909423828, "p10": -84.05035171508788, "median": 52.948909759521484, "p90": 312.7547088623047, "max": 673.2322998046875, "pos_frac": 0.703125, "sample": [164.3289794921875, 90.14449310302734, 5.866968154907227, 39.20368194580078, 24.908706665039062, -144.3006134033203, 281.1744689941406, 317.3185119628906, -39.411865234375, 245.35977172851562, 85.67932891845703, 472.7335205078125, 68.04242706298828, -99.30477142333984, -32.99174499511719, 40.159934997558594, -38.91357421875, 199.60061645507812, -86.32017517089844, 78.52523040771484, 149.63172912597656, 596.6053466796875, 33.702301025390625, 65.87197875976562, 109.53443145751953, -61.994503021240234, 673.2322998046875, -13.189620971679688, 45.863983154296875, 260.060302734375, 138.85494995117188, 200.40933227539062, -59.108367919921875, 81.629150390625, -137.24020385742188, 92.45353698730469, -78.75409698486328, 28.72901153564453, -114.06280517578125, 411.15191650390625, 33.097930908203125, -16.576072692871094, 12.982063293457031, -93.41517639160156, 32.0699348449707, 107.60591888427734, -4.221649169921875, -173.1400909423828, 330.86895751953125, 161.1305389404297, 371.51190185546875, 27.180004119873047, -15.9879150390625, 282.2845764160156, 227.69064331054688, -16.456117630004883, 92.93292999267578, 302.1058349609375, -28.557722091674805, 55.714805603027344, 93.05589294433594, 13.552921295166016, 50.183013916015625, 66.20378875732422], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000232.npy"} +{"epoch": 0.3406754772393539, "step": 233, "batch_size": 64, "mean": 159.8199462890625, "std": 232.86300659179688, "min": -223.64268493652344, "p10": -110.09565887451171, "median": 95.66186904907227, "p90": 483.02344970703126, "max": 754.044677734375, "pos_frac": 0.71875, "sample": [14.05375862121582, -3.429615020751953, 562.8488159179688, -47.34959411621094, 179.21163940429688, 212.84365844726562, -223.64268493652344, -125.58513641357422, 35.67192077636719, -187.09365844726562, 149.5694122314453, 55.419403076171875, 156.6735382080078, 253.82064819335938, 420.90399169921875, 754.044677734375, 95.27750396728516, 93.10592651367188, 535.8712768554688, 363.82635498046875, 56.79838562011719, 170.2306671142578, 92.00639343261719, 583.7310791015625, 25.27550506591797, 315.2463684082031, 2.1317138671875, -122.97833251953125, -212.6053466796875, -53.564273834228516, 211.6066436767578, 443.5639343261719, 212.1920166015625, 176.72518920898438, 311.4111328125, 533.42919921875, 9.049947738647461, 456.05010986328125, -98.27981567382812, 72.83270263671875, 137.22015380859375, 404.7298583984375, -42.53727340698242, 375.5940246582031, 93.24209594726562, 484.996337890625, -45.902156829833984, -204.99285888671875, -115.15959167480469, -60.659515380859375, 46.78533935546875, -57.942481994628906, 365.94781494140625, 96.04623413085938, -47.715423583984375, 215.05934143066406, -36.70327377319336, 478.4200439453125, -45.50835037231445, 663.0054321289062, 422.5264892578125, 70.07148742675781, 335.22991943359375, 215.82806396484375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000233.npy"} +{"epoch": 0.342143906020558, "step": 234, "batch_size": 64, "mean": 162.1198272705078, "std": 182.42877197265625, "min": -243.78053283691406, "p10": -45.71434402465818, "median": 144.39620208740234, "p90": 376.7450866699219, "max": 661.6741943359375, "pos_frac": 0.828125, "sample": [-72.9651870727539, 248.5166473388672, 377.0994873046875, 259.8043212890625, 364.6535339355469, 124.10546112060547, 158.81622314453125, 204.34957885742188, 429.5914001464844, -17.953733444213867, 340.38299560546875, 106.54356384277344, -243.78053283691406, 0.8140907287597656, 78.74822998046875, 279.7751159667969, 85.1439208984375, -101.43177032470703, 87.45982360839844, 357.064453125, 642.8321533203125, 171.16317749023438, 10.850393295288086, 6.213584899902344, 74.09667205810547, -24.616744995117188, 200.62704467773438, 300.64276123046875, 19.115554809570312, 375.91815185546875, -9.88493537902832, 151.4864044189453, 476.426513671875, 157.56881713867188, 544.6071166992188, 247.03250122070312, 71.83672332763672, 137.30599975585938, 98.11161804199219, 224.6752166748047, 272.3099670410156, 186.220703125, 254.18435668945312, 72.79900360107422, 0.7949752807617188, 75.65487670898438, -123.02256774902344, -16.73180389404297, 279.8280944824219, 106.43324279785156, 94.49298858642578, 407.64190673828125, 661.6741943359375, -54.75617218017578, 283.0950012207031, 177.38250732421875, -105.14176940917969, 19.785985946655273, 193.2584686279297, 72.18797302246094, 134.5221405029297, -122.54380798339844, 355.5155029296875, 207.33676147460938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000234.npy"} +{"epoch": 0.3436123348017621, "step": 235, "batch_size": 64, "mean": 102.77667236328125, "std": 191.23876953125, "min": -480.51678466796875, "p10": -96.68790512084959, "median": 115.20473098754883, "p90": 331.2009307861328, "max": 610.7205810546875, "pos_frac": 0.734375, "sample": [31.581878662109375, 205.66098022460938, -480.51678466796875, 43.120330810546875, 134.3233642578125, 569.169921875, 117.10034942626953, 299.68341064453125, -49.52908706665039, 165.69471740722656, -171.89404296875, -59.8719482421875, 157.17568969726562, 331.327392578125, 17.187896728515625, 32.15504455566406, -73.91818237304688, -16.32128143310547, 157.7802734375, -296.00408935546875, 19.91225814819336, 37.5723876953125, -28.32848358154297, 82.0119400024414, 387.0498352050781, 207.77027893066406, -24.73332977294922, -110.10757446289062, 125.11121368408203, 202.50987243652344, 121.51995849609375, 118.52647399902344, 336.56396484375, -64.97454833984375, 145.3773651123047, 211.55706787109375, -24.753480911254883, 330.9058532714844, 46.10032653808594, 148.08526611328125, 22.277950286865234, 97.23887634277344, 610.7205810546875, 277.3354797363281, 182.67254638671875, 164.38864135742188, 166.49227905273438, -37.952598571777344, -229.18081665039062, 165.8327178955078, 586.272705078125, 298.0347900390625, -134.7158203125, 28.710983276367188, 113.30911254882812, 3.666872024536133, -106.44635772705078, 224.3057861328125, 112.86770629882812, 27.2313232421875, 214.32012939453125, 128.2030792236328, 351.5525817871094, -71.01419067382812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000235.npy"} +{"epoch": 0.34508076358296624, "step": 236, "batch_size": 64, "mean": 165.87701416015625, "std": 159.79266357421875, "min": -94.6139144897461, "p10": -39.063559341430654, "median": 137.82152557373047, "p90": 396.26282348632816, "max": 499.9494323730469, "pos_frac": 0.84375, "sample": [429.4028015136719, 70.5820541381836, 8.043777465820312, 153.92019653320312, 251.45950317382812, 127.65055847167969, 492.9084167480469, 30.734638214111328, 54.947383880615234, 152.7369384765625, 309.5640869140625, 398.3205871582031, 100.98113250732422, 302.7920227050781, 292.76043701171875, 368.0461730957031, 67.86602783203125, 267.88433837890625, -43.859413146972656, 374.410888671875, 165.74493408203125, -71.08380126953125, 72.9266586303711, 286.46563720703125, 215.77035522460938, 16.54433822631836, 120.41790008544922, 391.7415771484375, 187.39260864257812, 38.02998352050781, 474.4830017089844, 122.0064468383789, -78.46546936035156, 250.5969696044922, -3.2422218322753906, 257.3382568359375, 103.85340118408203, 487.19976806640625, 70.93904876708984, -27.873233795166016, 129.69961547851562, 133.3057861328125, -20.556665420532227, 36.35468292236328, 18.211856842041016, 232.2336883544922, 142.33726501464844, 102.44865417480469, -94.6139144897461, -78.77560424804688, 201.91592407226562, 499.9494323730469, 374.75347900390625, 145.77020263671875, 217.78952026367188, 129.68722534179688, 298.4082336425781, 91.44621276855469, 4.988349914550781, 265.48187255859375, -63.24555969238281, 398.20050048828125, -61.49516677856445, 219.89537048339844], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000236.npy"} +{"epoch": 0.3465491923641703, "step": 237, "batch_size": 64, "mean": 122.33268737792969, "std": 192.7683868408203, "min": -411.50897216796875, "p10": -110.09808120727538, "median": 115.83683776855469, "p90": 357.19478759765644, "max": 721.1951293945312, "pos_frac": 0.734375, "sample": [198.14540100097656, 201.6851806640625, 189.06964111328125, 178.59945678710938, -9.877922058105469, 130.1378936767578, 132.27468872070312, 162.13250732421875, 20.343151092529297, 203.51934814453125, 73.95281219482422, 31.086669921875, 462.25714111328125, 112.62303924560547, 227.35739135742188, -175.57984924316406, -140.2716827392578, 163.136962890625, -115.59699249267578, 76.18346405029297, 153.29324340820312, -122.88713073730469, 375.0281982421875, 721.1951293945312, -94.36308288574219, 453.8741455078125, 298.760986328125, 31.7208251953125, -18.065231323242188, 30.01876449584961, 503.0582275390625, 79.72016906738281, 245.2652130126953, 207.89642333984375, 20.11969757080078, 315.58349609375, -20.987159729003906, 157.27304077148438, 312.298095703125, -32.52686309814453, 233.69412231445312, 99.18462371826172, 185.00918579101562, 252.86495971679688, 386.613525390625, 70.41998291015625, 28.67193603515625, 9.976036071777344, -411.50897216796875, 299.2338562011719, -54.61100769042969, 69.18636322021484, -172.82423400878906, -45.26671600341797, 220.67404174804688, 308.82830810546875, -62.40290069580078, 471.077880859375, -3.6180496215820312, -155.92747497558594, -97.26728820800781, 119.0506362915039, 69.85403442382812, 270.9249572753906], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000237.npy"} +{"epoch": 0.34801762114537443, "step": 238, "batch_size": 64, "mean": 130.10498046875, "std": 189.42018127441406, "min": -239.505859375, "p10": -58.767035675048824, "median": 88.4557876586914, "p90": 351.44429016113287, "max": 833.3462524414062, "pos_frac": 0.765625, "sample": [3.9620208740234375, -27.794754028320312, 10.531402587890625, 129.94703674316406, 28.612476348876953, -9.55517578125, 153.48509216308594, 304.47125244140625, -167.4556884765625, 454.26043701171875, 78.0287094116211, 156.36424255371094, 76.32521057128906, 69.98243713378906, -51.534095764160156, 220.49913024902344, 333.5861511230469, -95.33961486816406, 24.009817123413086, 54.41495895385742, 9.06690788269043, 652.73583984375, 50.14140319824219, 198.27183532714844, -61.86686706542969, 538.5411376953125, 122.1492919921875, -65.20662689208984, 133.4635467529297, -128.33670043945312, 151.59713745117188, 125.07183837890625, 235.7083740234375, 98.88286590576172, 395.22393798828125, 19.906505584716797, 67.65884399414062, 136.0536651611328, 833.3462524414062, 221.789306640625, -16.402362823486328, 36.527587890625, 75.58232116699219, -8.780693054199219, -23.350601196289062, 321.3277893066406, 225.1632537841797, 177.591552734375, 69.45500946044922, 466.5157775878906, 255.36402893066406, 21.400314331054688, 204.19100952148438, 274.6855773925781, -239.505859375, 359.0977783203125, 268.1764831542969, 61.44923400878906, -74.57317352294922, -45.660186767578125, 199.51541137695312, 143.02035522460938, -47.836639404296875, 142.7646942138672], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000238.npy"} +{"epoch": 0.34948604992657856, "step": 239, "batch_size": 64, "mean": 140.81826782226562, "std": 188.0844268798828, "min": -319.09417724609375, "p10": -71.24435043334961, "median": 128.63917922973633, "p90": 405.05075378417973, "max": 554.9912109375, "pos_frac": 0.796875, "sample": [386.91845703125, 280.76019287109375, -177.172119140625, 383.05499267578125, -113.68431091308594, 176.40570068359375, 60.59014892578125, 141.74832153320312, -71.45257568359375, -126.90167236328125, 26.790943145751953, 30.08453369140625, 89.54804992675781, 289.9526672363281, 89.29319763183594, 2.8495330810546875, 554.9912109375, 129.58995056152344, 197.12759399414062, 127.68840789794922, 228.3705596923828, 201.9639434814453, 263.49658203125, -61.49673080444336, -70.75849151611328, 21.042173385620117, 184.2751007080078, 416.9030456542969, 242.5237579345703, 107.36579895019531, -22.69319725036621, 49.964385986328125, 416.70684814453125, 411.3536376953125, -305.04351806640625, 93.70502471923828, -35.09258270263672, 356.068115234375, 11.324539184570312, 390.3440246582031, 20.654666900634766, 359.216796875, 373.5503845214844, 212.1492919921875, -23.934158325195312, 31.828529357910156, -319.09417724609375, 133.1279754638672, -33.45853805541992, 190.95053100585938, 130.3916473388672, 423.62567138671875, 326.29852294921875, 67.38594055175781, 486.78118896484375, 193.81802368164062, 456.390625, 91.61067199707031, 118.88554382324219, 141.95431518554688, 86.81393432617188, -77.80261993408203, 242.1007537841797, 0.6168727874755859], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000239.npy"} +{"epoch": 0.3509544787077827, "step": 240, "batch_size": 64, "mean": 114.69682312011719, "std": 210.41390991210938, "min": -305.28271484375, "p10": -214.09062194824216, "median": 128.28654098510742, "p90": 425.7636352539063, "max": 593.64892578125, "pos_frac": 0.75, "sample": [127.65019989013672, 102.39762878417969, 190.79324340820312, 108.44403839111328, 172.7472686767578, 345.95965576171875, 139.07986450195312, 430.2310791015625, -223.376220703125, -52.853736877441406, 203.7671661376953, 136.04664611816406, -254.18539428710938, 205.88360595703125, 415.339599609375, 40.228736877441406, -241.47125244140625, 64.51605224609375, 44.39037322998047, 319.1353759765625, 178.46340942382812, 500.3011474609375, 210.32650756835938, 149.50790405273438, 85.47048950195312, -2.9283180236816406, -283.2637023925781, 145.4093017578125, 80.69271850585938, 505.6614990234375, 228.6338653564453, 122.94908142089844, 46.62760925292969, 20.171791076660156, -271.71380615234375, 458.3361511230469, 201.7678680419922, 189.3519744873047, -238.9541015625, 98.6821517944336, -70.46633911132812, 303.0833435058594, -192.42422485351562, 41.08164596557617, 593.64892578125, -188.91473388671875, 273.48095703125, -27.652963638305664, 262.4442443847656, 216.34706115722656, 128.92288208007812, 450.01898193359375, -49.80914306640625, 114.9297103881836, -153.2213897705078, 142.02005004882812, 89.81572723388672, -79.48275756835938, 142.94454956054688, 244.6393280029297, 51.24713134765625, 136.1650390625, 516.8439331054688, -305.28271484375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000240.npy"} +{"epoch": 0.3524229074889868, "step": 241, "batch_size": 64, "mean": 189.0645294189453, "std": 222.94143676757812, "min": -248.6968231201172, "p10": -31.96116828918457, "median": 173.90509033203125, "p90": 455.6630828857422, "max": 896.102294921875, "pos_frac": 0.828125, "sample": [94.05895233154297, 89.75736999511719, -242.182373046875, -8.304025650024414, 42.251922607421875, 239.93450927734375, 258.24481201171875, 458.90631103515625, 79.75119018554688, 252.43801879882812, 109.52671813964844, -72.53253173828125, -8.407886505126953, 579.3104858398438, 9.760191917419434, 225.12586975097656, -32.21165084838867, 369.8934020996094, 200.0380859375, 175.63003540039062, 351.1793212890625, 181.42526245117188, 126.25835418701172, 157.58290100097656, 896.102294921875, 448.0955505371094, 221.28271484375, 152.07180786132812, 629.20654296875, 69.80826568603516, 870.2589721679688, 508.6468505859375, 325.6016845703125, 59.191490173339844, 64.36158752441406, 185.7116241455078, 249.6119384765625, 98.3116455078125, -31.376708984375, 186.72975158691406, 105.2341079711914, 53.89483642578125, 106.51858520507812, -16.167449951171875, 259.270263671875, 45.659088134765625, 345.30694580078125, -86.53506469726562, 237.21035766601562, 200.0950927734375, 86.18630981445312, 22.030601501464844, 370.1006164550781, 537.3697509765625, 275.50714111328125, 172.18014526367188, -190.664306640625, 291.49102783203125, 289.9567565917969, 304.18994140625, -248.6968231201172, 418.6047668457031, 107.13346862792969, -156.7972412109375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000241.npy"} +{"epoch": 0.35389133627019087, "step": 242, "batch_size": 64, "mean": 125.836669921875, "std": 198.9674072265625, "min": -390.22601318359375, "p10": -72.01899185180663, "median": 93.55659484863281, "p90": 330.9319458007813, "max": 804.4995727539062, "pos_frac": 0.765625, "sample": [-90.28538513183594, 9.40139389038086, 163.0240478515625, 252.18641662597656, 34.379085540771484, -390.22601318359375, -7.6407623291015625, -21.734825134277344, 320.89166259765625, 199.98031616210938, 17.7865047454834, 313.10107421875, 717.0453491210938, -65.20474243164062, -42.46100616455078, 66.07364654541016, 110.21993255615234, 76.24261474609375, 92.18914794921875, 15.871679306030273, 219.1606903076172, 94.92404174804688, 266.6900634765625, 199.27444458007812, 270.89996337890625, -74.93938446044922, 86.86870574951172, 47.80559539794922, 130.29237365722656, -97.96578979492188, 168.16317749023438, 239.834228515625, 89.75194549560547, -153.43328857421875, 335.23492431640625, 146.24911499023438, 343.6405944824219, 486.76824951171875, 11.842903137207031, 98.26986694335938, 99.02006530761719, 210.84034729003906, -38.983856201171875, 804.4995727539062, 67.33219146728516, 74.53743743896484, 479.86920166015625, 84.60659790039062, -36.86643981933594, 18.45805549621582, -181.32949829101562, 298.841796875, 220.80006408691406, -19.578346252441406, 53.68672180175781, 141.87069702148438, 156.3372039794922, -161.46633911132812, 255.28465270996094, 266.2184143066406, 13.436113357543945, 498.0577087402344, 131.74453735351562, -63.843162536621094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000242.npy"} +{"epoch": 0.355359765051395, "step": 243, "batch_size": 64, "mean": 158.58253479003906, "std": 192.64886474609375, "min": -290.1872253417969, "p10": -74.03828125, "median": 133.88812255859375, "p90": 413.91855163574223, "max": 766.5216674804688, "pos_frac": 0.84375, "sample": [393.936767578125, 12.37887191772461, 78.17691802978516, 528.9317016601562, 198.65792846679688, 30.75572967529297, 152.91419982910156, 181.6785888671875, -103.94859313964844, -111.0519790649414, -74.76632690429688, 82.80862426757812, 257.0789794921875, 17.00181007385254, -157.07334899902344, 47.445037841796875, -91.3238525390625, 344.72509765625, 161.56556701660156, 163.49191284179688, -290.1872253417969, 30.964317321777344, 184.7479248046875, 30.476646423339844, -0.6879482269287109, 409.4163818359375, 45.897300720214844, 237.3104248046875, 106.24156951904297, 368.95916748046875, 328.65594482421875, 258.10321044921875, -81.36597442626953, 133.6507568359375, -72.33950805664062, 335.2183532714844, 283.9701232910156, 493.8901672363281, 389.2755126953125, 289.0755920410156, 310.18896484375, 138.29318237304688, 449.92303466796875, 766.5216674804688, 415.86102294921875, 40.73054504394531, 134.12548828125, 54.61075210571289, 415.8480529785156, -28.734039306640625, 126.95823669433594, 204.85015869140625, 14.76336669921875, 125.78302001953125, 525.3617553710938, 17.955055236816406, 136.14093017578125, 69.04002380371094, 14.554573059082031, 217.06048583984375, 3.5753631591796875, 88.57833862304688, 68.81288146972656, 243.82244873046875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000243.npy"} +{"epoch": 0.3568281938325991, "step": 244, "batch_size": 64, "mean": 160.67831420898438, "std": 171.43768310546875, "min": -142.6009063720703, "p10": -27.790362358093258, "median": 145.76449584960938, "p90": 414.96916198730474, "max": 662.769775390625, "pos_frac": 0.8125, "sample": [108.94297790527344, -0.08687591552734375, 110.05563354492188, 162.82626342773438, -11.11014175415039, 58.81306076049805, 475.4150085449219, 135.49087524414062, -63.067657470703125, -25.1502685546875, 159.5181427001953, 239.47665405273438, 28.868560791015625, 306.0057373046875, 60.679931640625, 159.5327606201172, 3.9320755004882812, 421.8735656738281, 22.376163482666016, 114.40393829345703, -142.6009063720703, 142.39627075195312, 270.67987060546875, -76.19837951660156, 189.45111083984375, 27.05558967590332, 149.13272094726562, 206.98175048828125, 157.8223114013672, 63.129791259765625, 294.0966796875, 132.397705078125, 370.415771484375, 331.0008544921875, 514.3271484375, 555.6544799804688, 153.1692657470703, 431.6578063964844, 31.276947021484375, 662.769775390625, 202.85089111328125, -17.6310977935791, 358.3863525390625, 184.83370971679688, 177.60816955566406, -86.77486419677734, 185.8673553466797, -28.921831130981445, -49.262420654296875, 262.18707275390625, 25.063629150390625, -40.146873474121094, 192.61959838867188, 59.829307556152344, 10.627838134765625, 119.06663513183594, 398.85888671875, 253.12835693359375, -23.602039337158203, 117.33526611328125, 45.954620361328125, 449.45831298828125, 218.5832977294922, 334.0785217285156], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000244.npy"} +{"epoch": 0.35829662261380324, "step": 245, "batch_size": 64, "mean": 123.89741516113281, "std": 195.14279174804688, "min": -575.5533447265625, "p10": -29.65414237976074, "median": 102.45393753051758, "p90": 353.03959655761724, "max": 657.3109741210938, "pos_frac": 0.765625, "sample": [117.00940704345703, 382.79815673828125, 167.96969604492188, 128.87730407714844, 194.412353515625, 146.34681701660156, 62.229217529296875, -14.162002563476562, 199.43856811523438, 85.3135986328125, 106.10342407226562, 102.70442199707031, 140.7678680419922, -20.624141693115234, 168.95176696777344, 216.08114624023438, 199.30474853515625, 272.1602783203125, 67.86019897460938, 12.013813018798828, 578.6162109375, 149.9213104248047, -23.15752410888672, 359.09796142578125, 40.53981018066406, -24.829452514648438, 212.1279754638672, -0.4374351501464844, -194.77967834472656, 26.65326690673828, 280.67901611328125, 143.01480102539062, 657.3109741210938, -187.2819366455078, 183.54061889648438, -13.896278381347656, 19.431724548339844, 80.71942138671875, 295.7232971191406, 121.73432922363281, -31.721866607666016, 242.812744140625, 179.21241760253906, -152.59005737304688, 102.20345306396484, -575.5533447265625, 312.667724609375, 338.9034118652344, 62.16712188720703, 252.81883239746094, 372.8426208496094, 36.38703918457031, 579.8133544921875, -51.47006607055664, 28.410449981689453, 569.1660766601562, 80.96846008300781, 100.65225982666016, -1.2582550048828125, -132.52725219726562, 57.16128158569336, 55.43476867675781, 65.64083862304688, -2.9925289154052734], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000245.npy"} +{"epoch": 0.35976505139500736, "step": 246, "batch_size": 64, "mean": 126.3110122680664, "std": 196.211181640625, "min": -234.11607360839844, "p10": -90.36785049438475, "median": 80.58222198486328, "p90": 374.41171875000003, "max": 786.955322265625, "pos_frac": 0.765625, "sample": [231.97802734375, 380.98760986328125, 204.5703125, 105.69315338134766, 390.32781982421875, 173.78599548339844, 786.955322265625, -189.91238403320312, 56.864253997802734, -234.11607360839844, 9.836021423339844, 142.4171142578125, 77.34181213378906, -7.786931991577148, 174.1998291015625, 363.51318359375, 308.54937744140625, 49.13481903076172, 31.349105834960938, 52.26344299316406, 54.14384460449219, 379.08251953125, 161.4828643798828, -107.72139739990234, 107.69735717773438, -140.37460327148438, 234.3625946044922, -83.1790771484375, 307.7956237792969, -93.4487533569336, 2.4148597717285156, -51.86260223388672, 111.11125183105469, 71.50660705566406, 351.6190185546875, -2.9508438110351562, 16.967187881469727, -62.24770736694336, -33.49652099609375, 202.56072998046875, 422.8463439941406, 100.2525405883789, 52.53081512451172, 200.24057006835938, 83.8226318359375, 236.93557739257812, 64.59523010253906, -65.05397033691406, 13.504322052001953, 235.6060028076172, -137.818115234375, 123.54603576660156, 361.7619323730469, 227.28736877441406, 12.6602783203125, 20.28541374206543, 17.162273406982422, 618.835205078125, 297.38873291015625, 574.4308471679688, -35.29657745361328, 272.6783447265625, -148.92442321777344, 1.2131233215332031], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000246.npy"} +{"epoch": 0.36123348017621143, "step": 247, "batch_size": 64, "mean": 186.1329345703125, "std": 202.11236572265625, "min": -312.9362487792969, "p10": -19.379037475585935, "median": 156.02623748779297, "p90": 441.73972778320314, "max": 703.9353637695312, "pos_frac": 0.859375, "sample": [-196.4279022216797, 55.89344787597656, -20.769668579101562, 428.0023193359375, 98.43946838378906, 509.806396484375, 192.53945922851562, 389.7796630859375, 18.072717666625977, 355.1242370605469, 408.1128845214844, 518.3258056640625, 195.47451782226562, 265.61688232421875, 55.76331329345703, 359.7937927246094, 476.53619384765625, 40.100257873535156, 361.6231994628906, 276.773193359375, 210.28839111328125, 34.703575134277344, 49.26725769042969, 45.28180694580078, -14.6622314453125, 252.51576232910156, 46.511085510253906, 43.67570495605469, 87.03357696533203, 130.13900756835938, 228.10894775390625, 374.877197265625, 71.22364044189453, 210.88804626464844, -16.134231567382812, 438.90789794921875, 518.3685302734375, 415.2158203125, 96.57200622558594, -60.47136688232422, 25.858814239501953, 226.32582092285156, -24.304229736328125, 170.88723754882812, 62.121726989746094, 14.656295776367188, 129.5516357421875, 421.64508056640625, 703.9353637695312, 100.24308013916016, 13.345405578613281, -104.16229248046875, 101.12034606933594, 665.5931396484375, 221.9840087890625, -58.520057678222656, 239.45028686523438, 307.71258544921875, 218.29808044433594, -312.9362487792969, 442.953369140625, 8.842010498046875, 141.1652374267578, 245.8504638671875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000247.npy"} +{"epoch": 0.36270190895741555, "step": 248, "batch_size": 64, "mean": 161.00189208984375, "std": 152.23699951171875, "min": -189.7322235107422, "p10": 0.10378417968750103, "median": 147.46871948242188, "p90": 357.4872100830079, "max": 522.2178955078125, "pos_frac": 0.890625, "sample": [314.69598388671875, 413.2703552246094, 111.20821380615234, 191.10028076171875, 206.894775390625, 1.1362724304199219, 182.90098571777344, 379.8551940917969, 118.73625183105469, -123.07891845703125, 58.51056671142578, 310.63629150390625, 146.18890380859375, 33.83404541015625, 216.3303985595703, 40.21422576904297, 32.98420715332031, 26.09321403503418, 154.08094787597656, -46.46940994262695, 143.79885864257812, 222.18783569335938, 217.31402587890625, 522.2178955078125, 67.45004272460938, -23.730804443359375, 128.11038208007812, 336.4835205078125, 90.24864196777344, 234.05929565429688, 257.205078125, -189.7322235107422, 147.80081176757812, 363.23797607421875, 70.31948852539062, 179.75332641601562, 89.98747253417969, 5.5334930419921875, -18.093017578125, 52.51681137084961, 233.3582000732422, 306.54827880859375, 177.93783569335938, 430.30218505859375, 107.04779052734375, 57.11671447753906, 487.05218505859375, 311.3078918457031, 119.85812377929688, 496.43243408203125, 68.3434066772461, 193.86923217773438, 298.5856628417969, 259.456298828125, 228.63644409179688, 344.0687561035156, -0.3387107849121094, 37.02229309082031, 156.18983459472656, 268.9818115234375, 147.13662719726562, 27.182361602783203, 16.679523468017578, -134.4454803466797], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000248.npy"} +{"epoch": 0.3641703377386197, "step": 249, "batch_size": 64, "mean": 86.68630981445312, "std": 205.11019897460938, "min": -364.2518005371094, "p10": -170.28478698730467, "median": 66.92396354675293, "p90": 331.00529785156266, "max": 687.7084350585938, "pos_frac": 0.65625, "sample": [-74.89955139160156, 267.01873779296875, -166.0278778076172, -100.43589782714844, 119.17023468017578, -143.17965698242188, -25.291824340820312, 229.17161560058594, 13.851654052734375, 439.1171569824219, 6.811187744140625, 20.46258544921875, 72.53782653808594, 42.899986267089844, 226.2189178466797, -364.2518005371094, -172.1091766357422, 479.965087890625, 148.58700561523438, -52.206703186035156, 170.36318969726562, 219.86451721191406, -177.6102752685547, 157.98867797851562, -19.356063842773438, 379.658447265625, -158.42745971679688, 292.36199951171875, 109.9213638305664, 62.07637405395508, -6.893581390380859, -37.12588119506836, 290.9396667480469, 58.60884094238281, 116.95638275146484, -45.85322570800781, 143.89852905273438, -328.45257568359375, 115.39642333984375, 285.58251953125, 223.80563354492188, -42.27751159667969, 687.7084350585938, -70.13682556152344, -219.30221557617188, -192.9379119873047, 36.45178985595703, 22.839794158935547, 143.92327880859375, 239.77186584472656, 138.21185302734375, 271.7426452636719, -227.88763427734375, 255.82553100585938, 347.56671142578125, 263.4306640625, 41.922027587890625, -99.44235229492188, -76.51616668701172, 251.0519561767578, 71.77155303955078, 458.17218017578125, 420.5565185546875, 4.364768981933594], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000249.npy"} +{"epoch": 0.3656387665198238, "step": 250, "batch_size": 64, "mean": 159.2041015625, "std": 180.52967834472656, "min": -178.06988525390625, "p10": -82.64109268188473, "median": 165.4166717529297, "p90": 411.8427062988283, "max": 586.4535522460938, "pos_frac": 0.796875, "sample": [227.62274169921875, -6.330583572387695, 34.76679992675781, 1.48583984375, -143.92727661132812, 174.03529357910156, 69.94153594970703, 503.5985107421875, 206.88768005371094, -138.2841339111328, 70.91576385498047, 538.4957275390625, 259.8224792480469, 162.8037872314453, 214.19528198242188, 206.99632263183594, 52.360984802246094, 145.70199584960938, 304.7800598144531, 339.11920166015625, 202.56344604492188, -109.16739654541016, -108.80097198486328, 106.68775939941406, 93.45064544677734, 283.5198669433594, 73.75629425048828, -16.75917625427246, 81.12199401855469, -26.48443603515625, -98.21965789794922, 20.376192092895508, 126.32853698730469, 311.9013366699219, -46.291107177734375, 586.4535522460938, 205.04840087890625, 196.8997802734375, -169.04428100585938, -34.42046356201172, 209.1977996826172, 436.85064697265625, 289.51617431640625, 150.830078125, 213.1517333984375, 72.45308685302734, 288.5062255859375, 129.70175170898438, 284.340087890625, 130.2926025390625, 42.52535629272461, 7.3506622314453125, 168.02955627441406, 239.97372436523438, -178.06988525390625, 258.7867736816406, 432.51727294921875, 200.96737670898438, 198.8602294921875, 520.38330078125, 363.60205078125, -11.26103401184082, 516.788330078125, 319.8604736328125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000250.npy"} +{"epoch": 0.3671071953010279, "step": 251, "batch_size": 64, "mean": 105.92207336425781, "std": 204.53468322753906, "min": -444.985107421875, "p10": -118.9435165405273, "median": 88.46629333496094, "p90": 350.9104248046875, "max": 775.020751953125, "pos_frac": 0.71875, "sample": [501.6015930175781, 17.054203033447266, 342.4049377441406, -66.27706146240234, 232.19418334960938, -293.66107177734375, -444.985107421875, 775.020751953125, -29.372154235839844, 187.55886840820312, 217.54913330078125, -189.92874145507812, 310.14044189453125, 45.75391387939453, 80.07565307617188, -81.9930419921875, -295.2843017578125, 407.1374816894531, 40.978233337402344, 176.4196014404297, 41.722721099853516, 262.4631042480469, -19.45781707763672, -147.49853515625, 15.16015625, 212.3400115966797, -134.77943420410156, -77.20997619628906, 70.50213623046875, 247.1497802734375, 75.47418212890625, -160.56570434570312, 392.9080505371094, -65.81450653076172, 68.4031753540039, 235.71600341796875, 32.69878387451172, 151.52389526367188, 145.5829620361328, 141.28155517578125, 28.069000244140625, 243.5449676513672, -51.056297302246094, 340.104248046875, -5.180999755859375, 372.2211608886719, 225.1447296142578, 184.4290313720703, 103.26355743408203, 135.062255859375, -43.12279510498047, 7.5194549560546875, 572.0697021484375, 33.64500045776367, 354.5556335449219, 196.28036499023438, -22.048490524291992, 186.58538818359375, 96.85693359375, -19.798755645751953, 33.04308319091797, 155.68820190429688, 107.29357147216797, 124.85557556152344], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000251.npy"} +{"epoch": 0.368575624082232, "step": 252, "batch_size": 64, "mean": 141.09707641601562, "std": 157.73899841308594, "min": -474.97491455078125, "p10": -21.3787254333496, "median": 135.8102798461914, "p90": 350.79319763183605, "max": 478.9739685058594, "pos_frac": 0.828125, "sample": [328.9163818359375, 360.1689758300781, 4.609790802001953, 191.5308837890625, 264.4543151855469, 83.81222534179688, 155.921630859375, 380.1418762207031, 96.05877685546875, 78.1017074584961, 203.53639221191406, 34.67027282714844, 173.39144897460938, 67.38029479980469, 96.32505798339844, 176.7273406982422, 22.26852798461914, 96.82820129394531, 209.57135009765625, -129.77615356445312, 267.5872802734375, 182.8805389404297, -11.852344512939453, 418.3785705566406, 305.5199890136719, -130.40626525878906, -29.57431983947754, 57.208831787109375, -44.422332763671875, 172.20440673828125, -25.46146011352539, 236.73291015625, -2.2076244354248047, 134.61410522460938, 84.68780517578125, 74.1624755859375, 116.0072021484375, 478.9739685058594, 382.56988525390625, 362.7516174316406, 58.931907653808594, 187.59872436523438, 197.27476501464844, 27.856658935546875, 83.74139404296875, 272.3004150390625, 261.0553283691406, 262.8563232421875, 209.12548828125, 64.96824645996094, -3.4740142822265625, -474.97491455078125, 298.35931396484375, 212.5482635498047, 129.95709228515625, 108.26271057128906, -7.772136688232422, 198.6397705078125, 462.625244140625, -75.871337890625, 137.00645446777344, 64.30126953125, 220.0030517578125, 209.899169921875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000252.npy"} +{"epoch": 0.3700440528634361, "step": 253, "batch_size": 64, "mean": 150.87002563476562, "std": 192.83999633789062, "min": -362.2002868652344, "p10": -99.8592658996582, "median": 136.5029525756836, "p90": 419.04707031250007, "max": 643.3164672851562, "pos_frac": 0.78125, "sample": [-92.92623138427734, -9.253273010253906, 265.6936950683594, -30.580734252929688, -58.844451904296875, 101.02620697021484, 233.37841796875, 70.99956512451172, 199.7079620361328, 51.12187194824219, 99.63784790039062, 643.3164672851562, 229.4901580810547, 99.9415512084961, 268.2285461425781, 138.237060546875, 397.90155029296875, 95.44938659667969, -104.88420104980469, 108.35365295410156, 14.90153694152832, -362.2002868652344, 291.3658447265625, 446.6610412597656, 440.4111022949219, 10.152238845825195, 481.094482421875, 50.34259796142578, -27.48876953125, 26.167957305908203, 32.14055633544922, -103.35826110839844, 125.00796508789062, 236.0084228515625, 285.4835510253906, 171.39962768554688, 311.3623962402344, -137.6254119873047, 345.2752685546875, 113.1983413696289, 288.46917724609375, 327.867431640625, 204.69168090820312, 317.1094055175781, 485.75555419921875, 151.6585235595703, 134.7688446044922, 396.2143249511719, 162.8056640625, -220.35824584960938, 428.10943603515625, 203.51644897460938, -118.1192626953125, 104.01282501220703, -102.83056640625, 71.00146484375, 179.3832550048828, 290.64129638671875, 280.0388488769531, -28.32220458984375, -68.513427734375, 24.564239501953125, 487.4743347167969, 199.44744873046875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000253.npy"} +{"epoch": 0.37151248164464024, "step": 254, "batch_size": 64, "mean": 128.94290161132812, "std": 234.5643310546875, "min": -294.5498962402344, "p10": -172.1676177978515, "median": 106.98323440551758, "p90": 434.53589477539083, "max": 925.529052734375, "pos_frac": 0.703125, "sample": [94.79229736328125, -56.91234588623047, 196.31671142578125, 106.16455841064453, 105.95140075683594, 112.60031127929688, 0.0254058837890625, -88.91297149658203, 156.29849243164062, -31.335975646972656, 154.105224609375, 216.25352478027344, 311.6971740722656, 10.938261032104492, -189.87936401367188, -294.5498962402344, 201.72805786132812, -58.32236862182617, 107.80191040039062, 238.2698974609375, 9.430240631103516, 245.30792236328125, 601.1683349609375, 349.9708557128906, 382.4715576171875, -213.1256103515625, 496.19671630859375, 363.7173156738281, -23.396385192871094, -18.17525291442871, 147.23179626464844, 230.1488037109375, 46.67066192626953, -216.85421752929688, 11.448680877685547, 270.1917724609375, -68.15131378173828, 456.84918212890625, -191.885986328125, -17.998577117919922, -50.03978729248047, -226.0692138671875, 51.49205780029297, 488.33697509765625, 64.64777374267578, 256.1239318847656, 5.172040939331055, 372.64471435546875, 54.648948669433594, 158.02728271484375, 925.529052734375, 154.5834197998047, 121.41880798339844, -17.28386688232422, -228.22640991210938, 633.0064697265625, 141.599365234375, 253.86788940429688, 177.01121520996094, 61.19538879394531, -130.8402099609375, 674.2147216796875, 170.81996154785156, -13.7808837890625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000254.npy"} +{"epoch": 0.37298091042584436, "step": 255, "batch_size": 64, "mean": 135.67572021484375, "std": 216.8087921142578, "min": -557.2896728515625, "p10": -110.97924957275387, "median": 90.68087005615234, "p90": 384.30452270507817, "max": 753.6959228515625, "pos_frac": 0.734375, "sample": [753.6959228515625, -130.14154052734375, 14.239471435546875, 405.09234619140625, 232.0665283203125, -70.908447265625, -48.78894805908203, -29.501266479492188, -557.2896728515625, 63.2133903503418, 315.59857177734375, 101.07704162597656, -34.18724822998047, 27.172618865966797, 260.8683776855469, 299.58941650390625, -155.84530639648438, -256.2009582519531, 157.58871459960938, 341.3604736328125, 273.3440246582031, 13.828996658325195, 44.968284606933594, 8.328174591064453, 49.49839401245117, -59.83446502685547, -147.6946563720703, 429.17822265625, -0.3918266296386719, 205.80653381347656, 273.40814208984375, 237.2558135986328, 80.28469848632812, 375.647705078125, 394.5194091796875, 320.7735290527344, 305.3663635253906, 358.3631286621094, 388.01458740234375, 626.5201416015625, 29.78174591064453, -20.86301040649414, 332.847900390625, 158.34146118164062, 4.768871307373047, 241.9249725341797, 37.11497497558594, 252.52438354492188, -128.15245056152344, 354.5085144042969, 459.3973388671875, 52.58567810058594, 200.10675048828125, 52.32215118408203, -130.78652954101562, 198.56317138671875, -10.467124938964844, -27.155494689941406, 25.05736541748047, 155.94613647460938, -54.011680603027344, 32.84474563598633, 286.0138244628906, 314.14801025390625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000255.npy"} +{"epoch": 0.3744493392070485, "step": 256, "batch_size": 64, "mean": 151.77967834472656, "std": 200.36892700195312, "min": -346.6300964355469, "p10": -86.12678298950195, "median": 134.7265167236328, "p90": 397.6888305664063, "max": 648.5597534179688, "pos_frac": 0.75, "sample": [122.74583435058594, 621.6910400390625, 121.75614166259766, 262.995361328125, 212.97634887695312, 277.5771179199219, 32.746681213378906, 203.76153564453125, -27.586902618408203, 25.311813354492188, 342.8800964355469, 271.43267822265625, 233.84323120117188, 401.90618896484375, 305.4291687011719, -155.502685546875, 52.846187591552734, 265.49835205078125, -346.6300964355469, 41.627017974853516, 315.857666015625, 145.03077697753906, -187.96092224121094, 330.5201416015625, 59.64773941040039, 295.0793762207031, -83.76677703857422, -116.13859558105469, -89.20039367675781, 192.2108612060547, 438.2385559082031, -8.737701416015625, 242.38507080078125, 120.79728698730469, 106.63065338134766, 122.2582015991211, 387.84832763671875, -61.110565185546875, 59.13232421875, 46.68562316894531, -203.6592559814453, 57.037261962890625, 22.549591064453125, -35.8763427734375, 201.177734375, -13.48141860961914, 232.88153076171875, 155.04257202148438, -87.13821411132812, 259.1518249511719, -37.87257385253906, 124.42225646972656, -13.974189758300781, 221.302490234375, 361.139892578125, 482.080810546875, 193.6600799560547, 58.29646682739258, 648.5597534179688, 552.9583129882812, 235.19735717773438, 440.1388854980469, -54.549400329589844, 332.1412658691406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000256.npy"} +{"epoch": 0.37591776798825255, "step": 257, "batch_size": 64, "mean": 152.44454956054688, "std": 204.80377197265625, "min": -315.29107666015625, "p10": -59.415187835693345, "median": 111.68711471557617, "p90": 474.08353576660176, "max": 625.64208984375, "pos_frac": 0.78125, "sample": [522.929931640625, -44.44521713256836, 14.837966918945312, 100.09043884277344, 377.7536315917969, 8.355850219726562, 268.3506164550781, 424.55419921875, 264.5960388183594, 193.74441528320312, -14.584692001342773, -64.22689056396484, 108.10127258300781, 319.205322265625, 106.45354461669922, 25.87615966796875, 239.47952270507812, 251.83348083496094, 31.516010284423828, 495.3103942871094, 7.8868255615234375, 399.7263488769531, 277.08026123046875, -211.5574493408203, 93.57575225830078, 145.00418090820312, -124.87831115722656, 35.16051483154297, -27.881771087646484, 543.6431884765625, 170.70265197753906, 255.10667419433594, 278.93408203125, 625.64208984375, 115.46544647216797, -90.97354125976562, 112.57670593261719, 8.860626220703125, 130.91757202148438, -315.29107666015625, 110.79752349853516, 579.57666015625, 43.40105056762695, 259.81475830078125, 229.6296844482422, 203.63284301757812, 81.14139556884766, -11.230155944824219, 599.0801391601562, 254.843994140625, 301.2174377441406, 294.3114013671875, -38.823143005371094, 94.31774139404297, 50.84466552734375, 512.0955810546875, -91.75846862792969, -9.348489761352539, 195.80328369140625, -213.43698120117188, 19.275955200195312, 43.46992492675781, -48.18788146972656, 236.54901123046875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000257.npy"} +{"epoch": 0.37738619676945667, "step": 258, "batch_size": 64, "mean": 149.61134338378906, "std": 214.08973693847656, "min": -541.3336181640625, "p10": -81.19235534667968, "median": 142.30673217773438, "p90": 404.78930664062506, "max": 764.783447265625, "pos_frac": 0.765625, "sample": [390.98541259765625, 21.455137252807617, -84.44309997558594, 283.5953674316406, 77.61199951171875, 147.97003173828125, 273.3539123535156, 263.4995422363281, 216.67108154296875, -19.138397216796875, -28.523082733154297, -44.68579864501953, -93.31414031982422, 76.89906311035156, 103.81136322021484, -16.190444946289062, -39.81470489501953, 107.10079956054688, 199.40419006347656, -23.83435821533203, 70.03471374511719, 259.8493347167969, 550.8910522460938, 230.7704315185547, 494.1355895996094, 276.2564697265625, 208.17327880859375, 17.969337463378906, 136.6434326171875, 380.00341796875, 235.6398468017578, 324.0992431640625, 320.24395751953125, 193.2879180908203, -227.1865234375, 102.07676696777344, 294.02447509765625, -362.280517578125, -19.90186309814453, 120.59882354736328, 421.97271728515625, 5.275821685791016, -109.8734130859375, 410.70526123046875, 14.502391815185547, -541.3336181640625, 764.783447265625, 132.05181884765625, -167.89309692382812, 116.42754364013672, 236.155517578125, -73.60728454589844, 296.43035888671875, 377.60125732421875, 417.899169921875, 44.37310028076172, 305.6004638671875, 447.1537170410156, 169.421142578125, 250.80450439453125, 86.34834289550781, 165.57516479492188, 274.6329345703125, 112.37604522705078], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000258.npy"} +{"epoch": 0.3788546255506608, "step": 259, "batch_size": 64, "mean": 122.17149353027344, "std": 193.6431121826172, "min": -263.8575439453125, "p10": -108.80062942504883, "median": 101.77742385864258, "p90": 375.19096069335944, "max": 587.413330078125, "pos_frac": 0.765625, "sample": [181.3856658935547, -14.598867416381836, 135.53564453125, 195.27391052246094, 16.60409164428711, -100.2972412109375, 347.5023498535156, 438.2237854003906, 503.8582458496094, 232.41444396972656, 53.64801788330078, -112.44493865966797, 118.97721862792969, -70.23889923095703, 0.367431640625, -95.67729187011719, 347.9047546386719, -62.84112548828125, 520.1398315429688, 185.53274536132812, 138.62649536132812, 79.8980712890625, 157.472900390625, 39.22220230102539, 284.0340576171875, 44.73312759399414, 44.117591857910156, 42.09832763671875, 587.413330078125, -242.4644317626953, 72.53408813476562, 36.85145568847656, 541.471435546875, 183.84852600097656, 84.87175750732422, -179.8692169189453, 142.84432983398438, 354.43829345703125, -124.32909393310547, 312.91522216796875, -200.21258544921875, 27.185096740722656, 344.41790771484375, -124.61936950683594, 57.4281005859375, 49.97758483886719, 384.6045837402344, 257.1280212402344, -263.8575439453125, 160.8634796142578, -24.96705436706543, 303.14971923828125, 258.2974853515625, 12.849029541015625, 384.0849609375, -78.83631134033203, 131.38385009765625, 317.6560974121094, 133.4632568359375, -77.16516876220703, 118.68309020996094, 56.86545944213867, 151.69747924804688, 16.89972686767578], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000259.npy"} +{"epoch": 0.3803230543318649, "step": 260, "batch_size": 64, "mean": 162.20970153808594, "std": 182.82005310058594, "min": -157.73451232910156, "p10": -28.660858917236318, "median": 129.55355072021484, "p90": 398.1942352294922, "max": 989.69091796875, "pos_frac": 0.859375, "sample": [231.42483520507812, -69.6898422241211, 27.939918518066406, -131.74185180664062, 27.615591049194336, 176.98422241210938, 109.98653411865234, 131.40428161621094, 354.14990234375, 53.98106384277344, 67.2783203125, 431.2303161621094, 79.8834228515625, 123.85777282714844, 141.6370086669922, 989.69091796875, 147.8606719970703, 247.3712158203125, 37.32110595703125, 461.021728515625, -157.73451232910156, 167.8663330078125, -6.329069137573242, 146.32012939453125, -90.73059844970703, -35.730140686035156, 135.2803192138672, -32.15083312988281, 212.49156188964844, 113.99986267089844, 95.89335632324219, 329.2869873046875, -20.51758575439453, 213.7313995361328, 123.94635009765625, 333.1224365234375, 389.65655517578125, 171.85012817382812, 128.92909240722656, 118.81884765625, 111.93309020996094, 90.4676513671875, 316.67999267578125, 143.30038452148438, 148.68978881835938, 44.16268539428711, 55.24244689941406, 128.81668090820312, 196.73428344726562, 208.90965270996094, 270.3607177734375, 198.6385498046875, 130.17800903320312, 434.60107421875, 50.220359802246094, -48.209022521972656, 70.08010864257812, 412.88555908203125, 37.80622863769531, 609.227783203125, 52.424949645996094, 401.8532409667969, 60.628936767578125, 278.57977294921875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000260.npy"} +{"epoch": 0.38179148311306904, "step": 261, "batch_size": 64, "mean": 151.08978271484375, "std": 204.390869140625, "min": -309.0653991699219, "p10": -56.91553039550781, "median": 134.57278060913086, "p90": 378.30799560546876, "max": 959.8856811523438, "pos_frac": 0.8125, "sample": [422.3288269042969, 377.61004638671875, 17.67456817626953, -202.4294891357422, 95.50740051269531, 959.8856811523438, 28.62238883972168, 318.376220703125, 2.7519187927246094, 206.75552368164062, -1.6907825469970703, 75.58074188232422, -50.76483154296875, 378.60711669921875, 89.21507263183594, 230.30032348632812, 193.62274169921875, 322.4087219238281, 370.314453125, -28.28142547607422, 70.42619323730469, 101.16534423828125, 155.18771362304688, 197.2044677734375, 167.4605712890625, 355.2677001953125, 528.2564086914062, -26.914764404296875, 302.11553955078125, 221.4517364501953, 46.55387878417969, 20.67530059814453, 16.30084228515625, 74.22273254394531, 213.73202514648438, 50.04924011230469, 266.62017822265625, 380.805419921875, 263.233154296875, 147.26303100585938, 233.011962890625, -80.40618896484375, 499.3343200683594, 375.903564453125, 33.74256134033203, 360.236572265625, -192.4376678466797, 222.1085662841797, 17.851051330566406, 202.48141479492188, 121.88253021240234, 471.72314453125, 7.840843200683594, -309.0653991699219, -165.19561767578125, 98.37586212158203, -122.54324340820312, -15.323301315307617, 147.85797119140625, 187.0696563720703, 55.00281524658203, 58.838279724121094, 163.53573608398438, -59.551544189453125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000261.npy"} +{"epoch": 0.3832599118942731, "step": 262, "batch_size": 64, "mean": 125.20675659179688, "std": 159.93893432617188, "min": -253.2008056640625, "p10": -67.59209518432617, "median": 129.02147674560547, "p90": 296.43562622070317, "max": 715.6533813476562, "pos_frac": 0.828125, "sample": [21.876113891601562, 415.7557067871094, 113.91500091552734, 183.8873291015625, -218.55543518066406, 336.1493835449219, 464.063720703125, 131.33934020996094, 147.96337890625, 235.55633544921875, -141.9690704345703, -253.2008056640625, 115.90362548828125, 715.6533813476562, 116.86495971679688, -171.44259643554688, 142.5128631591797, 243.40048217773438, 28.42814064025879, 160.169677734375, 142.57803344726562, 79.86702728271484, -72.14828491210938, 327.2041931152344, 162.6737060546875, -25.264419555664062, 33.13399124145508, 372.836181640625, 159.98486328125, 246.21560668945312, 46.56352233886719, 90.43701934814453, 120.48455810546875, -70.51020812988281, 134.604736328125, 101.19847869873047, 171.04010009765625, 10.796890258789062, 181.9295654296875, 165.8349609375, 126.70361328125, 122.04537200927734, 22.744285583496094, 286.57525634765625, 200.7207794189453, 132.3419189453125, 242.54525756835938, 95.49205780029297, -50.266990661621094, -38.506690979003906, 93.54936218261719, 72.56239318847656, 38.13725280761719, 300.6614990234375, -126.30560302734375, -60.783164978027344, 107.21701049804688, 183.54412841796875, 204.0519256591797, 273.6951599121094, 154.58731079101562, 39.25962829589844, 255.9496612548828, 168.97906494140625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000262.npy"} +{"epoch": 0.38472834067547723, "step": 263, "batch_size": 64, "mean": 175.4049530029297, "std": 205.0843505859375, "min": -328.2548828125, "p10": -45.59385299682616, "median": 156.7095489501953, "p90": 444.5105346679689, "max": 722.119140625, "pos_frac": 0.8125, "sample": [328.0269470214844, -328.2548828125, 111.60086059570312, 151.1174774169922, 365.52960205078125, 47.60363006591797, 269.32196044921875, 48.210052490234375, 53.02876281738281, 175.46505737304688, -281.029296875, 177.42347717285156, 89.7934799194336, 323.1102294921875, 302.87091064453125, 172.3341522216797, 22.224292755126953, 380.262451171875, 111.93006134033203, 169.172607421875, -33.09821319580078, -84.064453125, 60.775611877441406, 331.94915771484375, -11.61374282836914, -60.97418212890625, 334.6079406738281, 110.07608032226562, -21.957054138183594, 75.09400939941406, 40.50379943847656, 18.618337631225586, 328.2791442871094, 60.85492706298828, -50.949127197265625, 518.6925659179688, 221.40753173828125, 95.83924865722656, 385.32427978515625, 539.3728637695312, 103.28448486328125, 460.6717529296875, 122.14752197265625, 257.0666198730469, 282.5963439941406, 148.21932983398438, 463.91729736328125, -5.1118316650390625, 238.99761962890625, 243.39236450195312, 354.7121887207031, 406.801025390625, 694.1466064453125, -162.7935028076172, 4.3119964599609375, 485.33843994140625, -17.989723205566406, -84.15406036376953, 248.82899475097656, 246.4468994140625, 135.83116149902344, 722.119140625, 162.30162048339844, 166.35366821289062], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000263.npy"} +{"epoch": 0.38619676945668135, "step": 264, "batch_size": 64, "mean": 153.38145446777344, "std": 205.0962371826172, "min": -251.62388610839844, "p10": -80.80555496215818, "median": 138.15026092529297, "p90": 449.12784729003914, "max": 615.051513671875, "pos_frac": 0.8125, "sample": [245.50772094726562, 307.7626037597656, 52.37552261352539, 615.051513671875, 57.724822998046875, -251.62388610839844, -155.35049438476562, 397.5383605957031, 61.990665435791016, 471.985107421875, -62.170249938964844, 36.581947326660156, 176.2654266357422, -171.66973876953125, 143.20864868164062, 184.80035400390625, 311.8216247558594, 457.42279052734375, 388.8993835449219, 346.69927978515625, 54.00201416015625, 299.0057373046875, 273.0972900390625, 184.58502197265625, 59.221580505371094, 4.2802734375, -39.090492248535156, 6.8064422607421875, -11.550163269042969, 179.9317169189453, -88.7921142578125, 49.50391387939453, 84.76990509033203, 583.5589599609375, 2.657958984375, 429.7729797363281, 137.48330688476562, 220.64529418945312, 56.00090026855469, 299.88641357421875, 57.57817077636719, 217.4752655029297, 172.95223999023438, 103.03598022460938, 11.738441467285156, 603.8699340820312, 357.4451904296875, -242.03720092773438, 195.93045043945312, -169.99252319335938, 205.19227600097656, 31.390277862548828, 230.87844848632812, 457.7984619140625, -48.87132263183594, 138.8172149658203, 32.488807678222656, 91.30514526367188, -197.95213317871094, 519.071044921875, 311.2622375488281, 302.54705810546875, -13.003730773925781, 46.894683837890625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000264.npy"} +{"epoch": 0.3876651982378855, "step": 265, "batch_size": 64, "mean": 138.47540283203125, "std": 247.73529052734375, "min": -568.1148681640625, "p10": -145.2595489501953, "median": 111.48730850219727, "p90": 445.65148925781256, "max": 792.6138916015625, "pos_frac": 0.75, "sample": [83.2499008178711, 430.5340270996094, 367.42535400390625, 243.03382873535156, 42.26366424560547, 112.1650390625, 293.8956298828125, 23.972694396972656, 662.0675048828125, 86.05960083007812, 226.7740478515625, -280.6527099609375, 20.70566177368164, -33.83450698852539, -166.87432861328125, 38.93387985229492, 331.6432189941406, -198.48248291015625, 79.18830108642578, -92.11959838867188, 246.3326416015625, 792.6138916015625, -568.1148681640625, 565.222900390625, -156.37100219726562, 233.22463989257812, 228.83786010742188, 480.4732971191406, -64.31295776367188, 750.353271484375, -225.1531219482422, 299.0830993652344, 79.06117248535156, -42.89152145385742, 99.81735229492188, -119.33282470703125, 5.07183837890625, -76.97541809082031, 348.9472961425781, -87.47953796386719, 452.1304016113281, 123.07879638671875, 59.9151611328125, 383.3315124511719, -54.88343048095703, 175.09559631347656, 27.552772521972656, 164.30484008789062, -26.642776489257812, 252.1107177734375, 100.29154205322266, 128.24468994140625, 161.75894165039062, 361.15521240234375, 152.48777770996094, 147.79611206054688, 228.54978942871094, 110.80957794189453, 21.541316986083984, -319.7038879394531, 528.5344848632812, 88.2724609375, 319.09259033203125, 219.24549865722656], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000265.npy"} +{"epoch": 0.3891336270190896, "step": 266, "batch_size": 64, "mean": 113.96561431884766, "std": 202.9491424560547, "min": -281.8428955078125, "p10": -79.052001953125, "median": 84.41139602661133, "p90": 409.6531036376954, "max": 726.4661865234375, "pos_frac": 0.703125, "sample": [-63.93391418457031, 560.9795532226562, 40.60919952392578, 134.10235595703125, 222.9687042236328, 133.5590057373047, -71.43458557128906, 285.47528076171875, 89.3316650390625, 431.2210693359375, 119.83827209472656, 256.51837158203125, -54.06462097167969, 287.8108825683594, -79.49159240722656, 53.773033142089844, 252.45947265625, 87.41665649414062, 168.0497589111328, 81.40613555908203, 129.30528259277344, -281.8428955078125, -53.283729553222656, 384.2784118652344, 109.4921875, 473.5372009277344, 211.61961364746094, 23.00366973876953, 40.44318389892578, 391.782958984375, 484.1708984375, 310.3553161621094, 726.4661865234375, 476.850830078125, 191.2355194091797, 55.24383544921875, 32.94431686401367, 261.01275634765625, -3.8408889770507812, 74.63253021240234, 61.3782958984375, 132.11175537109375, 35.15100860595703, 20.282751083374023, 135.9175262451172, -49.95835494995117, -275.76190185546875, -233.18856811523438, 249.9760284423828, 73.25801849365234, -218.00009155273438, -8.004875183105469, -78.02629089355469, -127.23068237304688, -40.774314880371094, -23.336517333984375, -34.65435028076172, -55.17034149169922, -180.48648071289062, 417.3117370605469, 187.312255859375, 216.45693969726562, 25.589065551757812, 89.64448547363281], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000266.npy"} +{"epoch": 0.39060205580029367, "step": 267, "batch_size": 64, "mean": 145.29331970214844, "std": 217.614990234375, "min": -476.1680603027344, "p10": -63.51975822448729, "median": 117.15250015258789, "p90": 372.93131713867194, "max": 784.651123046875, "pos_frac": 0.796875, "sample": [99.41100311279297, 133.51193237304688, 342.4859619140625, -50.2995491027832, 135.0877227783203, -16.82832145690918, 288.822265625, 175.42510986328125, 26.851318359375, 40.87456130981445, -90.31109619140625, -141.34194946289062, 41.965362548828125, 382.77105712890625, 177.24517822265625, 124.7080307006836, -101.38472747802734, 222.849609375, 581.8973999023438, 730.9771118164062, 94.07028198242188, 96.43619537353516, 53.818641662597656, 22.16583251953125, -79.55146789550781, 97.27659606933594, 40.1605224609375, 100.01850128173828, 345.1185302734375, 223.11085510253906, 573.9492797851562, 462.9646911621094, 784.651123046875, 38.242218017578125, -40.83571243286133, 279.2781677246094, 26.663066864013672, -47.045631408691406, 17.30874252319336, 132.96197509765625, 308.7086181640625, 31.322906494140625, -11.284395217895508, 554.772216796875, -476.1680603027344, -13.362174987792969, -377.40447998046875, 214.38031005859375, 175.07037353515625, 117.14056396484375, 197.44325256347656, 222.5555419921875, 93.45089721679688, 309.85833740234375, 88.79987335205078, 178.1775665283203, -69.18556213378906, 324.1544494628906, 131.02886962890625, 98.69368743896484, 349.971923828125, 130.87606811523438, 277.1274719238281, 117.16443634033203], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000267.npy"} +{"epoch": 0.3920704845814978, "step": 268, "batch_size": 64, "mean": 147.9793243408203, "std": 214.91481018066406, "min": -355.28131103515625, "p10": -53.17239761352538, "median": 123.82307815551758, "p90": 410.64741821289084, "max": 771.6077270507812, "pos_frac": 0.8125, "sample": [33.15363311767578, 497.4515380859375, 62.728248596191406, -46.146949768066406, 32.24128723144531, 63.97834014892578, 142.74459838867188, 88.44471740722656, 431.1806640625, 67.11886596679688, 267.4979553222656, -336.79046630859375, 121.23067474365234, 37.421966552734375, 242.48944091796875, -205.05987548828125, 106.25884246826172, 65.55088806152344, -228.9669189453125, 127.6731185913086, 155.0755615234375, 771.6077270507812, 94.92658996582031, 167.34207153320312, 65.21986389160156, 305.00982666015625, 466.25860595703125, 618.4805297851562, -43.70550537109375, 331.2719421386719, -98.86141204833984, 106.19134521484375, -62.32421875, -33.90506362915039, 585.1422729492188, 185.4784698486328, 162.65872192382812, 162.1861114501953, 222.1153564453125, 179.9477996826172, 168.0772247314453, 316.2332763671875, 18.68523406982422, -56.18330383300781, 362.73651123046875, 24.83447265625, 126.41548156738281, 340.4325256347656, 61.58149719238281, 248.56036376953125, 236.3310546875, 85.52696228027344, 752.3603515625, 207.583740234375, 97.70503234863281, 47.54692077636719, 130.01673889160156, 32.722145080566406, 351.80267333984375, -1.8130168914794922, 218.413330078125, -355.28131103515625, -22.870288848876953, 168.94227600097656], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000268.npy"} +{"epoch": 0.3935389133627019, "step": 269, "batch_size": 64, "mean": 171.74826049804688, "std": 238.8266143798828, "min": -510.4214782714844, "p10": -94.0348129272461, "median": 169.17974853515625, "p90": 473.61170959472656, "max": 689.562255859375, "pos_frac": 0.71875, "sample": [135.8584747314453, 326.29254150390625, 155.77947998046875, 307.6859130859375, -80.9497299194336, 208.0074462890625, 300.3346862792969, 211.38662719726562, -176.63119506835938, -54.76268768310547, 324.12847900390625, -19.235687255859375, 475.0957336425781, 17.831451416015625, 321.58831787109375, 333.89715576171875, -45.37604522705078, 217.96949768066406, -93.17781066894531, -510.4214782714844, 269.6940002441406, 38.15016555786133, 250.44049072265625, -225.10232543945312, -111.92607116699219, 444.98785400390625, -27.17070770263672, 628.2286376953125, 512.6922607421875, 136.7696533203125, 164.0855712890625, 174.27392578125, 365.041748046875, 266.61798095703125, 19.215621948242188, 396.1788330078125, 439.1297912597656, 67.3896713256836, 308.523193359375, 74.59259033203125, 9.577766418457031, 161.62342834472656, -262.57855224609375, 48.879547119140625, 137.8333282470703, 578.557373046875, 326.458740234375, 556.2637329101562, 306.9242858886719, -67.04365539550781, 305.8760070800781, 298.13427734375, -34.536834716796875, 689.562255859375, -94.402099609375, 475.6283874511719, -32.955780029296875, -71.69336700439453, -24.422435760498047, 277.79376220703125, 470.14898681640625, 445.6834411621094, -190.55001831054688, 134.01181030273438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000269.npy"} +{"epoch": 0.39500734214390604, "step": 270, "batch_size": 64, "mean": 138.94430541992188, "std": 226.7766876220703, "min": -442.9269104003906, "p10": -117.91195526123046, "median": 135.81584930419922, "p90": 353.21845703125, "max": 753.7080078125, "pos_frac": 0.796875, "sample": [214.95742797851562, 326.78173828125, 444.0806884765625, -304.2092590332031, -442.9269104003906, -120.50389099121094, 35.05138397216797, 290.0380859375, 132.2677459716797, 350.5746765136719, 339.6610107421875, -62.809539794921875, 183.55435180664062, 196.7081298828125, 22.261260986328125, 66.2320785522461, 139.36395263671875, 202.95880126953125, -200.42376708984375, 306.4558410644531, 294.1519470214844, 168.94842529296875, 3.0703506469726562, 251.14019775390625, 477.44061279296875, 291.8066711425781, 28.375598907470703, 49.31885528564453, 292.9892272949219, 150.10848999023438, -284.2576904296875, 331.949462890625, 745.8828735351562, -94.11695861816406, -226.45123291015625, 331.90423583984375, 436.13653564453125, 23.818819046020508, 123.40464782714844, -11.67294692993164, 753.7080078125, -254.8492431640625, 58.40087890625, 33.742733001708984, 165.73056030273438, 142.4971923828125, 8.559463500976562, 289.764892578125, -111.86410522460938, -17.785320281982422, 277.20526123046875, 353.8787536621094, 63.60749816894531, 41.44337463378906, -21.194290161132812, 186.21170043945312, 38.47247314453125, 162.70733642578125, 9.708183288574219, 351.6777648925781, 120.67015075683594, 117.10556030273438, 51.59415817260742, 567.4202880859375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000270.npy"} +{"epoch": 0.3964757709251101, "step": 271, "batch_size": 64, "mean": 176.25228881835938, "std": 209.08555603027344, "min": -194.55502319335938, "p10": -59.13335189819335, "median": 125.91094207763672, "p90": 476.86207885742186, "max": 769.8822631835938, "pos_frac": 0.78125, "sample": [769.8822631835938, 393.7613220214844, 85.06187438964844, -78.09396362304688, 289.78863525390625, -7.301898956298828, 470.9841613769531, 328.1065979003906, -25.265953063964844, -6.402687072753906, 338.997314453125, -167.19195556640625, 480.65582275390625, -194.55502319335938, 61.106834411621094, -64.31551361083984, 96.40640258789062, -96.85640716552734, 211.03404235839844, -13.65045166015625, -94.89556121826172, -44.62434387207031, 508.11614990234375, 445.42919921875, 599.37939453125, 476.9876708984375, 70.18804168701172, -47.04164123535156, 76.52114868164062, 163.74224853515625, 12.41647720336914, 193.8466033935547, 118.511962890625, 5.418601989746094, -94.18612670898438, 166.4143829345703, 63.94586181640625, 41.20732879638672, 422.3799133300781, 105.63417053222656, 3.3071365356445312, 592.9075927734375, 57.048500061035156, -6.06926155090332, 133.30992126464844, 43.56338119506836, 169.01705932617188, 250.0490264892578, 360.7931213378906, 260.9287414550781, 476.56903076171875, 235.4100341796875, 184.66494750976562, 72.67256164550781, 549.4881591796875, 74.15787506103516, 286.01507568359375, 346.6803283691406, 45.879150390625, 226.51451110839844, 242.44174194335938, 278.1816101074219, 100.79681396484375, 234.27720642089844], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000271.npy"} +{"epoch": 0.39794419970631423, "step": 272, "batch_size": 64, "mean": 221.74072265625, "std": 220.99012756347656, "min": -205.61013793945312, "p10": -61.16924362182617, "median": 225.55753326416016, "p90": 460.3768218994141, "max": 970.070556640625, "pos_frac": 0.78125, "sample": [380.98858642578125, 345.2298583984375, -62.34130859375, -205.61013793945312, -5.9200897216796875, 438.0030517578125, 299.629150390625, 430.27783203125, 558.8309936523438, 338.42236328125, 376.593994140625, 480.9946594238281, -34.54279327392578, 140.06283569335938, -93.45527648925781, 31.201311111450195, 161.79043579101562, 236.46266174316406, 218.02737426757812, 130.48544311523438, -106.4544677734375, 370.0887145996094, 405.2517395019531, 290.4501953125, 365.20562744140625, 138.03582763671875, -35.93061447143555, 242.10018920898438, 125.67716217041016, 970.070556640625, -28.312240600585938, 235.3408203125, 259.7635192871094, 240.76773071289062, 31.117998123168945, 413.4199523925781, 449.5495300292969, -128.5775909423828, 197.68807983398438, 219.37884521484375, 686.31201171875, -20.344268798828125, 552.2012939453125, 163.26492309570312, 390.36358642578125, -84.44489288330078, 388.01763916015625, 60.4217643737793, 220.8090057373047, 393.4741516113281, -23.773361206054688, 221.5769500732422, 61.39874267578125, -119.62071228027344, -58.434425354003906, 396.0982666015625, 252.26229858398438, 46.91753387451172, 465.01708984375, 128.0340576171875, 229.53811645507812, 378.4073181152344, 467.5052490234375, 176.64202880859375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000272.npy"} +{"epoch": 0.39941262848751835, "step": 273, "batch_size": 64, "mean": 204.66293334960938, "std": 247.25949096679688, "min": -397.408935546875, "p10": -101.72671127319336, "median": 195.19210052490234, "p90": 473.68974609375, "max": 796.9132080078125, "pos_frac": 0.78125, "sample": [327.1545104980469, 340.3206787109375, -24.654144287109375, 197.67567443847656, 701.5393676757812, 464.47943115234375, 227.32089233398438, -99.03423309326172, 756.0934448242188, -102.88063049316406, 470.5740966796875, 186.63812255859375, -69.43820190429688, 80.00725555419922, 30.717958450317383, 181.48245239257812, 4.179744720458984, 245.68331909179688, 208.69888305664062, 341.5338134765625, 242.89756774902344, 54.42625427246094, 65.64593505859375, 173.99339294433594, 128.32130432128906, 75.27457427978516, 357.38623046875, 156.0623321533203, 262.1844787597656, 192.70852661132812, 360.056884765625, -49.963157653808594, 347.1177062988281, 313.3478698730469, -40.28899383544922, 796.9132080078125, 566.0615234375, 260.66253662109375, 173.83580017089844, 355.95111083984375, -198.45083618164062, -174.972900390625, 320.00128173828125, -58.553321838378906, 283.8334045410156, 364.04083251953125, 475.0250244140625, 12.156782150268555, 127.68113708496094, -397.408935546875, 734.7183837890625, 149.31298828125, 65.98692321777344, -165.06820678710938, -152.01535034179688, 341.59173583984375, -76.97636413574219, 158.1643829345703, 436.74359130859375, -127.30183410644531, 405.34930419921875, 276.0614013671875, 453.3797912597656, 584.4419555664062], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000273.npy"} +{"epoch": 0.4008810572687225, "step": 274, "batch_size": 64, "mean": 149.2162322998047, "std": 228.20584106445312, "min": -261.6282043457031, "p10": -140.15299682617186, "median": 132.01608276367188, "p90": 489.52746887207036, "max": 759.2691650390625, "pos_frac": 0.71875, "sample": [79.6846694946289, 91.19197082519531, 45.708831787109375, -211.61863708496094, 242.1175537109375, 193.52481079101562, 219.7063446044922, -22.15766143798828, 104.85625457763672, -51.7823486328125, -116.53369140625, 511.6156311035156, 628.4772338867188, 276.90142822265625, 341.8556213378906, 172.4649200439453, -89.00775909423828, 493.63409423828125, -261.6282043457031, 317.8841552734375, -162.31224060058594, 104.77427673339844, -83.6622314453125, 72.99160766601562, 64.93441009521484, 249.03961181640625, 513.935791015625, -233.831787109375, 66.32823944091797, 759.2691650390625, -32.94742202758789, 131.01235961914062, 350.8357238769531, 140.5787353515625, -27.84613609313965, 289.0303955078125, 479.9453430175781, -131.67318725585938, 183.90699768066406, 42.92640686035156, 245.47854614257812, -43.33583068847656, 161.8157196044922, 308.533935546875, 423.8460998535156, -33.859764099121094, 106.89082336425781, 9.976675033569336, 163.64877319335938, 582.3009033203125, 335.4245300292969, 174.3064727783203, 3.591806411743164, 162.67782592773438, -235.5965576171875, 324.0682373046875, -147.17160034179688, -143.78720092773438, 524.9928588867188, 226.68807983398438, 133.01980590820312, 448.53009033203125, -53.663299560546875, 127.3299331665039], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000274.npy"} +{"epoch": 0.4023494860499266, "step": 275, "batch_size": 64, "mean": 166.55404663085938, "std": 214.24404907226562, "min": -266.5222473144531, "p10": -127.27030563354492, "median": 170.63775634765625, "p90": 449.16541137695316, "max": 652.156982421875, "pos_frac": 0.765625, "sample": [194.3917694091797, 68.99067687988281, 322.3466491699219, 8.510066986083984, -16.499170303344727, -123.9734878540039, 457.7142028808594, 141.1924285888672, 498.9156188964844, -130.91549682617188, 231.98898315429688, 443.5630187988281, 652.156982421875, 82.73686218261719, 242.37811279296875, 397.8570556640625, -32.6497688293457, -31.219390869140625, 170.87185668945312, 258.65350341796875, -144.45135498046875, 243.52456665039062, 451.5664367675781, 37.35956573486328, 1.6855850219726562, -203.1954345703125, 282.67938232421875, 188.3804931640625, 170.40365600585938, 59.79613494873047, -78.64119720458984, 3.125762939453125, 356.01361083984375, 294.5235595703125, -28.359651565551758, 79.24336242675781, -266.5222473144531, -141.19903564453125, -46.86651611328125, 41.66725158691406, 89.28359985351562, 419.89263916015625, -195.62918090820312, 173.5760955810547, 241.6772003173828, 512.3994140625, 240.51983642578125, 604.8859252929688, 199.6138458251953, 551.60791015625, 431.0700988769531, -34.584442138671875, 320.5447082519531, 405.6121826171875, 25.777259826660156, 387.4587097167969, 142.95164489746094, 327.67974853515625, -128.6832275390625, 174.78707885742188, 26.218332290649414, 307.2917785644531, 154.3985595703125, 143.36459350585938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000275.npy"} +{"epoch": 0.40381791483113066, "step": 276, "batch_size": 64, "mean": 151.22344970703125, "std": 250.78211975097656, "min": -409.351318359375, "p10": -134.86840667724604, "median": 133.05116271972656, "p90": 466.4665954589844, "max": 975.6294555664062, "pos_frac": 0.78125, "sample": [110.29784393310547, 975.6294555664062, 151.08872985839844, 381.45538330078125, 562.0740356445312, 5.587682723999023, -75.8753662109375, 796.75048828125, 54.35969543457031, 114.16932678222656, 153.52723693847656, 18.077186584472656, -34.51739501953125, 273.83636474609375, 324.7813720703125, 286.2739562988281, 192.4033203125, -269.6282653808594, 57.22560119628906, -43.682979583740234, 30.031368255615234, 352.07403564453125, 182.97132873535156, 374.0260009765625, -160.15113830566406, -259.3177795410156, -53.47578811645508, 44.87858200073242, 166.20675659179688, 343.5071716308594, -72.76582336425781, 150.13229370117188, 16.07219696044922, 129.17770385742188, 386.609375, 253.36074829101562, -409.351318359375, -290.20538330078125, 4.087377548217773, 12.237300872802734, -167.27337646484375, 78.99578857421875, 19.673778533935547, 558.552734375, 136.92462158203125, 103.92476654052734, 296.63848876953125, 248.44871520996094, 277.8280944824219, 21.460365295410156, 467.8499755859375, -38.54954528808594, 167.0161895751953, 243.41448974609375, -40.267578125, 233.42652893066406, 201.7801971435547, 172.24598693847656, 463.23870849609375, 74.83511352539062, -222.91387939453125, 613.7256469726562, 39.70250701904297, 493.6837158203125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000276.npy"} +{"epoch": 0.4052863436123348, "step": 277, "batch_size": 64, "mean": 166.16348266601562, "std": 217.4093780517578, "min": -295.3143310546875, "p10": -113.75624771118164, "median": 132.5151824951172, "p90": 462.0395660400391, "max": 595.4842529296875, "pos_frac": 0.8125, "sample": [4.087123870849609, 294.979248046875, -113.80705261230469, 129.350341796875, 39.13290023803711, 343.57916259765625, 424.52960205078125, 202.98233032226562, -131.26771545410156, 270.14874267578125, 186.20663452148438, 8.113672256469727, 414.2056884765625, -26.66620445251465, 160.4591827392578, 412.15240478515625, 47.87049102783203, 106.52203369140625, 199.3482208251953, 322.37689208984375, -41.402488708496094, 391.0600280761719, 68.68891906738281, 34.659156799316406, -227.6900634765625, -113.63770294189453, 556.23828125, 54.798065185546875, -23.793487548828125, 179.59512329101562, -295.3143310546875, 595.4842529296875, 124.56404876708984, 344.1453857421875, -233.42803955078125, -183.02749633789062, 585.5848999023438, 266.8110656738281, 150.69366455078125, 4.0997314453125, 283.416015625, 87.4869384765625, 551.2382202148438, 12.991504669189453, 15.978439331054688, 91.20084381103516, 225.48046875, 433.7309875488281, 468.45306396484375, 529.2567138671875, -205.14804077148438, 275.1365966796875, 105.3509750366211, 99.30907440185547, 26.521560668945312, -14.334758758544922, 253.0613555908203, 135.68002319335938, 478.9075012207031, 447.0747375488281, 89.34342956542969, 177.66246032714844, 124.8815689086914, 409.3504943847656], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000277.npy"} +{"epoch": 0.4067547723935389, "step": 278, "batch_size": 64, "mean": 189.73814392089844, "std": 222.3857421875, "min": -166.36947631835938, "p10": -66.44141731262206, "median": 151.28755950927734, "p90": 515.4825805664063, "max": 697.1707153320312, "pos_frac": 0.796875, "sample": [-69.35780334472656, 372.7420959472656, -88.41155242919922, 78.99081420898438, 311.4246520996094, 380.54754638671875, 224.7861328125, 107.45948791503906, 202.3780517578125, 105.20874786376953, 243.94625854492188, 204.62307739257812, 40.99352264404297, -129.93368530273438, 92.6376953125, 281.7618408203125, 32.13627624511719, 322.52276611328125, 645.5281982421875, -14.465950012207031, 19.412397384643555, 697.1707153320312, 460.8660888671875, 211.17953491210938, 318.1048278808594, 277.60980224609375, 139.23733520507812, 14.861595153808594, 197.84768676757812, 505.885009765625, 488.1818542480469, -146.8460235595703, 106.72079467773438, 62.87823486328125, 588.1099243164062, 550.2236938476562, 2.4877090454101562, -107.48272705078125, 69.50554656982422, 211.50006103515625, 79.11817169189453, 490.7633056640625, 194.84031677246094, 78.97988891601562, 58.97369384765625, 519.5958251953125, 144.32070922851562, 265.23046875, -44.28696060180664, 651.9219360351562, 158.25440979003906, 174.45347595214844, 63.380859375, -7.688224792480469, 290.05877685546875, 401.79754638671875, -39.10234832763672, -37.62267303466797, 669.6489868164062, -165.97821044921875, 310.13409423828125, 99.48098754882812, -166.36947631835938, -59.63651657104492], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000278.npy"} +{"epoch": 0.40822320117474303, "step": 279, "batch_size": 64, "mean": 192.9438018798828, "std": 252.5821075439453, "min": -352.47540283203125, "p10": -71.62999114990234, "median": 152.6385498046875, "p90": 537.6415588378907, "max": 906.143310546875, "pos_frac": 0.765625, "sample": [98.21187591552734, 97.99556732177734, 59.44304275512695, -51.745880126953125, -54.849365234375, -35.384544372558594, 453.4884033203125, 544.2431640625, 25.06218719482422, -35.18374252319336, -44.82141876220703, 318.00360107421875, 149.30999755859375, 57.28273010253906, 560.983642578125, 683.9254150390625, 55.1550407409668, 169.15036010742188, 348.0535583496094, 253.61355590820312, 735.1506958007812, 91.63402557373047, 22.256933212280273, -300.9945983886719, 57.22590637207031, 94.36666107177734, 12.908416748046875, 155.96710205078125, 135.47607421875, 540.8424072265625, 116.601318359375, 44.716697692871094, 250.26559448242188, -34.21599578857422, 256.8153076171875, 422.84613037109375, 277.94329833984375, 906.143310546875, 530.1729125976562, 363.1925048828125, -58.76045227050781, -77.1455078125, -111.1729736328125, -352.47540283203125, 220.4639129638672, 496.3534851074219, 84.24502563476562, 263.4388427734375, 326.760009765625, 178.7564697265625, 493.3118896484375, 363.3766784667969, 53.178810119628906, -150.33120727539062, 553.6177978515625, 202.1328125, 400.518310546875, 353.1107177734375, -132.5892333984375, -209.2840118408203, 423.74652099609375, 380.1831970214844, -9.65585708618164, 325.3712463378906], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000279.npy"} +{"epoch": 0.40969162995594716, "step": 280, "batch_size": 64, "mean": 192.72705078125, "std": 203.92495727539062, "min": -315.77099609375, "p10": -58.9893497467041, "median": 186.9881362915039, "p90": 455.09032897949226, "max": 609.4053344726562, "pos_frac": 0.8125, "sample": [131.3771209716797, 298.05474853515625, 198.06744384765625, 325.8348388671875, 1.3507232666015625, -57.93628692626953, 401.9570617675781, 519.0050048828125, 554.404541015625, 47.337738037109375, 249.58177185058594, 14.097755432128906, -36.94911575317383, 346.10504150390625, 409.272216796875, 441.48773193359375, 167.52862548828125, 286.74957275390625, 216.930419921875, 265.1264953613281, 59.7911262512207, 534.695068359375, 460.9200134277344, 185.35105895996094, 222.99505615234375, 103.501953125, 404.1295166015625, 334.67047119140625, -18.603179931640625, 258.77349853515625, 197.8780517578125, 200.51205444335938, 609.4053344726562, 168.86801147460938, -94.554931640625, 166.62081909179688, -231.31268310546875, 394.14208984375, 287.753662109375, 114.16522979736328, 175.4678192138672, 94.96074676513672, -134.27162170410156, 543.2764282226562, -59.4406623840332, -315.77099609375, 127.85990142822266, 410.8963317871094, 130.76678466796875, 134.36532592773438, 199.15170288085938, 532.6393432617188, -28.815263748168945, 137.5830841064453, -33.374755859375, -134.53988647460938, 188.62521362304688, 58.47526550292969, 181.64962768554688, 374.1470947265625, 322.3619384765625, -175.111083984375, 142.2672882080078, 322.2767333984375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000280.npy"} +{"epoch": 0.4111600587371512, "step": 281, "batch_size": 64, "mean": 174.46792602539062, "std": 204.56053161621094, "min": -684.702392578125, "p10": -30.14788780212401, "median": 163.19857025146484, "p90": 411.4960021972656, "max": 648.8417358398438, "pos_frac": 0.859375, "sample": [131.43264770507812, 286.689453125, 144.47755432128906, 282.993408203125, 78.66250610351562, 300.96881103515625, 61.89678955078125, 196.8382568359375, 197.8737030029297, 86.06046295166016, 291.0526428222656, 376.96270751953125, 270.32598876953125, 123.70886993408203, 299.21923828125, -78.9267578125, 178.06784057617188, 553.2098388671875, 87.122802734375, 172.68235778808594, 3.782318115234375, 346.79595947265625, 268.76239013671875, 499.4762878417969, 205.8521728515625, 249.79437255859375, 186.29544067382812, 304.8369140625, -35.5172004699707, 648.8417358398438, 413.10028076171875, 112.025390625, 189.3768310546875, 457.65966796875, 177.6826171875, -684.702392578125, 117.95259094238281, 35.86619567871094, 39.08220672607422, 23.6025390625, 531.02880859375, 26.103988647460938, -12.286544799804688, 276.702880859375, 153.71478271484375, 283.4365539550781, 97.23271942138672, 87.99028015136719, 551.4047241210938, 114.6742172241211, -140.53082275390625, 152.88009643554688, -81.78529357910156, 135.0084228515625, -56.1475830078125, -17.619491577148438, -128.80706787109375, 407.752685546875, 251.38768005371094, 40.51293182373047, 133.83950805664062, 351.69647216796875, 15.87051010131836, 390.0010070800781], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000281.npy"} +{"epoch": 0.41262848751835535, "step": 282, "batch_size": 64, "mean": 134.10218811035156, "std": 214.3765106201172, "min": -279.25433349609375, "p10": -119.99265060424804, "median": 122.63528442382812, "p90": 400.0217498779297, "max": 1038.1416015625, "pos_frac": 0.71875, "sample": [463.9661560058594, 173.25534057617188, 31.730907440185547, -279.25433349609375, 182.29248046875, 256.223876953125, 92.90397644042969, 4.1450653076171875, 186.33404541015625, 405.78875732421875, 283.0724182128906, 155.52532958984375, 352.2318420410156, 101.56148529052734, -60.69932556152344, 275.33111572265625, 1038.1416015625, 149.55014038085938, 220.086669921875, 146.22735595703125, 257.09375, 199.01101684570312, -7.799594879150391, -133.678955078125, 499.31756591796875, 459.65374755859375, 88.00900268554688, 194.4270782470703, 135.15780639648438, 539.33935546875, 299.90557861328125, 266.0415344238281, 302.6448974609375, -25.9935302734375, -48.03596878051758, 132.44003295898438, -122.16435241699219, -126.58980560302734, 96.10647583007812, -140.27542114257812, -48.719970703125, 52.22698974609375, 182.62864685058594, 145.25259399414062, 209.6255645751953, -25.47637176513672, -97.87850952148438, 400.712158203125, 139.33352661132812, 112.83053588867188, -43.617408752441406, -34.83415222167969, 22.995285034179688, 54.35637283325195, 84.79179382324219, -135.44546508789062, 379.621826171875, 0.1555309295654297, -84.06832122802734, 398.4107971191406, -178.70611572265625, 45.86556625366211, -114.92534637451172, 74.37858581542969], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000282.npy"} +{"epoch": 0.41409691629955947, "step": 283, "batch_size": 64, "mean": 199.99966430664062, "std": 211.86895751953125, "min": -469.13116455078125, "p10": -41.605544281005855, "median": 202.3527069091797, "p90": 491.8602966308594, "max": 603.44091796875, "pos_frac": 0.8125, "sample": [-107.65652465820312, -100.0017318725586, 295.30804443359375, 42.191261291503906, 349.02203369140625, 381.588134765625, -17.1448974609375, 283.77685546875, 429.9046936035156, 32.308082580566406, 81.49164581298828, 134.84693908691406, -58.880977630615234, 143.21937561035156, 70.82479858398438, 107.34422302246094, -469.13116455078125, 54.79328536987305, 440.52203369140625, -44.13250732421875, 507.2577209472656, 417.0542907714844, 129.45362854003906, 301.33074951171875, 36.45746994018555, 293.5348815917969, -31.933717727661133, 497.2469482421875, 397.12451171875, 551.39111328125, 75.0933837890625, 325.6656799316406, -146.9823760986328, 203.46209716796875, 479.29144287109375, 29.044113159179688, 34.637725830078125, -2.441303253173828, 79.66661834716797, -55.416595458984375, 457.25384521484375, 224.92164611816406, 338.8145751953125, 206.758544921875, 328.4897155761719, 201.24331665039062, 88.9070816040039, 250.56222534179688, 504.59100341796875, 268.4992980957031, 532.0693359375, -33.245033264160156, 369.1361083984375, 151.932861328125, 100.01924896240234, 289.3546447753906, 559.1326904296875, 213.5153045654297, 322.067138671875, 603.44091796875, 439.25189208984375, 113.19109344482422, 134.64761352539062, -35.70929718017578], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000283.npy"} +{"epoch": 0.4155653450807636, "step": 284, "batch_size": 64, "mean": 190.62232971191406, "std": 199.73817443847656, "min": -193.22695922851562, "p10": -33.43221549987793, "median": 170.75125885009766, "p90": 462.97325134277355, "max": 730.9229736328125, "pos_frac": 0.828125, "sample": [-127.42434692382812, 139.62428283691406, 325.55230712890625, 41.918701171875, 111.6475601196289, 34.80039978027344, 371.28448486328125, 13.051727294921875, 275.9072570800781, 430.059814453125, 368.806396484375, -34.965702056884766, 477.0790100097656, 52.155052185058594, 323.8615417480469, 92.39607238769531, 8.685813903808594, 231.68963623046875, 355.4029235839844, 556.0098876953125, 28.77398681640625, 577.7857055664062, 218.00534057617188, 300.9319152832031, 152.45370483398438, -29.854080200195312, 202.9391326904297, 115.610107421875, 219.23609924316406, -55.77757263183594, -24.654541015625, 647.5928955078125, 188.91102600097656, -120.83361053466797, 137.40188598632812, 103.19207763671875, -21.849281311035156, 149.5460205078125, 236.12335205078125, 730.9229736328125, 138.5437469482422, 226.40213012695312, 255.41036987304688, 2.409730911254883, -41.02783203125, -193.22695922851562, 354.1276550292969, 333.1347961425781, 161.7447967529297, 328.965087890625, 506.2088623046875, 179.75772094726562, 304.70599365234375, 239.61021423339844, 18.167037963867188, 19.253955841064453, -66.60152435302734, -29.583885192871094, 350.151611328125, 292.57952880859375, 57.211158752441406, 523.0272216796875, 34.046409606933594, 400.8115539550781], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000284.npy"} +{"epoch": 0.4170337738619677, "step": 285, "batch_size": 64, "mean": 171.5524444580078, "std": 196.7308349609375, "min": -481.3936767578125, "p10": -59.480439376831015, "median": 171.34430694580078, "p90": 380.67843933105473, "max": 643.88671875, "pos_frac": 0.875, "sample": [361.748046875, 130.809326171875, 51.21339416503906, 390.87225341796875, 150.15792846679688, 143.22850036621094, 181.13229370117188, 77.26683807373047, 208.8609161376953, 643.88671875, 282.4176025390625, 154.32814025878906, 356.5284423828125, 170.82162475585938, 164.4185028076172, 586.7684326171875, 76.54730224609375, 82.9483642578125, 241.80050659179688, 250.21539306640625, 493.3865661621094, -227.73934936523438, 54.43724822998047, 293.7901306152344, 387.4783630371094, 52.36841583251953, -75.50839233398438, 103.918212890625, 60.1551513671875, 174.76150512695312, 275.002197265625, 137.87034606933594, -103.93680572509766, 44.68191146850586, 267.5449523925781, 410.53643798828125, 42.84516143798828, -481.3936767578125, 253.36392211914062, 96.16976928710938, 174.07241821289062, 6.576080322265625, 622.7918701171875, 313.510986328125, 2.9293594360351562, 313.5158386230469, 268.3149108886719, 171.8669891357422, 272.3138427734375, 344.9006042480469, 172.02159118652344, -261.4367370605469, 274.1827697753906, 152.95339965820312, -149.22225952148438, -22.08188247680664, 364.81195068359375, 116.12722778320312, 132.2386474609375, 187.33763122558594, -117.35213470458984, 329.79071044921875, 172.82666015625, 170.6632080078125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000285.npy"} +{"epoch": 0.4185022026431718, "step": 286, "batch_size": 64, "mean": 189.12228393554688, "std": 223.60415649414062, "min": -387.8031921386719, "p10": -97.27126388549804, "median": 191.85203552246094, "p90": 499.9077575683594, "max": 669.6429443359375, "pos_frac": 0.765625, "sample": [80.1009750366211, -29.79094696044922, 525.945068359375, 166.98269653320312, 188.39669799804688, 212.41314697265625, 98.69292449951172, 268.6915283203125, 349.49163818359375, -387.8031921386719, 328.3941650390625, 306.8155822753906, 136.0828094482422, -116.8228988647461, 368.8316955566406, 195.307373046875, 606.3427734375, -21.3682861328125, 224.9027099609375, 179.99032592773438, 495.5831298828125, 119.64093017578125, -294.8641052246094, 506.5335693359375, 227.2168731689453, -48.9207763671875, -88.79679870605469, 366.311767578125, 277.1640930175781, 558.1444091796875, 434.71142578125, -104.8602294921875, 229.16830444335938, 54.916351318359375, 5.4134368896484375, 80.52033996582031, 316.25732421875, 167.4382781982422, -0.888885498046875, -114.92893981933594, 219.5692596435547, -99.9026107788086, 225.88381958007812, -108.7173080444336, 91.52755737304688, 501.76116943359375, 487.05438232421875, 128.03639221191406, -91.13145446777344, 581.862060546875, -22.83331871032715, 216.29302978515625, 55.99107360839844, 475.10498046875, 99.96844482421875, 339.630859375, 145.75689697265625, 204.31472778320312, 243.5652618408203, -64.653564453125, 669.6429443359375, 459.8462219238281, 301.5601501464844, 176.33840942382812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000286.npy"} +{"epoch": 0.4199706314243759, "step": 287, "batch_size": 64, "mean": 145.24073791503906, "std": 204.47384643554688, "min": -339.1063537597656, "p10": -83.80540390014649, "median": 115.55158233642578, "p90": 417.290902709961, "max": 716.28515625, "pos_frac": 0.78125, "sample": [122.65724182128906, 20.320465087890625, 0.8057155609130859, 90.77294158935547, 136.9729766845703, -84.90341186523438, 302.64227294921875, 578.3930053710938, 163.21580505371094, 181.0157470703125, 552.7871704101562, 65.71285247802734, 251.90769958496094, 478.00341796875, -74.00004577636719, -43.772796630859375, 75.37841033935547, 510.9989013671875, 117.985595703125, 349.0445556640625, 209.3995361328125, 9.193119049072266, 157.22201538085938, -339.1063537597656, 716.28515625, 81.66409301757812, 71.26139831542969, -29.87030029296875, -105.23460388183594, -11.577583312988281, 157.7518768310547, -19.468727111816406, -141.69467163085938, 108.74586486816406, 70.26171875, 224.64500427246094, 101.11854553222656, 264.66632080078125, 144.6726531982422, 247.1834259033203, 1.5639629364013672, 271.0108642578125, 218.80587768554688, 94.14088439941406, -16.051620483398438, 113.11756896972656, 55.347991943359375, 62.64164733886719, 349.89105224609375, 372.58685302734375, -216.02389526367188, 48.44750213623047, 215.87550354003906, 50.92717742919922, 402.0284118652344, -193.66241455078125, -192.2228546142578, -81.2433853149414, 259.0406799316406, 359.0202331542969, 474.61895751953125, 423.83197021484375, 233.83447265625, 274.82305908203125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000287.npy"} +{"epoch": 0.42143906020558003, "step": 288, "batch_size": 64, "mean": 190.696044921875, "std": 202.9009246826172, "min": -294.92864990234375, "p10": -17.013186264038076, "median": 190.76901245117188, "p90": 465.55877075195315, "max": 698.84521484375, "pos_frac": 0.875, "sample": [190.53244018554688, 210.32904052734375, -20.96773910522461, 206.37010192871094, 336.94464111328125, 429.74755859375, 31.155790328979492, 698.84521484375, 70.36990356445312, 466.05548095703125, 270.4787902832031, 107.48031616210938, -129.18423461914062, 21.06451416015625, 118.90773010253906, 142.11558532714844, 149.8862762451172, 413.3107604980469, 258.169677734375, 315.4178161621094, 186.57603454589844, 272.3360900878906, 261.7198486328125, -32.667236328125, 83.25761413574219, 230.96694946289062, 216.91778564453125, 24.930246353149414, 193.85226440429688, 68.54551696777344, 209.44439697265625, 217.23138427734375, 676.1763305664062, 127.2190933227539, 464.3997802734375, 297.16436767578125, 179.4254608154297, 36.957427978515625, 215.0390625, 89.7258071899414, -294.92864990234375, 560.0472412109375, 104.71148681640625, 363.63677978515625, 98.67445373535156, 191.00558471679688, 68.28238677978516, 61.3516845703125, 551.3873291015625, 74.65123748779297, 221.36741638183594, 573.0967407226562, 251.25167846679688, -282.10211181640625, 25.806560516357422, 500.49688720703125, 272.1929931640625, 93.10032653808594, 80.76211547851562, 305.16558837890625, -7.785896301269531, 375.7802734375, -184.68946838378906, -104.96348571777344], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000288.npy"} +{"epoch": 0.42290748898678415, "step": 289, "batch_size": 64, "mean": 214.2589111328125, "std": 226.18162536621094, "min": -186.07559204101562, "p10": -42.15897827148437, "median": 173.5909881591797, "p90": 540.1522583007812, "max": 721.1572265625, "pos_frac": 0.859375, "sample": [213.66812133789062, -39.836639404296875, 563.3514404296875, 631.3602294921875, -158.99081420898438, 554.2122802734375, 451.176025390625, 340.1719970703125, 99.77597045898438, 539.966064453125, 524.6813354492188, 438.412841796875, 263.4805908203125, 540.7710571289062, 434.46893310546875, 282.69598388671875, 2.7888031005859375, 721.1572265625, -39.732765197753906, 266.7359924316406, 135.87692260742188, 35.689727783203125, 125.07144165039062, 28.976795196533203, -45.109893798828125, 11.327510833740234, 307.1964111328125, 436.1373596191406, 84.84304809570312, 170.05752563476562, 443.9474792480469, 655.5348510742188, 358.0135803222656, -87.57383728027344, 52.06562042236328, 47.89353942871094, 540.2320556640625, 48.2990608215332, -127.19773864746094, 27.3370418548584, 111.97752380371094, 533.8567504882812, -43.154266357421875, 223.78884887695312, 194.80392456054688, 441.84808349609375, 465.0294189453125, 0.2139606475830078, 140.3623046875, -105.85699462890625, 177.12445068359375, 155.86328125, 84.94424438476562, 37.44670104980469, 480.94732666015625, 206.58218383789062, 54.57037353515625, 16.33129119873047, 127.44235229492188, 198.69244384765625, 204.10049438476562, -186.07559204101562, 242.305419921875, 70.49324798583984], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000289.npy"} +{"epoch": 0.4243759177679883, "step": 290, "batch_size": 64, "mean": 215.1958465576172, "std": 262.50390625, "min": -325.46405029296875, "p10": -79.91460266113278, "median": 178.3249282836914, "p90": 592.898376464844, "max": 987.1412353515625, "pos_frac": 0.828125, "sample": [528.0308227539062, 221.54351806640625, 101.5226058959961, 154.19696044921875, -325.46405029296875, 400.9164123535156, 482.1312255859375, -11.150604248046875, 89.15122985839844, 137.4241943359375, -203.48016357421875, 346.2900695800781, 318.310302734375, 49.441253662109375, 279.81646728515625, 235.47799682617188, 544.4058837890625, -45.589630126953125, 236.78689575195312, 26.580631256103516, 69.15834045410156, 96.88304138183594, -178.95266723632812, 163.6141357421875, 66.43463134765625, -320.1439208984375, 158.43634033203125, 286.853271484375, 987.1412353515625, 687.385009765625, 76.13796997070312, 272.3412780761719, 307.81390380859375, 617.5350341796875, 179.9697265625, -17.41749382019043, 261.3572692871094, 118.24320983886719, 315.25238037109375, 177.16058349609375, 16.89837646484375, 179.48927307128906, 162.4164276123047, 386.06561279296875, 48.573944091796875, 309.11578369140625, -112.96459197998047, -28.704408645629883, 271.18310546875, -111.68753051757812, 706.9480590820312, 527.9478149414062, 236.38650512695312, 612.272216796875, 4.343601226806641, 547.6927490234375, 626.4424438476562, 332.951416015625, 20.215862274169922, 65.10142517089844, 72.25982666015625, 826.1588134765625, 276.5079345703125, -94.62530517578125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000290.npy"} +{"epoch": 0.42584434654919234, "step": 291, "batch_size": 64, "mean": 190.79556274414062, "std": 228.04742431640625, "min": -471.1942443847656, "p10": -57.16238937377929, "median": 188.91583251953125, "p90": 458.30674133300784, "max": 671.8834838867188, "pos_frac": 0.765625, "sample": [181.4344482421875, 175.81642150878906, -8.337860107421875, 33.741920471191406, 22.4554443359375, 311.6151123046875, 619.8699951171875, 321.8470764160156, 549.1240844726562, 296.1162109375, 84.76219177246094, 439.79351806640625, 270.267578125, -97.75679779052734, 671.8834838867188, -14.094024658203125, 455.5672607421875, 248.7278594970703, 6.509786605834961, 224.96702575683594, 413.3037109375, -17.209609985351562, 366.4805908203125, 94.87344360351562, 245.18968200683594, 434.46624755859375, 155.47705078125, 133.6071014404297, 408.1800231933594, 15.198223114013672, 215.02944946289062, 45.410072326660156, 366.420166015625, -191.2756805419922, -25.674835205078125, -188.90658569335938, -34.644920349121094, 37.180809020996094, -471.1942443847656, 578.8505859375, 357.0120849609375, 459.4808044433594, 395.9988708496094, 208.17672729492188, -49.55131530761719, -39.596282958984375, -80.02335357666016, 384.99761962890625, 82.51765441894531, 159.61044311523438, 114.40011596679688, 196.397216796875, 373.2904052734375, 326.13629150390625, -10.8900146484375, 225.26730346679688, 416.5865783691406, 103.52655029296875, 523.3507690429688, -60.424278259277344, -148.60662841796875, 374.682861328125, 523.4935302734375, 0.0108795166015625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000291.npy"} +{"epoch": 0.42731277533039647, "step": 292, "batch_size": 64, "mean": 205.0364990234375, "std": 264.70953369140625, "min": -708.5901489257812, "p10": -57.39187812805176, "median": 202.48907470703125, "p90": 547.6482849121094, "max": 788.3001098632812, "pos_frac": 0.8125, "sample": [270.4349365234375, 11.615787506103516, 14.187896728515625, 190.21524047851562, 300.0477600097656, 183.5982666015625, 153.95269775390625, 135.51763916015625, 185.62493896484375, -708.5901489257812, 659.5076904296875, 34.14275360107422, 551.4968872070312, 121.69900512695312, 325.71331787109375, -58.641143798828125, -342.14178466796875, 449.39910888671875, 65.64493560791016, 311.0005798339844, 30.440895080566406, 788.3001098632812, 409.4446105957031, 230.03591918945312, 538.668212890625, 526.181640625, 270.96221923828125, 455.5462646484375, 214.76290893554688, 149.15884399414062, -105.29611206054688, 262.3006286621094, 13.097787857055664, 487.90399169921875, -6.059394836425781, 131.02835083007812, -73.12039184570312, 183.9993438720703, 170.42066955566406, 147.94114685058594, 41.72850036621094, 496.93475341796875, 265.2395935058594, 224.4393768310547, 226.6873016357422, 226.45208740234375, 566.8294677734375, 397.1672668457031, 784.6815185546875, -49.70458984375, 160.87277221679688, 325.5375671386719, 224.8896942138672, -267.6337890625, 271.917724609375, -183.49368286132812, 716.5880737304688, -54.476924896240234, -40.554073333740234, 574.4874267578125, 231.85707092285156, 227.13046264648438, 94.27143859863281, -19.657167434692383], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000292.npy"} +{"epoch": 0.4287812041116006, "step": 293, "batch_size": 64, "mean": 161.2817840576172, "std": 251.2393798828125, "min": -298.4122009277344, "p10": -127.67644424438475, "median": 120.29081344604492, "p90": 461.95713195800784, "max": 937.9764404296875, "pos_frac": 0.71875, "sample": [42.1990966796875, -7.83172607421875, 15.751226425170898, 71.44065856933594, 228.072021484375, 219.60755920410156, 489.5407409667969, 118.37184143066406, 451.3851318359375, 49.04253387451172, 459.2140197753906, -37.19142150878906, 206.14785766601562, -34.40230941772461, 885.755126953125, -298.4122009277344, 386.4691467285156, 135.11874389648438, 15.207733154296875, 318.4326477050781, 410.6742858886719, -178.3011474609375, 347.4066162109375, -110.25331115722656, 56.41455078125, 100.6358413696289, -100.4916763305664, 175.15426635742188, 33.53120422363281, 489.7064208984375, 471.929443359375, 317.53192138671875, 313.0695495605469, 434.677490234375, 293.58514404296875, -168.50096130371094, 46.29609680175781, 186.35865783691406, 218.14947509765625, -19.216909408569336, -117.85503387451172, 50.801918029785156, 37.88958740234375, 234.82064819335938, 300.7529296875, 415.1640625, 244.888916015625, 357.0224609375, 59.22138595581055, -270.2352294921875, -20.423919677734375, -244.3170623779297, -148.87388610839844, 463.13275146484375, 937.9764404296875, 122.20978546142578, -93.35665893554688, 176.10494995117188, 47.21755599975586, -64.40434265136719, -131.8856201171875, 401.60723876953125, 533.0205688476562, -0.7201480865478516], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000293.npy"} +{"epoch": 0.4302496328928047, "step": 294, "batch_size": 64, "mean": 178.77838134765625, "std": 243.92242431640625, "min": -351.5810852050781, "p10": -112.83380203247069, "median": 123.85347747802734, "p90": 562.9403320312503, "max": 851.74609375, "pos_frac": 0.828125, "sample": [50.677345275878906, 84.7930908203125, 328.322509765625, 377.5714416503906, 124.65585327148438, 6.091808319091797, -167.37747192382812, 157.18896484375, 2.9731903076171875, 86.4303970336914, 10.197311401367188, 56.634124755859375, -92.38482666015625, 610.0526123046875, 55.594398498535156, 303.695068359375, 245.0112762451172, 423.2276611328125, 50.93303298950195, 182.4996337890625, 678.1355590820312, -161.34083557128906, 270.86297607421875, 70.72438049316406, 644.6742553710938, 485.6521911621094, 366.0930480957031, 171.30795288085938, 192.431396484375, -121.59764862060547, 215.29263305664062, 317.758544921875, 267.1120910644531, 851.74609375, -152.05718994140625, -153.93746948242188, 641.2985229492188, 161.72314453125, 495.5513610839844, 17.03955841064453, -351.5810852050781, -178.75540161132812, 311.63818359375, 271.935546875, 584.429443359375, 120.65631103515625, 28.79251480102539, 164.7544708251953, 512.799072265625, 35.97190856933594, 363.46942138671875, 375.8974304199219, -54.061805725097656, 123.05110168457031, -43.03200149536133, 58.859100341796875, 62.3800163269043, 58.39716339111328, -4.235424041748047, 129.4912872314453, 597.4855346679688, 53.705841064453125, 42.044342041015625, 22.4660701751709], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000294.npy"} +{"epoch": 0.43171806167400884, "step": 295, "batch_size": 64, "mean": 200.58251953125, "std": 265.73431396484375, "min": -319.55206298828125, "p10": -109.48426132202147, "median": 202.68143463134766, "p90": 548.0349975585938, "max": 1004.819091796875, "pos_frac": 0.78125, "sample": [-318.11090087890625, -35.45301818847656, 593.5439453125, 19.265727996826172, -319.55206298828125, 244.2398681640625, -209.51023864746094, 273.4773864746094, 325.0981140136719, -237.14443969726562, 409.72418212890625, -31.484970092773438, 162.06378173828125, 384.57440185546875, 511.8355407714844, 270.2286071777344, 286.52606201171875, 255.56976318359375, -60.590065002441406, 101.69756317138672, 369.6287841796875, -63.948936462402344, 213.8514404296875, 405.3882141113281, 0.9863357543945312, 155.74966430664062, 578.8673095703125, -164.59263610839844, 794.1141357421875, 184.791015625, 405.7091369628906, 72.93760681152344, 34.27706527709961, -120.0452880859375, 451.17779541015625, 584.6510009765625, 63.57573699951172, -84.84186553955078, -222.98976135253906, 53.84065246582031, 275.3349914550781, 345.0572204589844, 331.33642578125, 93.36042785644531, -41.56670379638672, 72.44377136230469, 300.78717041015625, 57.322792053222656, 237.8370361328125, 547.347900390625, 363.4290771484375, 335.310302734375, 38.705535888671875, 1004.819091796875, 548.3294677734375, 431.5809631347656, 107.72476196289062, 218.25502014160156, 203.94720458984375, 201.41566467285156, 63.897743225097656, 102.46525573730469, 700.042724609375, -41.028770446777344], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000295.npy"} +{"epoch": 0.4331864904552129, "step": 296, "batch_size": 64, "mean": 137.62460327148438, "std": 270.09466552734375, "min": -665.2985229492188, "p10": -234.9845962524414, "median": 136.26026916503906, "p90": 540.6116180419923, "max": 811.2008056640625, "pos_frac": 0.75, "sample": [172.8421173095703, 52.60626983642578, 181.30462646484375, 811.2008056640625, 10.334716796875, 501.6960144042969, -238.41171264648438, -139.3893585205078, -59.30982208251953, -665.2985229492188, 129.22799682617188, 203.072998046875, -226.9879913330078, 184.44430541992188, 157.7063446044922, -265.1153869628906, 195.2671356201172, 318.92633056640625, 584.2816162109375, 341.2957763671875, 270.49981689453125, 264.6112365722656, 11.294609069824219, -83.55477905273438, 104.91476440429688, 384.0843811035156, 41.64825439453125, -255.6343231201172, 19.41119384765625, -268.1732482910156, 597.6681518554688, 24.090713500976562, 271.6766357421875, 85.9842758178711, 189.83999633789062, 621.1757202148438, 208.68165588378906, 110.27182006835938, -350.5343322753906, 128.313720703125, -392.8060607910156, 323.0086975097656, -82.78040313720703, -39.287506103515625, 579.4143676757812, 108.72216033935547, 133.19882202148438, 202.4449005126953, 139.32171630859375, 132.43426513671875, 128.1298828125, -1.2326278686523438, -11.318763732910156, 746.7617797851562, 176.25465393066406, 233.17544555664062, 128.6383056640625, 164.61038208007812, 557.2897338867188, -98.88026428222656, 237.63267517089844, 156.44143676757812, 261.0155029296875, 399.82147216796875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000296.npy"} +{"epoch": 0.434654919236417, "step": 297, "batch_size": 64, "mean": 163.90359497070312, "std": 212.20120239257812, "min": -265.8343811035156, "p10": -54.11387252807617, "median": 109.83685302734375, "p90": 466.48104248046883, "max": 659.5640869140625, "pos_frac": 0.796875, "sample": [165.50045776367188, 264.24053955078125, 366.0572509765625, 562.8290405273438, -0.2916450500488281, 12.875207901000977, 411.48980712890625, 187.71600341796875, -177.99464416503906, -103.49592590332031, 71.891845703125, 112.97732543945312, 252.06719970703125, 521.9393920898438, 440.78973388671875, 21.396459579467773, 42.0859375, 18.443084716796875, -54.81488037109375, -16.794002532958984, 237.81410217285156, -223.1942596435547, 74.13365173339844, 35.274658203125, 477.32421875, 106.69638061523438, 318.99810791015625, 97.19784545898438, 158.85357666015625, 269.2926025390625, 187.42327880859375, 85.43307495117188, 441.1802978515625, 231.97645568847656, 380.20867919921875, 19.39663314819336, 122.44532775878906, 62.96814727783203, 506.53704833984375, 345.1660461425781, 17.366409301757812, 168.5088653564453, -265.8343811035156, 319.51519775390625, 92.27007293701172, -39.26886749267578, 104.88475036621094, 67.1337890625, 612.5161743164062, -56.72611999511719, 191.47430419921875, 538.718994140625, 261.7420654296875, -31.48175048828125, -257.16485595703125, 350.8769836425781, -35.93883514404297, 60.68257141113281, 211.13821411132812, 659.5640869140625, 86.80213928222656, 416.05169677734375, -52.478187561035156, 35.44288635253906], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000297.npy"} +{"epoch": 0.43612334801762115, "step": 298, "batch_size": 64, "mean": 215.52365112304688, "std": 320.1552734375, "min": -689.9510498046875, "p10": -109.48613128662109, "median": 172.72447204589844, "p90": 630.714764404297, "max": 1467.546875, "pos_frac": 0.765625, "sample": [124.09085845947266, -95.624267578125, 33.56693649291992, -689.9510498046875, 49.011749267578125, -121.87457275390625, -17.162275314331055, 634.81103515625, 205.319580078125, -42.43603515625, 156.72335815429688, 407.89422607421875, 20.191146850585938, -141.4355926513672, -151.23995971679688, 70.71025848388672, 172.62765502929688, 119.64213562011719, 174.58929443359375, 270.2411193847656, 876.480712890625, 0.4935150146484375, 394.54608154296875, -134.65383911132812, 613.2422485351562, 236.04864501953125, -112.64979553222656, 557.7542114257812, 673.7581787109375, 717.4120483398438, -127.24446868896484, -102.104248046875, -12.442955017089844, 6.2258453369140625, -15.475257873535156, 663.7061767578125, 720.8818359375, 165.83555603027344, 378.4501953125, 274.5025634765625, 17.398344039916992, 3.8284454345703125, 1467.546875, 302.9723815917969, 621.1567993164062, 222.89077758789062, 440.578857421875, 469.36138916015625, 12.742008209228516, 88.13487243652344, 220.89877319335938, 402.2676696777344, 367.22015380859375, 105.23609161376953, 572.1102294921875, 172.8212890625, -21.661836624145508, 416.7606506347656, -99.99943542480469, 192.53555297851562, 188.53573608398438, 223.7494354248047, 61.8526611328125, 390.1134033203125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000298.npy"} +{"epoch": 0.43759177679882527, "step": 299, "batch_size": 64, "mean": 183.76528930664062, "std": 257.1705627441406, "min": -371.01318359375, "p10": -89.57877807617184, "median": 159.99629974365234, "p90": 552.7271972656251, "max": 862.74609375, "pos_frac": 0.75, "sample": [-49.75751495361328, -204.1719970703125, -371.01318359375, 338.62451171875, 40.831329345703125, -40.9747200012207, 357.97930908203125, 190.66778564453125, 90.28282928466797, 131.04824829101562, 599.23779296875, 563.3519287109375, 862.74609375, -63.441795349121094, 210.1144256591797, 769.3526611328125, -287.87200927734375, 366.45123291015625, 142.2845458984375, 200.66004943847656, -131.45181274414062, 321.0539855957031, 85.67391204833984, 125.56135559082031, 393.0821838378906, 151.8819580078125, 841.8600463867188, -0.3573722839355469, 228.39523315429688, -14.12662124633789, -21.675180435180664, 129.98109436035156, -126.85980224609375, 80.96260070800781, 198.2897186279297, 527.9361572265625, 168.1106414794922, 174.7598419189453, 3.7567138671875, 188.42575073242188, 36.365074157714844, 337.221923828125, 309.7481689453125, -28.581974029541016, 194.43951416015625, -50.609779357910156, -224.8312530517578, 440.8172912597656, 36.69097900390625, 714.0645751953125, -100.78034210205078, 43.382301330566406, 247.97634887695312, 262.77044677734375, 268.56396484375, 283.36956787109375, 85.70613098144531, 652.0790405273438, 269.8577575683594, 126.71061706542969, 49.04388427734375, 339.5020751953125, -2.4241256713867188, 298.234375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000299.npy"} +{"epoch": 0.4390602055800294, "step": 300, "batch_size": 64, "mean": 215.85569763183594, "std": 285.91180419921875, "min": -434.43377685546875, "p10": -192.05101013183594, "median": 234.44065856933594, "p90": 574.6693115234375, "max": 807.737060546875, "pos_frac": 0.765625, "sample": [240.4398193359375, 265.243408203125, 161.63409423828125, 334.66949462890625, 575.1338500976562, -380.02923583984375, -94.34337615966797, 674.8948974609375, 409.8529052734375, -5.556949615478516, -22.474966049194336, 294.5947265625, 172.91065979003906, 508.1441345214844, 331.3017578125, 423.7881774902344, 145.9895477294922, 456.92041015625, 46.529510498046875, 72.2084732055664, -66.4505615234375, 193.92398071289062, 172.54403686523438, -6.592689514160156, 470.4435119628906, 336.65850830078125, -193.22238159179688, 151.62831115722656, 142.22511291503906, 283.68218994140625, 88.74263000488281, 671.5645751953125, 314.51715087890625, -434.43377685546875, 519.628662109375, -354.1147155761719, 573.5853881835938, 365.8740234375, 463.8128662109375, 205.0844268798828, 69.53937530517578, 433.4434814453125, 592.226806640625, 286.2928466796875, 49.31143569946289, 347.2466125488281, -91.82160949707031, -233.76043701171875, 753.36474609375, 443.8180236816406, 468.64373779296875, -260.62542724609375, 285.3995666503906, -246.15139770507812, -12.219966888427734, 242.19049072265625, 500.7393493652344, 93.29251098632812, 663.8931884765625, 807.737060546875, -189.31781005859375, 21.974720001220703, 228.44149780273438, 50.15348815917969], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000300.npy"} +{"epoch": 0.44052863436123346, "step": 301, "batch_size": 64, "mean": 216.28659057617188, "std": 262.0798034667969, "min": -200.11720275878906, "p10": -73.08536300659178, "median": 132.62308502197266, "p90": 601.0623413085938, "max": 906.44287109375, "pos_frac": 0.8125, "sample": [143.81675720214844, 337.6671142578125, -78.72913360595703, 351.4168395996094, 385.3091735839844, 369.44476318359375, 836.3848876953125, 104.25129699707031, 486.07421875, -19.05341911315918, 906.44287109375, 308.50189208984375, 79.71735382080078, 242.7512969970703, -92.13981628417969, 112.280029296875, 112.4904556274414, 596.5167846679688, 55.71110534667969, 333.9072265625, 738.2130126953125, 65.02705383300781, -121.90076446533203, 56.02171325683594, 466.4350891113281, 58.42474365234375, 65.2451400756836, 84.74525451660156, 233.37924194335938, 395.8895568847656, 346.10150146484375, 194.09681701660156, -20.82089614868164, 237.4434814453125, 48.39527130126953, 57.967864990234375, 252.6515350341797, -133.9049835205078, 13.870359420776367, -0.6611328125, 103.406982421875, 25.4599666595459, 224.38629150390625, -82.45879364013672, 41.31980895996094, 889.8246459960938, 314.7471618652344, 227.02249145507812, 671.7506103515625, 64.4951171875, 522.6721801757812, 137.44046020507812, 330.93646240234375, 603.0104370117188, 480.3963623046875, 68.56680297851562, -59.91656494140625, -200.11720275878906, 640.445556640625, -185.94583129882812, 127.80570983886719, -58.6724853515625, 3.5296363830566406, 342.8541259765625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000301.npy"} +{"epoch": 0.4419970631424376, "step": 302, "batch_size": 64, "mean": 185.5377655029297, "std": 248.04286193847656, "min": -260.83447265625, "p10": -95.70340576171874, "median": 152.6608428955078, "p90": 462.5611938476563, "max": 869.888916015625, "pos_frac": 0.765625, "sample": [-14.318801879882812, 433.49090576171875, 810.1138305664062, -45.712276458740234, 220.7969970703125, -14.144987106323242, 247.6161346435547, -51.380828857421875, 250.72264099121094, 21.13555908203125, 168.98416137695312, 47.32740783691406, 56.16014862060547, 9.461830139160156, 247.612060546875, 136.3375244140625, 416.38385009765625, -10.731586456298828, 331.89471435546875, 414.4270324707031, 218.9877471923828, 229.43841552734375, 133.95315551757812, 277.22967529296875, -84.13815307617188, -117.10822296142578, 459.9345703125, 623.51416015625, 83.26470184326172, -260.83447265625, 221.01687622070312, 5.852256774902344, 117.02658081054688, 23.188339233398438, -100.66566467285156, 551.719482421875, 869.888916015625, 463.6868896484375, 191.91258239746094, -100.65994262695312, -30.18798828125, 449.42999267578125, -210.11163330078125, 243.99072265625, 428.7430419921875, 6.324789047241211, 110.38182067871094, 736.5026245117188, 252.51405334472656, 305.2320556640625, 23.546091079711914, 337.47406005859375, -188.419677734375, 333.22259521484375, 88.86897277832031, 289.49090576171875, -78.27162170410156, 429.4881286621094, 60.81541442871094, 660.4584350585938, -173.37420654296875, 40.86516189575195, 194.02786254882812, 80.02062225341797], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000302.npy"} +{"epoch": 0.4434654919236417, "step": 303, "batch_size": 64, "mean": 170.16653442382812, "std": 256.2247009277344, "min": -488.0574035644531, "p10": -110.62314682006836, "median": 153.4263916015625, "p90": 564.0956115722657, "max": 716.5557250976562, "pos_frac": 0.703125, "sample": [366.955078125, -24.406391143798828, 97.64088439941406, 577.6607666015625, -112.69364166259766, 350.5749816894531, 537.386962890625, 716.5557250976562, -8.329629898071289, -168.92428588867188, 476.8831787109375, 179.00540161132812, 372.4398193359375, -105.7919921875, 27.90509796142578, 193.62399291992188, 90.00813293457031, 15.114336013793945, -46.184043884277344, 392.82049560546875, -488.0574035644531, -11.115612030029297, -55.313438415527344, 370.0174865722656, 436.6003112792969, 604.4357299804688, -94.22760772705078, 295.24078369140625, -240.834228515625, 120.75547790527344, -41.16473388671875, 292.3834228515625, 660.7734985351562, 226.8850555419922, 494.73590087890625, 274.5, -185.545166015625, 161.27191162109375, 116.3908462524414, 583.476318359375, 617.2779541015625, -78.81494140625, 217.24313354492188, 474.4972229003906, 187.84524536132812, 289.45440673828125, 278.18145751953125, 324.9716491699219, 180.49429321289062, -37.60852813720703, 40.927093505859375, -41.27423858642578, 192.4984588623047, 80.98960876464844, 137.44110107421875, 575.5421752929688, -226.10922241210938, -54.38212585449219, 21.917423248291016, 23.324954986572266, 267.141357421875, 32.33941650390625, -208.27328491210938, 145.58087158203125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000303.npy"} +{"epoch": 0.44493392070484583, "step": 304, "batch_size": 64, "mean": 191.04885864257812, "std": 180.13873291015625, "min": -308.03912353515625, "p10": -23.557669067382804, "median": 193.12435150146484, "p90": 413.8148742675782, "max": 614.145263671875, "pos_frac": 0.859375, "sample": [15.212936401367188, 295.10247802734375, 305.6295471191406, 65.94280242919922, -12.74676513671875, -32.89280700683594, -27.202484130859375, 61.36283874511719, 169.9163818359375, 177.30731201171875, 365.3451232910156, 393.17816162109375, 216.740478515625, 359.7375183105469, 76.97218322753906, 328.5469970703125, 317.2565002441406, 310.7355651855469, -67.95459747314453, 60.61728286743164, 146.28207397460938, 360.53985595703125, 50.51860046386719, 103.13421630859375, 314.0394287109375, 76.50613403320312, 315.547607421875, 38.55393981933594, 249.8118896484375, 104.7491455078125, 134.29098510742188, 67.46279907226562, 614.145263671875, 126.8376235961914, -85.72821807861328, -308.03912353515625, 308.3560791015625, -15.0531005859375, 21.635360717773438, 313.8036193847656, -84.93502807617188, 517.79248046875, 37.06773376464844, 41.5091552734375, 481.14141845703125, 194.29298400878906, 17.581787109375, 78.83261108398438, 257.5597839355469, 372.50372314453125, 263.8143005371094, -34.56270217895508, 209.4564666748047, 491.9832458496094, 443.11834716796875, 68.48594665527344, 249.06881713867188, 191.95571899414062, 422.6591796875, 506.8365478515625, 349.1488342285156, 230.52972412109375, 381.4009704589844, 223.68359375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000304.npy"} +{"epoch": 0.44640234948604995, "step": 305, "batch_size": 64, "mean": 176.1479949951172, "std": 256.9924011230469, "min": -356.2535095214844, "p10": -73.85463714599608, "median": 128.01253128051758, "p90": 515.7008300781251, "max": 1158.3367919921875, "pos_frac": 0.765625, "sample": [419.6238708496094, 302.9482116699219, 124.65406799316406, 214.34103393554688, 157.31521606445312, 684.715087890625, 272.8245849609375, 25.536314010620117, 243.55714416503906, 537.5285034179688, 156.07456970214844, 89.12588500976562, 40.218040466308594, -66.39764404296875, -1.51800537109375, -10.280067443847656, 363.8408508300781, 531.3543701171875, 21.36255645751953, 104.74471282958984, 1158.3367919921875, 356.1664733886719, 18.410884857177734, 280.64251708984375, 256.991455078125, 62.6202392578125, -16.062829971313477, 249.5028533935547, 255.7522735595703, -116.00216674804688, -61.96900177001953, 277.9007568359375, -77.05049133300781, 351.7760009765625, 22.36285400390625, 161.77146911621094, 697.1422119140625, 348.6702880859375, -8.952543258666992, 199.62130737304688, 125.16358184814453, 13.017890930175781, 79.56477355957031, 50.563011169433594, 660.8841552734375, 571.0631103515625, 252.10765075683594, -43.357940673828125, -214.46942138671875, 130.86148071289062, -356.2535095214844, 431.5145568847656, 479.1759033203125, 54.896018981933594, -130.02749633789062, 41.64837646484375, -82.92227935791016, 59.45417785644531, 379.1572570800781, -331.9318542480469, 173.38507080078125, 287.15924072265625, -6.4618072509765625, 20.079185485839844], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000305.npy"} +{"epoch": 0.447870778267254, "step": 306, "batch_size": 64, "mean": 169.38018798828125, "std": 221.75775146484375, "min": -432.87982177734375, "p10": -39.89112434387204, "median": 119.96797561645508, "p90": 441.5025207519531, "max": 1041.1441650390625, "pos_frac": 0.78125, "sample": [-7.875164031982422, 15.727039337158203, 50.68020248413086, 51.952545166015625, 81.16424560546875, -100.51026916503906, 374.6949157714844, 27.97197723388672, 112.43975830078125, -145.98452758789062, 218.88304138183594, 407.75048828125, 157.00991821289062, 476.6007080078125, 47.6937255859375, 94.5549545288086, 41.829532623291016, 521.4439086914062, 58.596824645996094, 64.65583801269531, 176.4466552734375, -85.12862396240234, -8.560272216796875, 456.74542236328125, 282.95465087890625, 322.414794921875, -50.692020416259766, -8.468311309814453, 402.7693176269531, -0.9074554443359375, 126.52288818359375, 202.14825439453125, 250.06866455078125, 439.75811767578125, 27.81134033203125, 134.5836944580078, 160.3218994140625, -117.77387237548828, 52.35434341430664, 318.92840576171875, -432.87982177734375, 328.1214599609375, 83.97933197021484, -12.181838989257812, 75.75544738769531, 280.2947082519531, -14.689033508300781, 113.4130630493164, 602.31689453125, 486.57159423828125, 147.52224731445312, 293.8275146484375, 234.51133728027344, -66.72061920166016, 442.2501220703125, -14.015308380126953, 32.51677322387695, 80.43025970458984, 289.0524597167969, 148.47579956054688, 1041.1441650390625, 262.64990234375, 370.12005615234375, 436.2881164550781], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000306.npy"} +{"epoch": 0.44933920704845814, "step": 307, "batch_size": 64, "mean": 178.78890991210938, "std": 232.13523864746094, "min": -430.672607421875, "p10": -117.5973434448242, "median": 140.76502990722656, "p90": 500.147412109375, "max": 849.3489379882812, "pos_frac": 0.828125, "sample": [406.1843566894531, 174.79180908203125, 120.9924545288086, 356.4907531738281, 227.26840209960938, 151.05723571777344, -190.29345703125, 93.65727996826172, -86.24046325683594, 580.95068359375, 124.501220703125, 33.66718292236328, 76.18693542480469, -157.1048583984375, 479.74334716796875, -430.672607421875, 508.4084167480469, -134.7392120361328, 272.4922790527344, -61.794456481933594, 209.73468017578125, 319.8366394042969, 89.12715911865234, 154.12265014648438, 496.59503173828125, 277.5965270996094, 65.06254577636719, 138.7146759033203, 123.06661987304688, 520.0498046875, 145.02639770507812, 567.9249267578125, 51.19971466064453, 123.60218811035156, -126.17347717285156, 104.4617919921875, -158.917236328125, 65.49119567871094, 501.66986083984375, 393.69659423828125, -25.536888122558594, 407.8780517578125, 77.53438568115234, 345.3697814941406, 142.8153839111328, 12.821243286132812, 427.52874755859375, 293.85614013671875, 4.026178359985352, 53.07637023925781, 252.19419860839844, 24.131481170654297, 355.83868408203125, 603.253173828125, 259.50439453125, 849.3489379882812, 96.29705047607422, -97.58636474609375, 254.90516662597656, 159.23944091796875, 15.765731811523438, 400.5127258300781, 100.95057678222656, -178.66993713378906], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000307.npy"} +{"epoch": 0.45080763582966227, "step": 308, "batch_size": 64, "mean": 196.12857055664062, "std": 204.69528198242188, "min": -226.26870727539062, "p10": -89.68300857543943, "median": 170.0171661376953, "p90": 488.4424987792969, "max": 566.1908569335938, "pos_frac": 0.828125, "sample": [170.8134002685547, 464.1666259765625, -226.26870727539062, 242.57455444335938, 41.800132751464844, 516.2890014648438, -163.79751586914062, -139.45156860351562, 42.0382080078125, 118.31536102294922, 438.37652587890625, -1.6870994567871094, 257.124267578125, 100.74838256835938, 414.3780517578125, 533.671142578125, -101.05502319335938, 131.39772033691406, 218.27001953125, 30.326820373535156, 264.470947265625, 477.7117004394531, 227.48512268066406, 566.1908569335938, -118.15771484375, 304.46429443359375, 105.55426025390625, 169.22093200683594, 99.02961730957031, 4.50517463684082, 269.02838134765625, 129.69192504882812, 160.5146484375, 241.51902770996094, 285.53070068359375, 493.0414123535156, 539.1790161132812, 149.06704711914062, -117.35032653808594, 128.53028869628906, 112.83676147460938, 434.65594482421875, 385.3679504394531, 200.71728515625, 158.5428009033203, 560.822509765625, 220.4247589111328, 45.99232482910156, 15.968406677246094, 447.9398193359375, 499.86773681640625, 298.1592712402344, 153.84454345703125, -23.639989852905273, -32.069732666015625, 76.71832275390625, 343.0084533691406, 398.48345947265625, 329.68072509765625, -63.14830780029297, 433.8885192871094, 208.84188842773438, -129.97528076171875, 8.041740417480469], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000308.npy"} +{"epoch": 0.4522760646108664, "step": 309, "batch_size": 64, "mean": 195.45639038085938, "std": 239.0780487060547, "min": -656.6737060546875, "p10": -89.01482391357422, "median": 184.35983276367188, "p90": 493.1237731933595, "max": 802.80908203125, "pos_frac": 0.828125, "sample": [184.35726928710938, 88.96797180175781, 128.18423461914062, -90.58013916015625, 315.13134765625, 149.85916137695312, 145.37205505371094, -24.599897384643555, 521.5890502929688, 60.50189208984375, 199.35308837890625, 443.16552734375, 111.7420425415039, 568.0010986328125, 87.17774200439453, 61.74791717529297, -86.41668701171875, 353.3525085449219, 567.1329345703125, 384.1295471191406, 228.8510284423828, -656.6737060546875, 248.7056884765625, -90.12831115722656, 71.19882202148438, 184.36239624023438, 249.47642517089844, 375.632568359375, 74.07640075683594, 36.749267578125, -163.23822021484375, 653.4251708984375, 81.001953125, 7.6201019287109375, 157.87100219726562, 462.10882568359375, 802.80908203125, 395.33709716796875, 133.33004760742188, 9.627822875976562, -62.695491790771484, 77.70442962646484, 60.47612380981445, 444.8287353515625, 348.22955322265625, 325.5323791503906, 506.4158935546875, 425.68499755859375, 334.2276611328125, -32.547122955322266, 395.97125244140625, -159.8575897216797, 255.25531005859375, 296.05615234375, -136.8798828125, 67.90098571777344, 555.70263671875, 435.67877197265625, -95.87223815917969, 245.7305908203125, 208.0150909423828, 41.28403091430664, 322.4134521484375, 219.64149475097656], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000309.npy"} +{"epoch": 0.45374449339207046, "step": 310, "batch_size": 64, "mean": 179.8553924560547, "std": 204.19400024414062, "min": -251.6104736328125, "p10": -64.74428176879883, "median": 177.94214630126953, "p90": 427.8214324951172, "max": 617.9930419921875, "pos_frac": 0.78125, "sample": [-65.52862548828125, 224.94757080078125, 428.4403381347656, -62.914146423339844, 100.92936706542969, 363.0799560546875, 9.379199981689453, -15.200811386108398, -9.954816818237305, 230.14132690429688, -227.25198364257812, 465.5289306640625, 405.3959655761719, -81.61697387695312, 57.404212951660156, 359.30718994140625, 209.43186950683594, 295.627685546875, 102.86135864257812, 138.20819091796875, -47.43513870239258, 106.61512756347656, 336.096435546875, 80.8271484375, 170.1927490234375, 584.31591796875, 381.04058837890625, 14.51449966430664, 28.125276565551758, 260.1794738769531, -251.6104736328125, 575.7448120117188, -34.50099563598633, 129.6612548828125, 579.81103515625, 230.29183959960938, 301.9678649902344, -66.31797790527344, -131.33181762695312, -1.0781021118164062, 94.33755493164062, 182.1075439453125, 316.9942626953125, 277.74261474609375, 91.38367462158203, 317.8245849609375, 426.3773193359375, -124.12293243408203, 315.9130859375, 196.98666381835938, 281.6722412109375, -52.73065948486328, 356.9194641113281, 181.8415985107422, 19.610610961914062, 69.3525161743164, 29.333160400390625, 539.44921875, 617.9930419921875, 288.2018127441406, 63.426456451416016, 395.66387939453125, 275.09906005859375, 174.04269409179688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000310.npy"} +{"epoch": 0.4552129221732746, "step": 311, "batch_size": 64, "mean": 197.71823120117188, "std": 265.8747253417969, "min": -278.28021240234375, "p10": -144.2165298461914, "median": 182.77779388427734, "p90": 561.3850402832032, "max": 919.43505859375, "pos_frac": 0.734375, "sample": [477.92938232421875, 455.5765075683594, 460.0029296875, 33.03858184814453, -209.73028564453125, 593.743408203125, -181.41119384765625, 314.7135009765625, 188.7311248779297, 573.879150390625, 308.6243896484375, 74.94924926757812, 372.57135009765625, 793.7394409179688, 319.8616943359375, 177.93536376953125, 436.9403076171875, 233.39663696289062, -45.079917907714844, 568.082763671875, 187.62022399902344, 254.41485595703125, 527.6446533203125, 248.33274841308594, 26.771873474121094, 166.7009735107422, -58.19389343261719, -192.60671997070312, 204.91754150390625, 100.56438446044922, -137.73568725585938, 260.3238830566406, -199.56378173828125, 136.26014709472656, 253.19232177734375, -153.36688232421875, -18.54381561279297, 919.43505859375, -29.46441650390625, 264.5559387207031, 14.541328430175781, 88.89124298095703, 248.00955200195312, 14.43818473815918, 627.3406982421875, 322.5199279785156, 37.4405403137207, -32.830810546875, -18.014339447021484, 126.99288940429688, -49.54324722290039, 543.4468994140625, 142.82583618164062, -17.263282775878906, -278.28021240234375, 493.2760009765625, 137.68296813964844, 227.42462158203125, 29.416770935058594, 254.86009216308594, 545.7570190429688, -146.99403381347656, -81.89550018310547, 715.1703491210938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000311.npy"} +{"epoch": 0.4566813509544787, "step": 312, "batch_size": 64, "mean": 182.63937377929688, "std": 258.8452453613281, "min": -362.2597961425781, "p10": -57.057377624511716, "median": 147.42684936523438, "p90": 495.3883666992188, "max": 1028.53955078125, "pos_frac": 0.796875, "sample": [126.34503173828125, 1022.1580200195312, 504.18988037109375, 164.67428588867188, 138.2472686767578, 65.41223907470703, -123.12738800048828, 93.964599609375, 170.21994018554688, 366.35693359375, -291.3429260253906, 431.2077941894531, 1028.53955078125, 340.27264404296875, 225.16812133789062, 304.7803955078125, -227.54544067382812, 213.41229248046875, 384.4429931640625, 221.242919921875, 555.1809692382812, 101.92573547363281, 91.33616638183594, 288.8506774902344, 73.4993896484375, 52.29914855957031, 70.67974090576172, 688.868408203125, 349.8370056152344, 474.85150146484375, 184.09500122070312, 381.0480041503906, 179.55133056640625, 137.7894744873047, -49.429107666015625, 96.13252258300781, 158.67835998535156, 581.02197265625, -5.5403289794921875, 48.75107955932617, 303.76409912109375, 194.5931854248047, -56.46693420410156, 117.90411376953125, 150.61390686035156, -19.920974731445312, -20.484710693359375, 601.3027954101562, -48.96827697753906, 3.5935420989990234, 204.62547302246094, 98.80586242675781, -215.55706787109375, -57.3104248046875, -130.27835083007812, 176.6632843017578, 144.2397918701172, 5.1965179443359375, -362.2597961425781, 310.9525451660156, 229.67898559570312, 16.413223266601562, 39.9276008605957, 383.8455810546875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000312.npy"} +{"epoch": 0.4581497797356828, "step": 313, "batch_size": 64, "mean": 192.15664672851562, "std": 256.6850891113281, "min": -517.7696533203125, "p10": -82.62397956848142, "median": 185.08049774169922, "p90": 523.9366333007813, "max": 916.036376953125, "pos_frac": 0.796875, "sample": [93.047607421875, 320.3916931152344, 103.16285705566406, 300.79583740234375, -203.07186889648438, -25.900650024414062, 433.2708435058594, 164.63870239257812, 413.93243408203125, -56.28397750854492, 28.857467651367188, -93.91255187988281, 508.79998779296875, 242.5059356689453, 26.389177322387695, 251.7251739501953, 530.4237670898438, 188.6011505126953, 79.1841049194336, 240.82901000976562, 54.39734649658203, 352.0961608886719, -395.810546875, -115.06523132324219, 209.36868286132812, 563.0946655273438, 326.081787109375, 112.99075317382812, 181.55984497070312, 916.036376953125, 327.1767578125, 228.746337890625, -24.948698043823242, 242.46836853027344, -246.71578979492188, 167.88706970214844, -38.026573181152344, -517.7696533203125, 447.028564453125, 265.14959716796875, -13.177024841308594, 283.4919738769531, 80.33889770507812, 292.36175537109375, 304.9170227050781, 120.53514862060547, -19.494712829589844, 314.0843200683594, 298.82562255859375, -162.77545166015625, 499.1634216308594, 70.93118286132812, 174.29083251953125, 13.4659423828125, 103.11395263671875, 548.1883544921875, 599.2503662109375, 23.290054321289062, 18.74262237548828, 99.92657470703125, 284.10650634765625, 391.3464050292969, 554.0689697265625, 815.8999633789062], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000313.npy"} +{"epoch": 0.45961820851688695, "step": 314, "batch_size": 64, "mean": 182.71548461914062, "std": 294.4086608886719, "min": -396.4783935546875, "p10": -151.29063720703124, "median": 138.54206466674805, "p90": 513.5174865722656, "max": 1002.9725341796875, "pos_frac": 0.6875, "sample": [744.7066650390625, -391.53997802734375, 107.59709930419922, -4.584175109863281, -2.361541748046875, 239.2894287109375, 87.39852905273438, 318.49188232421875, 84.33338928222656, -120.78099060058594, 353.9774169921875, 21.145584106445312, 307.720703125, 80.24778747558594, 193.36370849609375, -396.4783935546875, -137.9517059326172, 8.452224731445312, 714.4869384765625, 837.2490844726562, 51.125946044921875, -40.10662078857422, -103.55516052246094, 30.237133026123047, 228.050048828125, -194.36900329589844, 36.72126770019531, -141.86990356445312, -73.07408905029297, 447.7077941894531, 451.295654296875, 3.3214111328125, 295.6075439453125, 513.6851196289062, 349.3567199707031, 220.3643035888672, -155.32809448242188, 169.48703002929688, -18.92767333984375, 728.5491333007812, 478.5218811035156, 434.0352478027344, 1002.9725341796875, 546.7381591796875, -49.40175247192383, 359.58013916015625, -169.40972900390625, 493.2587890625, -52.42962646484375, 304.11810302734375, 359.22418212890625, 21.034591674804688, 354.56378173828125, 411.0602722167969, 470.8062438964844, 513.1263427734375, 46.66682434082031, 247.22100830078125, 228.5698699951172, -25.399208068847656, -207.9383087158203, -40.06563186645508, 364.3182373046875, -240.42234802246094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000314.npy"} +{"epoch": 0.461086637298091, "step": 315, "batch_size": 64, "mean": 168.69830322265625, "std": 228.4167022705078, "min": -196.82305908203125, "p10": -97.92170333862305, "median": 141.85376739501953, "p90": 533.0757995605469, "max": 739.2601318359375, "pos_frac": 0.734375, "sample": [548.47119140625, -48.35796356201172, 341.3912353515625, -34.631317138671875, -99.93360900878906, 70.79972839355469, 58.631446838378906, 185.09523010253906, 307.15301513671875, -29.181961059570312, 292.9927978515625, 62.335479736328125, -49.7696418762207, 327.0457763671875, 83.05001068115234, 528.3582763671875, 375.47802734375, 211.7323760986328, -143.69662475585938, 375.0170593261719, 275.7005615234375, 535.0975952148438, 600.4044799804688, 51.28460693359375, -196.82305908203125, 210.081298828125, 194.89984130859375, 210.22354125976562, 448.7906799316406, 163.86973571777344, 518.5601806640625, 136.5797882080078, 77.24481201171875, 152.14642333984375, 157.2255096435547, 315.767822265625, -70.32791137695312, -49.982513427734375, 242.87513732910156, 14.203872680664062, 52.9968147277832, -108.82000732421875, 739.2601318359375, 38.63291931152344, 258.1884460449219, 15.765083312988281, -171.37564086914062, -8.464353561401367, -76.63359069824219, 550.974365234375, 101.98004150390625, 226.55345153808594, 645.7179565429688, 147.12774658203125, 123.19303894042969, -177.00299072265625, 277.0826721191406, -11.172115325927734, 429.1467590332031, 562.5120849609375, -159.8427276611328, -93.22725677490234, 76.32807922363281, 7.967918395996094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000315.npy"} +{"epoch": 0.46255506607929514, "step": 316, "batch_size": 64, "mean": 240.21714782714844, "std": 275.72900390625, "min": -278.36279296875, "p10": -7.881428909301752, "median": 173.38636016845703, "p90": 649.0011840820315, "max": 982.080810546875, "pos_frac": 0.875, "sample": [346.66009521484375, 127.97007751464844, 502.40557861328125, 88.18046569824219, 53.46526336669922, 982.080810546875, 173.38589477539062, 762.8154296875, 491.09967041015625, 714.1393432617188, 383.45751953125, 343.88104248046875, 52.191253662109375, 19.051212310791016, 32.50141906738281, 213.5552520751953, 73.84156799316406, 135.8201446533203, -1.8191566467285156, 492.84515380859375, 151.86965942382812, 20.80801773071289, 672.5445556640625, 455.81512451171875, 289.8751525878906, 75.91162872314453, 76.43466186523438, 694.9374389648438, 93.56648254394531, 531.1270141601562, 14.409414291381836, 175.75775146484375, 57.820159912109375, 173.38682556152344, 191.20498657226562, 329.86962890625, 454.014892578125, 68.40803527832031, 346.3116455078125, 19.508338928222656, 594.066650390625, 295.8664855957031, 37.14215087890625, 381.8193664550781, 208.3292694091797, 138.76605224609375, -278.36279296875, 41.52131271362305, 883.4398803710938, 138.86070251464844, 974.24560546875, -230.44171142578125, 226.0089111328125, 184.28565979003906, 480.22576904296875, -10.479545593261719, -70.40645599365234, -107.31861877441406, -30.52997589111328, 107.60555267333984, 205.8428955078125, -221.08316040039062, 129.219970703125, 414.1641540527344], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000316.npy"} +{"epoch": 0.46402349486049926, "step": 317, "batch_size": 64, "mean": 263.0432434082031, "std": 255.05038452148438, "min": -201.60223388671875, "p10": -57.59666213989258, "median": 244.99117279052734, "p90": 609.46171875, "max": 795.4320068359375, "pos_frac": 0.828125, "sample": [389.61651611328125, 10.349800109863281, 129.30169677734375, 510.7543029785156, 459.62359619140625, 124.55282592773438, 455.74395751953125, 393.47735595703125, -179.32041931152344, 599.305908203125, 795.4320068359375, 154.42381286621094, 252.88931274414062, -58.13018798828125, 153.32293701171875, 448.4827880859375, 195.79295349121094, 23.050941467285156, 96.73087310791016, 458.812744140625, -45.0526123046875, 375.30029296875, 397.7352600097656, 558.0313720703125, 26.311168670654297, 434.8873596191406, 4.926486968994141, 114.87947845458984, 746.2402954101562, 473.2811279296875, 695.4412231445312, 536.4232788085938, 163.14671325683594, 613.814208984375, 474.58929443359375, -99.02091979980469, 249.75328063964844, 164.85658264160156, 323.70367431640625, 71.99435424804688, 240.22906494140625, 570.5482788085938, -26.81123161315918, -38.84156799316406, 620.7570190429688, 231.09088134765625, 647.64111328125, 358.2311096191406, 299.83380126953125, -83.9791488647461, 145.93312072753906, 69.3428726196289, 262.0445556640625, -56.351768493652344, -201.60223388671875, 153.53375244140625, 133.35415649414062, -130.00335693359375, 653.1513671875, 433.87298583984375, 125.4455337524414, 529.1565551757812, -191.45924377441406, 394.1949157714844], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000317.npy"} +{"epoch": 0.4654919236417034, "step": 318, "batch_size": 64, "mean": 184.0438232421875, "std": 242.9461212158203, "min": -417.9989013671875, "p10": -66.90041198730466, "median": 171.98582458496094, "p90": 498.17203979492194, "max": 865.935791015625, "pos_frac": 0.78125, "sample": [50.59931182861328, -150.81480407714844, 865.935791015625, 98.53277587890625, 270.4539794921875, 99.21113586425781, -42.912620544433594, 191.6292724609375, 146.00534057617188, 646.6171875, 22.34245491027832, 111.9027099609375, 322.1398010253906, 506.70166015625, 650.9041748046875, 278.8857421875, -33.09046936035156, 118.62782287597656, -80.58135223388672, -43.313575744628906, 576.6057739257812, 98.06938934326172, -117.33805847167969, 478.26959228515625, 850.1679077148438, 288.11785888671875, -30.43203353881836, -74.03434753417969, 175.28456115722656, 306.87115478515625, 70.27276611328125, 343.7565612792969, 63.02867889404297, 391.4073181152344, 235.3539581298828, 111.2865219116211, 576.364013671875, 24.248130798339844, -49.252777099609375, 423.3116760253906, 456.241455078125, 190.01937866210938, -15.706920623779297, -90.34952545166016, 367.71661376953125, 263.5548400878906, 102.00096893310547, 178.82000732421875, 136.2909698486328, -50.25456237792969, -319.36773681640625, 42.839576721191406, 224.79344177246094, 239.655517578125, 326.0869140625, 321.89654541015625, 225.09634399414062, 170.78030395507812, 73.99711608886719, 226.30203247070312, 181.4473876953125, 173.19134521484375, -417.9989013671875, 0.6161727905273438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000318.npy"} +{"epoch": 0.4669603524229075, "step": 319, "batch_size": 64, "mean": 233.0213623046875, "std": 266.93145751953125, "min": -342.81402587890625, "p10": -124.03312225341794, "median": 233.52720642089844, "p90": 595.5888977050782, "max": 1067.765869140625, "pos_frac": 0.8125, "sample": [328.9384765625, 165.02735900878906, 321.5424499511719, 269.80743408203125, 53.25587463378906, 288.98919677734375, 370.0204162597656, 520.8851318359375, 242.668701171875, 112.0919418334961, -96.82618713378906, -260.3973083496094, 327.247802734375, 144.3650665283203, 319.5418701171875, 142.0709991455078, 122.0375747680664, 226.5679473876953, 303.7104187011719, 168.26339721679688, 267.0782470703125, 543.7245483398438, 228.18069458007812, 233.85818481445312, 308.15838623046875, 21.468185424804688, 303.3904724121094, 831.2652587890625, 529.657958984375, 330.72674560546875, -135.05795288085938, 162.39173889160156, 245.3343963623047, 609.5564575195312, -328.693115234375, 344.72650146484375, -342.81402587890625, -152.20416259765625, -196.00088500976562, 378.1365661621094, 603.749267578125, 437.6750793457031, 610.31689453125, -7.633939743041992, 173.4260711669922, 57.06922912597656, 614.7298583984375, 610.0289306640625, 289.8551940917969, 576.5480346679688, 1067.765869140625, -98.30851745605469, 183.6955108642578, 436.250732421875, -2.088825225830078, 42.48120880126953, -212.75640869140625, -42.131553649902344, 375.8304443359375, 172.4857635498047, 214.5338134765625, 184.63327026367188, 233.19622802734375, 139.3230743408203], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000319.npy"} +{"epoch": 0.4684287812041116, "step": 320, "batch_size": 64, "mean": 248.263916015625, "std": 281.1155090332031, "min": -267.4578552246094, "p10": -94.63702316284179, "median": 230.31873321533203, "p90": 594.7659118652344, "max": 1050.8538818359375, "pos_frac": 0.78125, "sample": [152.40774536132812, 168.73684692382812, 950.80810546875, 99.74330139160156, 311.3589782714844, -133.98623657226562, 145.16824340820312, -267.4578552246094, 903.5492553710938, 227.7650909423828, 463.47662353515625, 361.8741455078125, -50.46435546875, 405.8477783203125, 178.08547973632812, 598.7455444335938, 649.1328735351562, 410.1795349121094, 434.9571533203125, -133.27862548828125, 347.62164306640625, 352.35321044921875, -83.14469146728516, 244.02117919921875, 87.3174057006836, -74.56238555908203, 275.38250732421875, 305.6753845214844, -46.32853698730469, 610.817138671875, -98.86595153808594, 229.2417449951172, -69.9614028930664, 142.6396484375, 495.3260498046875, 125.90788269042969, 214.78033447265625, 231.39572143554688, 1050.8538818359375, 455.7131652832031, 364.38946533203125, 156.93942260742188, 106.24810791015625, -175.04129028320312, 725.0782470703125, 88.05072784423828, 467.98406982421875, 182.32273864746094, -10.056486129760742, 3.1233978271484375, 356.659912109375, 459.40496826171875, 317.19091796875, 421.6898193359375, 474.3455810546875, 585.4801025390625, 507.78643798828125, 25.768659591674805, -154.8308868408203, -84.76952362060547, 73.00686645507812, 241.4572296142578, 271.0145263671875, -187.18624877929688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000320.npy"} +{"epoch": 0.4698972099853157, "step": 321, "batch_size": 64, "mean": 214.68557739257812, "std": 289.9501037597656, "min": -410.56732177734375, "p10": -92.71466522216797, "median": 205.7864532470703, "p90": 588.4740905761722, "max": 1115.405029296875, "pos_frac": 0.78125, "sample": [755.38671875, 115.60918426513672, 318.214599609375, 241.88058471679688, -99.84679412841797, 241.6312255859375, 305.38323974609375, 313.9314270019531, 236.087158203125, 513.4874877929688, 467.7286071777344, 453.21368408203125, 142.86672973632812, 41.57479476928711, -248.39060974121094, 306.31097412109375, 380.26495361328125, 197.13040161132812, 173.58743286132812, 661.890869140625, 787.2356567382812, 150.31411743164062, -376.2879638671875, 398.9230041503906, 212.97430419921875, 290.2388000488281, 801.96435546875, -335.5303649902344, -96.67710876464844, 140.6299285888672, 38.98585510253906, 503.22674560546875, -64.22702026367188, 317.9525146484375, 231.77676391601562, 428.2471923828125, 309.7896728515625, -258.5341796875, 1115.405029296875, 21.37311553955078, -83.46896362304688, 111.36175537109375, 56.61579132080078, 121.0526123046875, -15.293128967285156, 143.19351196289062, -77.6881103515625, -7.834583282470703, 211.75442504882812, 266.67437744140625, 742.4572143554688, 620.6112060546875, -15.86037826538086, -410.56732177734375, 199.8184814453125, 169.81686401367188, 75.81884002685547, 138.45716857910156, 19.936311721801758, 499.75177001953125, 233.17327880859375, 292.564453125, 348.0111083984375, -36.202545166015625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000321.npy"} +{"epoch": 0.4713656387665198, "step": 322, "batch_size": 64, "mean": 233.3800811767578, "std": 254.5532989501953, "min": -347.0244445800781, "p10": -69.44470901489257, "median": 273.8768310546875, "p90": 567.6863952636719, "max": 847.3502197265625, "pos_frac": 0.75, "sample": [-180.3157958984375, 847.3502197265625, 353.4366760253906, 792.0657958984375, 102.59066009521484, -73.18316650390625, 531.9689331054688, 396.1771545410156, 345.4164123535156, -88.33335876464844, -9.196182250976562, -60.721641540527344, 300.5977783203125, 271.05133056640625, 358.8191223144531, 372.556396484375, -81.63774871826172, 297.7286682128906, -40.29247283935547, -146.21286010742188, 630.6536865234375, 572.480224609375, 283.14935302734375, 590.2271118164062, -59.3946533203125, 295.0644226074219, 349.49041748046875, 499.71923828125, 62.529457092285156, 587.0057983398438, -48.08039855957031, 261.5704040527344, 16.905933380126953, 310.43212890625, -28.56585693359375, 24.08993148803711, 349.75189208984375, 178.32908630371094, -3.7784385681152344, 362.19970703125, 488.53167724609375, 376.830078125, 460.06817626953125, -112.62230682373047, 556.5007934570312, 126.33625793457031, 276.70233154296875, -6.269359588623047, 270.086669921875, -27.922348022460938, 32.49060821533203, 695.7103881835938, 283.5233459472656, 239.759765625, 428.4747314453125, 484.72662353515625, 115.84829711914062, 85.78510284423828, 27.944185256958008, -347.0244445800781, 7.492095947265625, 326.31170654296875, 379.8993835449219, 243.49609375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000322.npy"} +{"epoch": 0.47283406754772395, "step": 323, "batch_size": 64, "mean": 192.88055419921875, "std": 265.7799377441406, "min": -446.70391845703125, "p10": -97.13179016113281, "median": 202.23419952392578, "p90": 549.4828002929688, "max": 804.7879028320312, "pos_frac": 0.796875, "sample": [535.95361328125, 238.88238525390625, 665.7630615234375, -446.70391845703125, 573.5576171875, -373.79290771484375, 526.2142944335938, 804.7879028320312, -4.377391815185547, 114.00676727294922, 287.9840393066406, 69.60009765625, 156.77203369140625, 132.59239196777344, 22.00206756591797, 330.33782958984375, 105.2746353149414, 234.39108276367188, -98.4600601196289, 656.3535766601562, -25.54285430908203, 118.21903991699219, 553.7040405273438, -115.77837371826172, -88.45637512207031, 400.8332824707031, 69.90030670166016, 115.08287048339844, 396.9896240234375, 28.281341552734375, 498.6064147949219, 223.73309326171875, 534.4280395507812, -65.37709045410156, 339.47747802734375, 35.66192626953125, -30.227432250976562, 246.19390869140625, 268.27264404296875, 256.6103820800781, 296.234619140625, 49.380126953125, 194.52606201171875, 539.6332397460938, -414.11981201171875, 101.97682189941406, 341.29644775390625, 644.7858276367188, 406.0404357910156, -94.0324935913086, 245.109130859375, 579.2135009765625, -144.5362548828125, 341.0262145996094, -283.3759765625, 231.29681396484375, 122.5718994140625, 238.20809936523438, 40.365806579589844, 284.57061767578125, 12.166862487792969, 49.81283187866211, 209.9423370361328, 60.51123046875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000323.npy"} +{"epoch": 0.47430249632892807, "step": 324, "batch_size": 64, "mean": 184.39854431152344, "std": 315.3792724609375, "min": -446.99969482421875, "p10": -232.42088928222654, "median": 152.00827026367188, "p90": 584.6865905761719, "max": 1024.4886474609375, "pos_frac": 0.75, "sample": [-257.0360412597656, 623.8839111328125, -204.31832885742188, 185.02267456054688, 64.24451446533203, 284.30419921875, 206.096435546875, 284.4625549316406, 45.63932800292969, 180.0458984375, 895.760009765625, -280.6370849609375, 181.3994140625, 61.595947265625, -92.92881774902344, 890.8302001953125, -291.96331787109375, -358.45458984375, 319.2380065917969, 256.1937255859375, 22.375579833984375, 127.46871185302734, -123.35552978515625, 157.56446838378906, 387.43780517578125, 934.0820922851562, 511.02215576171875, -103.36009216308594, 81.088134765625, 585.8380126953125, 464.1549987792969, 76.78907775878906, 47.26964569091797, 21.43970489501953, 415.0207824707031, 312.5066223144531, 146.4520721435547, 358.5600891113281, 179.38912963867188, -9.997858047485352, 493.3358154296875, -446.99969482421875, -139.7060546875, 394.190185546875, 595.4743041992188, 427.7643127441406, 144.13790893554688, -260.6274719238281, -244.46484375, 12.395179748535156, 209.91769409179688, 59.33226776123047, 294.0460205078125, -114.25546264648438, 81.22859191894531, 102.89254760742188, 494.20794677734375, -46.64506530761719, 6.078182220458984, 1024.4886474609375, 581.9999389648438, 263.3779296875, -52.94093322753906, 337.1546936035156], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000324.npy"} +{"epoch": 0.47577092511013214, "step": 325, "batch_size": 64, "mean": 165.94529724121094, "std": 240.4317169189453, "min": -378.29412841796875, "p10": -99.71166610717773, "median": 143.60997009277344, "p90": 498.9376464843751, "max": 838.9676513671875, "pos_frac": 0.796875, "sample": [-372.8923645019531, -378.29412841796875, 199.11550903320312, -93.44121551513672, 299.75164794921875, 277.69122314453125, 79.04926300048828, -131.24415588378906, 198.81268310546875, 349.6470947265625, 384.8447265625, 525.9310913085938, 354.4590148925781, 173.38302612304688, 141.66468811035156, 99.13147735595703, 3.5163345336914062, 145.5552520751953, 82.04676818847656, 232.9535675048828, -188.578125, 208.805419921875, 299.1431884765625, 620.2652587890625, 34.46760940551758, 6.283164978027344, 101.80488586425781, 79.5659408569336, 138.3479766845703, 13.458412170410156, 433.6445007324219, -102.39900207519531, 838.9676513671875, -18.47738265991211, 574.0634155273438, 20.21550750732422, 592.977294921875, -35.19301223754883, 246.76144409179688, 201.61923217773438, -76.98729705810547, 188.060791015625, 472.7518615722656, -43.088104248046875, 194.4425048828125, 361.04156494140625, 187.74102783203125, 254.68017578125, 113.85086059570312, 189.40399169921875, 438.6231384277344, -289.31689453125, 27.704360961914062, 717.35400390625, 35.817657470703125, -1.8443832397460938, 510.1601257324219, 79.1391372680664, -120.59748840332031, 292.208251953125, 91.68147277832031, 62.13734817504883, 80.249267578125, 217.86090087890625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000325.npy"} +{"epoch": 0.47723935389133626, "step": 326, "batch_size": 64, "mean": 174.4063262939453, "std": 207.72567749023438, "min": -382.3087158203125, "p10": -47.55031471252441, "median": 152.30946350097656, "p90": 430.67299194335936, "max": 792.689697265625, "pos_frac": 0.828125, "sample": [78.54171752929688, -43.276424407958984, 196.27305603027344, 429.339599609375, 257.28814697265625, -41.630523681640625, 127.31826782226562, 87.57662963867188, 339.7549743652344, -219.65386962890625, -72.95867919921875, -382.3087158203125, 522.1840209960938, 27.17424774169922, -124.06014251708984, 165.23866271972656, 161.39784240722656, 60.005611419677734, 303.5276184082031, 209.5858154296875, 4.902313232421875, 171.41537475585938, 256.6424560546875, 287.7562255859375, -58.77104187011719, 131.44349670410156, 670.626220703125, 176.98951721191406, 90.52845764160156, 107.95831298828125, 410.13751220703125, 219.42535400390625, 279.57159423828125, 140.24746704101562, 235.49276733398438, -12.349365234375, 15.334075927734375, 378.08258056640625, -45.38791275024414, 77.20870971679688, 143.22108459472656, 32.533447265625, 103.37077331542969, 224.90191650390625, 8.8983154296875, 114.84803771972656, 343.02197265625, 384.1720886230469, 219.8157196044922, 189.19017028808594, 102.01695251464844, 498.0049133300781, 792.689697265625, -48.47705841064453, 268.51910400390625, 212.84378051757812, 574.5660400390625, 420.4903869628906, 14.795858383178711, -103.83549499511719, 438.632080078125, 431.24444580078125, 45.58742141723633, 132.38137817382812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000326.npy"} +{"epoch": 0.4787077826725404, "step": 327, "batch_size": 64, "mean": 239.12875366210938, "std": 259.8718566894531, "min": -320.6236267089844, "p10": -98.57339630126951, "median": 261.2039337158203, "p90": 590.7962280273439, "max": 728.7653198242188, "pos_frac": 0.8125, "sample": [261.9575500488281, 245.95726013183594, 514.8082885742188, 206.8268280029297, 107.82557678222656, 416.9704284667969, 14.211585998535156, 376.54742431640625, 355.86083984375, 137.83480834960938, 442.65338134765625, 284.74285888671875, 163.57261657714844, 418.13043212890625, -180.78814697265625, 476.3298645019531, -58.37185287475586, 606.305908203125, 430.5334777832031, 370.6888122558594, -246.25436401367188, 723.5882568359375, 600.841796875, 362.952880859375, 74.34332275390625, 608.9763793945312, 298.44091796875, -200.74790954589844, 368.52569580078125, 380.19671630859375, 460.6844482421875, -213.33172607421875, 3.533111572265625, 111.45121765136719, 345.953125, 150.4849090576172, 322.77984619140625, 260.4503173828125, 216.6673583984375, 178.98431396484375, -30.091598510742188, 126.77767944335938, -79.21505737304688, 447.9141845703125, 200.95986938476562, -106.86982727050781, -39.51203536987305, 390.2982177734375, 508.21002197265625, 45.478668212890625, 636.2584838867188, 56.2734260559082, 728.7653198242188, -270.8426513671875, 519.9034423828125, -40.7674560546875, 73.72731018066406, 647.16015625, 3.1237030029296875, 431.33087158203125, 305.9071350097656, 567.3565673828125, 101.5992202758789, -320.6236267089844], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000327.npy"} +{"epoch": 0.4801762114537445, "step": 328, "batch_size": 64, "mean": 216.69256591796875, "std": 276.9777526855469, "min": -395.523681640625, "p10": -98.60994873046869, "median": 167.49030303955078, "p90": 587.8547607421878, "max": 979.890625, "pos_frac": 0.75, "sample": [167.88987731933594, -133.07601928710938, 241.27218627929688, 167.09072875976562, 29.3883056640625, 481.55767822265625, 339.79522705078125, 139.8871307373047, 43.75334167480469, -395.523681640625, 101.45844268798828, 485.4651184082031, 539.0447998046875, 410.18609619140625, 295.55841064453125, 454.98583984375, -0.37932586669921875, -177.3490447998047, -39.049713134765625, 193.65802001953125, -42.882015228271484, -29.644317626953125, 379.37701416015625, 925.0892333984375, 241.55917358398438, 401.2227478027344, 394.9605407714844, 231.49923706054688, 658.6156616210938, 377.1220397949219, -153.09841918945312, 145.61135864257812, 287.418701171875, -159.5149688720703, 54.596107482910156, 97.96884155273438, 298.2661437988281, 457.7721252441406, 82.103515625, 177.46847534179688, -12.588907241821289, 123.8299331665039, -122.49334716796875, 159.57958984375, -36.35942840576172, 453.5835266113281, 90.68818664550781, 278.60943603515625, 323.6701965332031, 181.92221069335938, 750.361572265625, 608.7733154296875, 64.06828308105469, -9.290374755859375, 2.0401954650878906, 797.7863159179688, 979.890625, 78.80233764648438, 678.1690673828125, -40.15409469604492, -39.50926971435547, 384.0135498046875, 142.073974609375, -140.26739501953125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000328.npy"} +{"epoch": 0.48164464023494863, "step": 329, "batch_size": 64, "mean": 221.80587768554688, "std": 281.9940185546875, "min": -695.420654296875, "p10": -77.99343185424804, "median": 213.33596801757812, "p90": 592.8671875, "max": 954.5140991210938, "pos_frac": 0.796875, "sample": [208.55117797851562, 419.42913818359375, 569.2393798828125, 380.94158935546875, 315.2428894042969, -77.4541244506836, 474.2237854003906, -60.241355895996094, 593.8748168945312, 234.06451416015625, 57.8736572265625, 446.3522033691406, 91.0127944946289, -26.624465942382812, 66.07228088378906, 135.55313110351562, 260.5650634765625, 278.4356994628906, 62.42362976074219, 780.1973876953125, 129.68508911132812, 120.48075866699219, 208.45787048339844, 92.45089721679688, -695.420654296875, 267.6728210449219, 25.87041473388672, 954.5140991210938, 621.4083251953125, -74.58799743652344, 283.7999572753906, -18.88900375366211, 418.12799072265625, 105.85404205322266, 242.3610076904297, 386.32904052734375, 621.5108032226562, 463.70465087890625, 764.4862670898438, 218.12075805664062, 74.81463623046875, -103.50453186035156, -199.22984313964844, 537.1378784179688, 459.3119201660156, -114.7772216796875, 344.00677490234375, -138.2993621826172, 273.6123352050781, 126.17802429199219, 206.87774658203125, -78.22456359863281, 257.7911376953125, 223.76785278320312, 62.637001037597656, 63.738380432128906, 251.56211853027344, 426.967529296875, 675.7280883789062, -29.67913818359375, -318.9365539550781, 193.3536376953125, 590.5160522460938, 64.55561065673828], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000329.npy"} +{"epoch": 0.4831130690161527, "step": 330, "batch_size": 64, "mean": 220.24560546875, "std": 259.8302307128906, "min": -375.95489501953125, "p10": -101.64796600341796, "median": 220.25914001464844, "p90": 485.9283447265625, "max": 822.6556396484375, "pos_frac": 0.828125, "sample": [447.2188720703125, 0.36907958984375, -157.899658203125, -56.66713333129883, 416.680908203125, 487.56787109375, 165.2178192138672, 183.21157836914062, -78.50774383544922, 94.0594711303711, -151.6609649658203, 141.9662322998047, 133.96713256835938, 420.23333740234375, 482.102783203125, 55.60865020751953, 313.4647521972656, 294.3413391113281, 259.17352294921875, 434.3941955566406, -95.47785949707031, 65.49249267578125, 341.0826110839844, 44.81785202026367, 141.470947265625, 298.6368713378906, -264.75048828125, 294.0748291015625, 246.9781494140625, 215.35757446289062, 434.7599792480469, -3.120685577392578, -375.95489501953125, 225.16070556640625, 576.0750732421875, 210.7064208984375, 13.165420532226562, 762.3226318359375, 20.806556701660156, 171.131591796875, 24.538108825683594, 456.62823486328125, 138.82395935058594, 822.6556396484375, -256.58251953125, 787.211669921875, 740.7882080078125, 298.6052551269531, 385.66070556640625, 72.25952911376953, 360.06683349609375, 444.3515625, 161.81561279296875, 717.6265869140625, 36.88030242919922, 280.1762390136719, 253.53567504882812, 350.67431640625, -169.76229858398438, 373.9051513671875, 48.43353271484375, 366.216796875, 297.9240417480469, -104.29229736328125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000330.npy"} +{"epoch": 0.4845814977973568, "step": 331, "batch_size": 64, "mean": 235.46646118164062, "std": 187.79071044921875, "min": -139.11669921875, "p10": 27.710479354858414, "median": 211.47582244873047, "p90": 423.8917510986329, "max": 819.2714233398438, "pos_frac": 0.9375, "sample": [155.01463317871094, 701.0055541992188, 317.50762939453125, -26.364532470703125, 348.5595397949219, 407.94677734375, 74.61392211914062, 44.73747253417969, 290.11517333984375, 145.3994140625, 727.5765380859375, 317.1669616699219, 819.2714233398438, 19.24695587158203, 165.88661193847656, 328.8338623046875, 619.0126342773438, 209.79820251464844, 154.70269775390625, 213.1534423828125, 430.7253112792969, 400.25238037109375, 451.22015380859375, 20.413196563720703, 66.05183410644531, 165.37098693847656, 231.68760681152344, 254.47271728515625, 394.156494140625, 17.17779541015625, 192.68075561523438, -91.90440368652344, 258.087890625, 184.504638671875, 134.2799072265625, 179.03555297851562, -134.49920654296875, 150.08340454101562, 279.41058349609375, 633.2017211914062, 228.75802612304688, 236.73597717285156, 163.39227294921875, 363.1061706542969, 74.99527740478516, 116.90975952148438, 302.2146911621094, 181.12350463867188, 300.32733154296875, 183.05601501464844, -139.11669921875, 220.6964111328125, 139.1499786376953, 117.73454284667969, 316.8591003417969, 284.04730224609375, 346.8545227050781, 52.781898498535156, 162.4250946044922, 306.95941162109375, 190.18284606933594, 251.01144409179688, 342.20556640625, 77.84866333007812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000331.npy"} +{"epoch": 0.48604992657856094, "step": 332, "batch_size": 64, "mean": 163.52249145507812, "std": 220.13352966308594, "min": -265.7468566894531, "p10": -69.2590003967285, "median": 123.4211311340332, "p90": 449.73678894042985, "max": 748.87109375, "pos_frac": 0.78125, "sample": [696.5332641601562, 219.638427734375, 161.5466766357422, 345.6402893066406, 29.05364990234375, 12.911849975585938, 42.64994812011719, 405.06817626953125, 410.26025390625, 227.9137725830078, -20.360641479492188, 45.90330505371094, -172.38070678710938, 748.87109375, 160.9624786376953, 126.04436492919922, 118.61129760742188, 89.51322937011719, 79.15998077392578, 2.031461715698242, 161.69882202148438, 46.25120544433594, 466.6553039550781, -123.09150695800781, 377.2098388671875, -7.894664764404297, -116.86593627929688, 48.17327117919922, 566.0557250976562, 265.2317810058594, 186.00970458984375, 175.45706176757812, 252.3577880859375, -54.557029724121094, -45.31397247314453, 75.9015121459961, 531.6361083984375, 504.4643249511719, -75.55984497070312, 278.58489990234375, 173.9771728515625, -180.91494750976562, 285.484130859375, 120.79789733886719, 367.07366943359375, 719.2174072265625, 95.97560119628906, 340.9924621582031, -26.606231689453125, 81.02960205078125, -265.7468566894531, 210.94125366210938, 368.2563171386719, -99.00033569335938, 392.2370300292969, 5.440769195556641, 13.205062866210938, 5.913143157958984, -51.07078552246094, 202.42059326171875, 289.99163818359375, 90.5411605834961, 128.0736846923828, -44.76702880859375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000332.npy"} +{"epoch": 0.48751835535976507, "step": 333, "batch_size": 64, "mean": 223.31976318359375, "std": 297.8357849121094, "min": -335.5612487792969, "p10": -178.05206451416015, "median": 237.81937408447266, "p90": 560.3932495117188, "max": 910.4530029296875, "pos_frac": 0.75, "sample": [-37.88429260253906, 306.4292297363281, 910.4530029296875, -28.667984008789062, 22.140419006347656, 34.50920104980469, 299.0703125, 260.93218994140625, 77.18994140625, 19.69509506225586, 719.3062744140625, -335.5612487792969, 245.42681884765625, -180.29855346679688, 547.229736328125, -209.82968139648438, 740.1189575195312, -124.09530639648438, 697.848876953125, 445.2283630371094, 16.721553802490234, 288.0543212890625, -18.438919067382812, 362.4900207519531, 502.97796630859375, 538.3388671875, -209.1165771484375, 136.15567016601562, 42.411376953125, 462.2780456542969, 226.94757080078125, 21.805707931518555, -140.97579956054688, 548.5372314453125, 557.6665649414062, 382.3432922363281, -195.660888671875, 14.641593933105469, 561.5618286132812, 414.1451110839844, 411.08929443359375, -151.31983947753906, -32.8018798828125, 531.8223876953125, 494.51739501953125, -0.9722976684570312, 230.21192932128906, 197.1180877685547, 674.4520874023438, 295.3045959472656, 502.40765380859375, 339.587646484375, 621.0353393554688, 158.03155517578125, 284.33978271484375, -172.8102569580078, 481.86346435546875, 46.55203628540039, 429.67462158203125, -262.68707275390625, 465.02618408203125, -334.6875305175781, 82.33403015136719, 80.24934387207031], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000333.npy"} +{"epoch": 0.4889867841409692, "step": 334, "batch_size": 64, "mean": 199.5938720703125, "std": 305.2069396972656, "min": -539.0484008789062, "p10": -137.91086273193358, "median": 170.14231872558594, "p90": 679.1819763183595, "max": 812.0140380859375, "pos_frac": 0.78125, "sample": [657.8775634765625, 93.33108520507812, -217.80148315429688, 476.72637939453125, 221.18991088867188, 47.33075714111328, -70.14043426513672, 76.43226623535156, 176.24472045898438, 40.78343200683594, 256.4994812011719, -36.77594757080078, 324.8197326660156, 202.49130249023438, -148.8164825439453, 220.9105682373047, -33.80751037597656, 489.51629638671875, -539.0484008789062, 737.8026123046875, 25.763729095458984, 248.41384887695312, -36.13984680175781, -284.4018249511719, 409.8448486328125, 224.59671020507812, 432.801025390625, -363.0068359375, -292.8887634277344, -58.61228942871094, 50.84117126464844, -112.46441650390625, 201.73191833496094, 79.83734130859375, 800.7316284179688, 716.3616943359375, 293.4909973144531, 55.31464385986328, 117.08355712890625, 512.345703125, 68.66655731201172, 257.600830078125, 628.4239501953125, 490.36041259765625, 25.437667846679688, 73.2314453125, 688.3124389648438, 154.51498413085938, 812.0140380859375, 567.533203125, -31.988929748535156, -427.29437255859375, 97.45556640625, 259.5613098144531, 196.8491973876953, 720.5134887695312, 313.2886657714844, 146.3929901123047, 50.48521423339844, 434.1270446777344, 164.0399169921875, 765.98779296875, 183.06143188476562, 138.22164916992188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000334.npy"} +{"epoch": 0.49045521292217326, "step": 335, "batch_size": 64, "mean": 148.3397216796875, "std": 272.0230712890625, "min": -334.8121337890625, "p10": -216.42090301513667, "median": 126.96805191040039, "p90": 493.8177795410156, "max": 972.2484741210938, "pos_frac": 0.6875, "sample": [106.86944580078125, 446.4100036621094, 353.8218994140625, -20.895355224609375, 230.21397399902344, -91.39336395263672, 27.84824562072754, 462.5985107421875, 27.90719985961914, 10.873987197875977, -287.9341735839844, -8.975540161132812, 337.61761474609375, 215.41136169433594, -77.47676849365234, 505.87255859375, 27.68254852294922, 189.20632934570312, -262.5095520019531, 223.40090942382812, -156.57313537597656, 47.04811096191406, 248.87661743164062, 286.5837097167969, 707.503173828125, 531.3849487304688, 407.0147399902344, 329.5986328125, 22.078447341918945, 613.3251953125, 972.2484741210938, -334.8121337890625, 126.97573852539062, -298.2803649902344, 488.28729248046875, -89.15409088134766, 496.18798828125, 27.39116096496582, 95.10357666015625, 517.8746948242188, -151.9033203125, 251.4741973876953, -75.7531967163086, 129.86212158203125, 250.98599243164062, -67.36620330810547, 250.64524841308594, -58.2060546875, -242.26889038085938, -22.85736083984375, 109.50189208984375, -128.95582580566406, 475.78729248046875, -242.0699462890625, -155.0150146484375, 384.7313232421875, 239.6545867919922, 456.68634033203125, 11.571060180664062, -252.26950073242188, 249.89324951171875, 328.64898681640625, 126.96036529541016, 168.79281616210938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000335.npy"} +{"epoch": 0.4919236417033774, "step": 336, "batch_size": 64, "mean": 250.28538513183594, "std": 353.48211669921875, "min": -607.3175659179688, "p10": -110.7415687561035, "median": 215.1184844970703, "p90": 764.7559204101564, "max": 1202.60693359375, "pos_frac": 0.796875, "sample": [441.61578369140625, 66.78659057617188, 993.6636962890625, -45.48792266845703, 525.0518798828125, 270.21661376953125, 279.22283935546875, -115.7101058959961, -85.23947143554688, 894.5892333984375, 303.97601318359375, 325.2164306640625, 183.18807983398438, 565.559814453125, 1202.60693359375, -99.1483154296875, 804.892333984375, 312.3677673339844, 344.49212646484375, 394.83367919921875, -94.98576354980469, 65.86830139160156, 160.09837341308594, 309.510009765625, 164.30291748046875, 372.1508483886719, 154.26463317871094, 78.33935546875, 781.6030883789062, 246.46588134765625, 187.04429626464844, 45.98682403564453, 179.28309631347656, 224.84921264648438, 168.4049530029297, -171.31353759765625, -375.115966796875, 725.4458618164062, -94.65670776367188, -237.20477294921875, 669.2738037109375, 56.53907012939453, 808.63671875, 70.58708190917969, 409.1644287109375, 62.177215576171875, -607.3175659179688, 105.49871063232422, 205.38775634765625, 318.1570129394531, 56.57301712036133, 180.31924438476562, 183.65379333496094, -13.339071273803711, 283.4266662597656, 470.31640625, 566.1881713867188, 1034.779296875, -379.89532470703125, 371.586181640625, -511.9627685546875, 409.5476379394531, 313.39849853515625, 502.5333251953125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000336.npy"} +{"epoch": 0.4933920704845815, "step": 337, "batch_size": 64, "mean": 239.520263671875, "std": 280.8944396972656, "min": -289.07257080078125, "p10": -113.56970901489257, "median": 207.31287384033203, "p90": 588.6547119140625, "max": 1035.9970703125, "pos_frac": 0.84375, "sample": [898.0836181640625, 234.15167236328125, -289.07257080078125, 133.01998901367188, 597.3004760742188, 529.35400390625, 77.4360580444336, 247.19561767578125, 40.1535758972168, 429.3333740234375, 28.25135040283203, 253.30599975585938, 282.9464111328125, 398.6458740234375, 494.8749694824219, -117.15955352783203, 197.44967651367188, 231.2726593017578, -56.95280456542969, 568.4812622070312, 215.85459899902344, 196.0982666015625, 23.829626083374023, 544.2632446289062, 170.6251983642578, -182.86212158203125, 1035.9970703125, 78.97406005859375, -191.15447998046875, -221.51617431640625, 219.27731323242188, 144.6455535888672, 281.21417236328125, -181.63479614257812, 208.2891082763672, 116.73680114746094, 475.2613220214844, -16.248275756835938, 257.55224609375, 411.774169921875, 43.91361618041992, 803.577392578125, 35.00909423828125, 186.25973510742188, -264.5413818359375, 91.66195678710938, 823.6683349609375, 534.1041259765625, 348.80181884765625, 58.70526885986328, 182.69268798828125, 82.03203582763672, 638.3099365234375, 403.27972412109375, 78.23686218261719, 703.7645874023438, 372.08984375, 206.33663940429688, 90.45528411865234, 319.7177429199219, 335.7529602050781, 191.35513305664062, -105.19340515136719, 404.2591857910156], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000337.npy"} +{"epoch": 0.4948604992657856, "step": 338, "batch_size": 64, "mean": 240.65579223632812, "std": 242.7720184326172, "min": -242.59222412109375, "p10": -23.448664093017577, "median": 205.57323455810547, "p90": 581.8801879882812, "max": 862.170654296875, "pos_frac": 0.859375, "sample": [95.22962951660156, 581.9783325195312, 61.14930725097656, -242.59222412109375, 226.49441528320312, 152.68862915039062, -115.36993408203125, 266.00140380859375, 605.6941528320312, 246.07754516601562, 97.35243225097656, 55.72468948364258, -139.03903198242188, 560.9989013671875, 162.05792236328125, 287.2239990234375, 22.045318603515625, 571.371826171875, 469.15411376953125, 513.8570556640625, 667.7861328125, -78.83805084228516, 228.64703369140625, 200.99598693847656, 287.22216796875, 28.348922729492188, 423.66558837890625, -22.638023376464844, -16.323646545410156, 12.568962097167969, 274.5426330566406, 603.4755859375, 542.8827514648438, 108.946533203125, 215.79879760742188, 417.3874206542969, 186.58343505859375, 56.93135452270508, 369.1749572753906, 89.81102752685547, 402.7054748535156, 82.08946228027344, 46.069580078125, 140.98098754882812, 565.4500732421875, 22.863832473754883, 234.16073608398438, 170.22201538085938, 332.52996826171875, 159.3364715576172, -23.79608154296875, 48.72153091430664, 578.22216796875, 862.170654296875, -43.55553436279297, 365.5650939941406, 596.1948852539062, 210.15048217773438, 382.092041015625, 148.15301513671875, 595.3928833007812, -209.79116821289062, 79.32283020019531, 581.6511840820312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000338.npy"} +{"epoch": 0.49632892804698975, "step": 339, "batch_size": 64, "mean": 226.93565368652344, "std": 265.1220397949219, "min": -391.2104187011719, "p10": -65.30329513549805, "median": 203.07388305664062, "p90": 581.6859191894532, "max": 918.9120483398438, "pos_frac": 0.8125, "sample": [71.80056762695312, -65.52014923095703, -64.79730224609375, -47.217689514160156, 644.6561279296875, 34.83210754394531, 17.863174438476562, 119.86978149414062, 405.5144348144531, -14.329734802246094, 918.9120483398438, 49.56105041503906, 74.04159545898438, 147.03890991210938, 330.6964111328125, 419.6470947265625, 16.560800552368164, -21.141883850097656, 104.18347930908203, 595.244384765625, 296.65301513671875, 72.43836212158203, 253.47817993164062, -88.3436050415039, 150.86151123046875, 144.02032470703125, 745.3524169921875, 31.629653930664062, 380.6474609375, 683.9609985351562, -225.13677978515625, 265.6473388671875, -391.2104187011719, 193.36708068847656, 503.3748474121094, 368.4615478515625, 185.35989379882812, -14.973230361938477, 274.9389953613281, 530.8834838867188, 159.23431396484375, 297.7603454589844, 539.93896484375, 529.7906494140625, 132.21353149414062, 670.9812622070312, -213.2054443359375, 646.3013916015625, 365.02655029296875, -251.7703399658203, 263.4462890625, 148.6861572265625, 26.517990112304688, 235.24609375, 15.527324676513672, 212.7806854248047, 231.7644805908203, -106.22872161865234, 484.464599609375, 317.3835144042969, 550.0494995117188, 500.3183898925781, 370.012939453125, 298.8148193359375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000339.npy"} +{"epoch": 0.4977973568281938, "step": 340, "batch_size": 64, "mean": 195.55636596679688, "std": 284.56243896484375, "min": -484.00042724609375, "p10": -140.3276634216308, "median": 179.22109985351562, "p90": 568.0337707519532, "max": 1045.67431640625, "pos_frac": 0.78125, "sample": [140.09417724609375, 243.72732543945312, 72.4838638305664, 227.65826416015625, 241.7319793701172, 569.4015502929688, 668.441650390625, -484.00042724609375, 247.6383056640625, -41.36186981201172, 402.4046325683594, 1045.67431640625, 74.73056030273438, -194.65338134765625, 609.4156494140625, -53.427024841308594, 305.3182067871094, 110.16695404052734, -86.94315338134766, 34.21236801147461, 174.77857971191406, -202.46566772460938, -349.1795959472656, 575.3126831054688, -185.25572204589844, 101.91864776611328, 53.022117614746094, 423.856689453125, -50.251373291015625, 326.7945251464844, 525.4314575195312, 82.15382385253906, 892.0144653320312, 636.6495361328125, 89.70120239257812, 175.94705200195312, 39.888431549072266, 319.6763916015625, 111.19337463378906, 189.20489501953125, 235.85855102539062, 50.706298828125, 224.8865966796875, 2.7976036071777344, -159.9392547607422, 262.7349548339844, 202.032470703125, -94.5672836303711, 503.17803955078125, 7.6253509521484375, 182.49514770507812, 459.0327453613281, 542.997802734375, 564.84228515625, 372.90869140625, 223.45736694335938, 157.48585510253906, -9.15993881225586, 397.6822814941406, 188.86026000976562, -302.53485107421875, 407.53826904296875, -68.07327270507812, 99.65594482421875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000340.npy"} +{"epoch": 0.49926578560939794, "step": 341, "batch_size": 64, "mean": 236.36013793945312, "std": 264.9059143066406, "min": -328.28814697265625, "p10": -88.26609802246094, "median": 242.41075897216797, "p90": 558.9176208496094, "max": 1053.1563720703125, "pos_frac": 0.8125, "sample": [1053.1563720703125, -109.44610595703125, -59.53526306152344, -172.1394805908203, 465.71636962890625, 534.99169921875, 777.729248046875, 478.3833312988281, 241.24424743652344, 267.8953552246094, -35.459991455078125, 288.5095520019531, 135.28591918945312, 316.65380859375, 21.21314811706543, -65.31538391113281, -93.07643127441406, 357.18585205078125, 95.77645111083984, 273.14483642578125, -171.44989013671875, 417.16876220703125, 498.43438720703125, 243.5772705078125, 43.989768981933594, 580.146240234375, 122.65201568603516, 264.73223876953125, 9.115859985351562, -11.583236694335938, 99.75471496582031, 454.342041015625, 609.3515625, 282.61151123046875, 325.026611328125, 404.1675109863281, 110.22425842285156, 24.457490921020508, -88.26155853271484, 478.3570556640625, 568.9158935546875, 277.11614990234375, 50.466796875, 87.6641616821289, -328.28814697265625, 535.5883178710938, 172.4894561767578, 232.07359313964844, -212.75714111328125, 377.4513244628906, 236.994140625, 354.977783203125, 483.683349609375, -88.2680435180664, 723.05810546875, 22.950973510742188, 328.83221435546875, 475.2779541015625, 395.8587646484375, 1.1375923156738281, 131.89248657226562, 111.82796478271484, 575.9036254882812, 143.47280883789062], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000341.npy"} +{"epoch": 0.5007342143906021, "step": 342, "batch_size": 64, "mean": 201.69537353515625, "std": 291.35943603515625, "min": -794.6124267578125, "p10": -126.17605743408203, "median": 165.16150665283203, "p90": 522.6483734130861, "max": 1050.495849609375, "pos_frac": 0.765625, "sample": [662.4490356445312, -794.6124267578125, -258.81182861328125, 490.0655822753906, 60.97726821899414, 66.77545928955078, 193.72357177734375, 423.02728271484375, 432.06329345703125, 347.7457275390625, 111.82563781738281, 121.44837951660156, 202.35897827148438, 198.54649353027344, 536.6124267578125, 298.8404541015625, 75.51170349121094, 427.9970397949219, -197.1990966796875, -46.29963684082031, 478.72052001953125, -121.9354476928711, 117.40713500976562, -137.37472534179688, -116.5044937133789, 299.9408264160156, 962.2520751953125, 333.1253356933594, 101.12646484375, -135.0337371826172, -22.760478973388672, 1050.495849609375, 238.19171142578125, 111.6529541015625, 36.87025451660156, -100.33614349365234, 576.5472412109375, 474.55072021484375, 612.6153564453125, 158.8677520751953, -39.32221221923828, 197.84744262695312, 338.9974670410156, 51.302886962890625, 539.1080322265625, 391.14605712890625, 248.5654296875, 394.2045593261719, 137.7272491455078, 435.5941162109375, 471.2792053222656, 92.57608795166016, -156.59344482421875, 112.48771667480469, 430.69757080078125, -127.99346160888672, 169.31146240234375, -91.50211334228516, 142.01239013671875, 114.7369384765625, 195.6048583984375, 437.6485595703125, 161.0115509033203, -9.409278869628906], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000342.npy"} +{"epoch": 0.5022026431718062, "step": 343, "batch_size": 64, "mean": 249.90182495117188, "std": 214.56275939941406, "min": -251.46514892578125, "p10": 18.849082946777347, "median": 231.94338989257812, "p90": 491.6659729003908, "max": 861.3141479492188, "pos_frac": 0.921875, "sample": [372.8415832519531, 207.380126953125, 172.63633728027344, 199.17257690429688, 245.81417846679688, 17.36004638671875, 298.3996887207031, 311.3642578125, 238.645263671875, 670.7966918945312, 22.323501586914062, -140.61468505859375, 239.659912109375, 46.24505615234375, 225.24151611328125, 133.6763916015625, 91.59550476074219, 243.50592041015625, 243.96932983398438, 735.4202880859375, 309.0870666503906, 339.1814880371094, 120.11763000488281, 351.578125, 439.0592041015625, 248.77850341796875, 66.57118225097656, 100.43978881835938, 861.3141479492188, 188.60667419433594, 51.39051055908203, 102.37774658203125, 348.6495361328125, 125.9134521484375, 176.83200073242188, 371.11676025390625, 272.91033935546875, 124.90301513671875, 3.1254348754882812, 395.8506774902344, -251.46514892578125, 343.5353088378906, 177.88885498046875, 425.4260559082031, 310.1950988769531, 776.10791015625, 455.65728759765625, 297.03802490234375, 108.00064849853516, 331.7200927734375, 151.7806396484375, 435.9736022949219, 92.84674072265625, -37.193782806396484, 507.0982666015625, 454.2844543457031, 201.4838104248047, 181.09967041015625, 122.22290802001953, 209.24508666992188, -42.172889709472656, 677.776611328125, 598.9161987304688, -106.98551177978516], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000343.npy"} +{"epoch": 0.5036710719530103, "step": 344, "batch_size": 64, "mean": 224.66973876953125, "std": 295.283203125, "min": -327.7279052734375, "p10": -63.78914642333983, "median": 177.98320770263672, "p90": 541.0164794921876, "max": 1523.6168212890625, "pos_frac": 0.828125, "sample": [65.0417251586914, -30.273794174194336, -52.61244201660156, -12.35659408569336, 84.07516479492188, 64.87594604492188, 200.46762084960938, 128.05235290527344, 130.3323211669922, 249.40574645996094, 43.247711181640625, 1020.8956298828125, 65.15215301513672, 266.23675537109375, 147.7895965576172, 178.28939819335938, 177.67701721191406, -327.7279052734375, -229.85702514648438, 266.5747985839844, 195.81455993652344, 73.96725463867188, 159.52398681640625, 33.51736831665039, 250.67617797851562, 850.4647216796875, 560.744140625, 415.111083984375, 93.26374816894531, -72.77035522460938, 418.8332824707031, 396.88409423828125, 277.9103698730469, 115.24127197265625, 198.96878051757812, 0.503021240234375, 398.59661865234375, -68.57916259765625, 309.95513916015625, 56.03086853027344, 551.247314453125, 299.447265625, 197.65882873535156, 62.64568328857422, 87.19205474853516, 1523.6168212890625, 473.36468505859375, 275.67822265625, 28.950319290161133, 587.7066040039062, -37.783363342285156, 495.1749572753906, 206.03736877441406, 564.3073120117188, 154.0735626220703, -119.22578430175781, -109.3928451538086, 463.25299072265625, 517.14453125, -265.5302429199219, 309.1351318359375, 386.38714599609375, 122.78144836425781, 505.0489807128906], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000344.npy"} +{"epoch": 0.5051395007342144, "step": 345, "batch_size": 64, "mean": 243.0369873046875, "std": 275.9240417480469, "min": -416.1407775878906, "p10": -55.46307220458982, "median": 208.203857421875, "p90": 626.9631469726564, "max": 906.1402587890625, "pos_frac": 0.8125, "sample": [38.59423065185547, 708.2452392578125, 150.51800537109375, 380.47161865234375, 108.77111053466797, -212.4532470703125, 691.7839965820312, -157.63807678222656, 106.79318237304688, 437.7354736328125, -22.536855697631836, 325.620361328125, 130.52099609375, 504.7490539550781, 583.429443359375, 146.89541625976562, -416.1407775878906, 280.9650573730469, 48.426570892333984, 31.164321899414062, 376.8932189941406, 394.05975341796875, 111.49824523925781, 427.90386962890625, 290.22052001953125, 595.8277587890625, 154.76309204101562, 332.9970703125, 162.50326538085938, 214.56712341308594, 40.69268798828125, 844.5201416015625, 408.9547119140625, 58.39512634277344, 159.405029296875, 393.5829772949219, 413.8262939453125, 449.684814453125, 382.6507263183594, 279.7894287109375, 379.4017333984375, 906.1402587890625, 120.2315673828125, 201.87054443359375, 507.556884765625, 335.95904541015625, -16.826427459716797, 723.3694458007812, 211.29580688476562, 467.76190185546875, -65.21065521240234, -165.7135009765625, 197.00662231445312, 640.306884765625, -8.768156051635742, -211.10964965820312, -32.718711853027344, 96.79244995117188, 680.0801391601562, 205.11190795898438, 243.57733154296875, 142.05923461914062, -349.6147155761719, -12.845230102539062], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000345.npy"} +{"epoch": 0.5066079295154186, "step": 346, "batch_size": 64, "mean": 214.92916870117188, "std": 269.9494934082031, "min": -610.1494140625, "p10": -81.34828262329101, "median": 215.25608825683594, "p90": 587.981170654297, "max": 766.4418334960938, "pos_frac": 0.796875, "sample": [291.66119384765625, 154.94154357910156, 72.37895965576172, 382.1302490234375, 709.797119140625, 452.12353515625, 726.984130859375, 210.412353515625, 145.5366668701172, 766.4418334960938, -610.1494140625, 41.71031188964844, 439.8577880859375, 17.29617691040039, 330.1210632324219, 595.7158203125, -34.25567626953125, 322.6129150390625, 723.3885498046875, 22.524293899536133, 345.1805419921875, 220.09982299804688, -223.95596313476562, 498.08087158203125, 293.8583068847656, 349.71295166015625, 295.9969482421875, 173.76089477539062, 331.77447509765625, 41.287506103515625, -38.12939453125, -84.39196014404297, 185.25694274902344, 88.53562927246094, 620.2933349609375, 220.29734802246094, 203.27845764160156, 183.48263549804688, 299.23260498046875, -217.4807891845703, -56.965614318847656, 605.4089965820312, 296.1083068847656, 569.9336547851562, 193.38633728027344, 332.909912109375, 411.6731872558594, 350.189697265625, -19.885639190673828, -53.40597152709961, 281.2267150878906, 14.757244110107422, 82.27035522460938, -184.21197509765625, 68.52165222167969, 517.0289306640625, 122.03164672851562, 150.0225830078125, 467.0951232910156, -293.8835144042969, -74.24636840820312, -202.22769165039062, 266.00677490234375, 364.32171630859375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000346.npy"} +{"epoch": 0.5080763582966226, "step": 347, "batch_size": 64, "mean": 219.19906616210938, "std": 290.0787048339844, "min": -376.2944030761719, "p10": -99.05900497436522, "median": 176.4446792602539, "p90": 629.1925231933594, "max": 864.6236572265625, "pos_frac": 0.75, "sample": [-376.2944030761719, 265.30242919921875, 512.8790893554688, 374.6471862792969, 125.24462127685547, -80.9904556274414, 209.84271240234375, 249.4413299560547, 755.2403564453125, 32.87560272216797, 325.2470703125, 33.02375030517578, 125.03901672363281, -45.02423095703125, 445.88409423828125, -303.505615234375, -237.97552490234375, -91.37625122070312, 720.7593994140625, 97.5516586303711, 488.5862121582031, -0.17269515991210938, 288.48779296875, 318.01690673828125, -343.0435791015625, 513.3857421875, 533.3130493164062, 434.00994873046875, -6.468788146972656, 701.7122192382812, 355.99267578125, 638.8040771484375, 335.5755615234375, -102.09305572509766, 250.10296630859375, -87.9375228881836, 477.6961669921875, 151.88241577148438, 135.5653076171875, -91.97955322265625, 179.17236328125, 301.0330505371094, 51.12316131591797, 284.454833984375, 113.6526870727539, 102.65512084960938, 169.00323486328125, -13.184089660644531, 832.0780029296875, 76.56864929199219, 388.22998046875, 692.3727416992188, 173.7169952392578, 864.6236572265625, -70.42943572998047, 73.37244415283203, 329.2374572753906, -218.5056610107422, 448.0314025878906, 30.070648193359375, 475.3792419433594, -123.38978576660156, 133.4598846435547, 606.7655639648438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000347.npy"} +{"epoch": 0.5095447870778267, "step": 348, "batch_size": 64, "mean": 194.16824340820312, "std": 259.8974914550781, "min": -335.19085693359375, "p10": -98.07285690307617, "median": 224.25111389160156, "p90": 445.23314819335945, "max": 1176.0709228515625, "pos_frac": 0.765625, "sample": [299.80743408203125, 94.18172454833984, 213.4061279296875, 451.28857421875, 375.4704895019531, -20.055397033691406, 135.5093994140625, 319.0364074707031, 377.86395263671875, -335.19085693359375, 28.154342651367188, 123.51847839355469, 595.9981689453125, 316.397216796875, 144.8666229248047, 312.0770263671875, 24.6490478515625, 302.6943664550781, 103.34759521484375, 41.01545715332031, 313.1714782714844, 330.43145751953125, -27.264060974121094, 1176.0709228515625, 429.2277526855469, 249.98092651367188, -92.79586029052734, 252.88021850585938, 1.0102062225341797, 147.62042236328125, -152.3759307861328, -100.83518981933594, -216.15750122070312, 867.4806518554688, 558.60986328125, 375.02581787109375, 290.8211669921875, 240.7809295654297, 246.97764587402344, -4.384294509887695, -248.06236267089844, 398.45391845703125, 4.008842468261719, 307.6954650878906, 283.2071838378906, 235.09609985351562, -98.40264129638672, 27.217466354370117, 548.383544921875, 252.8542938232422, 115.7082290649414, -83.50416564941406, 431.10382080078125, -247.21734619140625, 342.8709716796875, 248.20191955566406, 194.50497436523438, -21.81875228881836, 67.280029296875, 519.4248046875, 153.05160522460938, 366.44061279296875, -97.30335998535156, -62.73978042602539], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000348.npy"} +{"epoch": 0.5110132158590308, "step": 349, "batch_size": 64, "mean": 225.6001739501953, "std": 328.13690185546875, "min": -501.4359436035156, "p10": -106.01977081298826, "median": 164.06227111816406, "p90": 611.279364013672, "max": 1394.59716796875, "pos_frac": 0.765625, "sample": [158.31777954101562, -367.3555603027344, 431.9652404785156, 142.52603149414062, 9.724681854248047, 177.58450317382812, 574.8418579101562, -82.64891815185547, 237.91867065429688, 423.9209899902344, 136.06298828125, 551.0609130859375, 128.756591796875, -66.19246673583984, 42.745399475097656, 58.817832946777344, 12.667474746704102, -10.774726867675781, 207.98321533203125, -233.05508422851562, 490.09149169921875, 345.70330810546875, 525.625732421875, 38.618873596191406, 346.55377197265625, 0.9039115905761719, 445.00531005859375, -116.03585052490234, 100.81499481201172, 169.8067626953125, 790.3590087890625, 14.266777038574219, -7.198833465576172, 443.6064147949219, 186.20716857910156, -501.4359436035156, -76.30467987060547, 107.84835052490234, 593.3169555664062, 41.232181549072266, 840.423828125, -57.735137939453125, -312.3931579589844, -118.53041076660156, 623.4743041992188, 684.7281494140625, 909.3798828125, 188.11196899414062, 618.9775390625, 223.91580200195312, 493.347412109375, 499.56536865234375, -10.688423156738281, 313.6737365722656, 308.108642578125, 325.7359313964844, -177.09579467773438, 44.78144836425781, -0.4723701477050781, 504.1018371582031, 473.736572265625, 88.06838989257812, 106.74591064453125, 1394.59716796875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000349.npy"} +{"epoch": 0.5124816446402349, "step": 350, "batch_size": 64, "mean": 224.13467407226562, "std": 310.5750427246094, "min": -561.46142578125, "p10": -187.94180145263672, "median": 219.36017608642578, "p90": 521.8249969482423, "max": 1118.131591796875, "pos_frac": 0.8125, "sample": [854.3526611328125, -391.5827331542969, 116.38355255126953, 393.7021789550781, 400.5834045410156, 186.53610229492188, 397.2244873046875, 149.33840942382812, -173.61512756347656, 199.5438690185547, 113.28274536132812, 501.6962585449219, -403.68359375, 175.51612854003906, 219.51742553710938, 65.28433227539062, 788.6513671875, 460.5953369140625, 1118.131591796875, 321.6438903808594, 171.45022583007812, 241.26748657226562, -21.578542709350586, 470.9892883300781, 254.35888671875, 778.8921508789062, 155.49761962890625, -262.5208435058594, 134.37582397460938, 29.89612579345703, 530.4515991210938, 128.98228454589844, 116.82000732421875, 198.56121826171875, 349.1871643066406, 354.053955078125, 154.1773681640625, 276.30255126953125, -234.78106689453125, 722.91552734375, 219.2029266357422, 461.80999755859375, 395.3143615722656, -188.18646240234375, 172.29867553710938, 241.9662628173828, 751.6732788085938, 487.7630920410156, 294.5415344238281, 286.74139404296875, -561.46142578125, 425.3929443359375, 134.69642639160156, 241.40252685546875, 19.615079879760742, 242.27096557617188, -345.248291015625, -117.18478393554688, -17.775184631347656, 191.82534790039062, 420.894287109375, 247.49581909179688, -187.3709259033203, 484.5401611328125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000350.npy"} +{"epoch": 0.5139500734214391, "step": 351, "batch_size": 64, "mean": 270.1547546386719, "std": 347.3043212890625, "min": -485.22491455078125, "p10": -97.39309539794918, "median": 222.6649169921875, "p90": 738.2168151855469, "max": 1300.2030029296875, "pos_frac": 0.796875, "sample": [99.01284790039062, 319.178466796875, 624.9666137695312, 1300.2030029296875, 1289.455322265625, 148.64540100097656, 432.9125671386719, 118.45570373535156, 284.37469482421875, 335.2548828125, 114.69488525390625, 219.9892120361328, 730.5277099609375, 845.03857421875, 371.6201171875, 361.8135070800781, 593.4954223632812, 593.1331176757812, 778.527587890625, 9.377862930297852, -230.91244506835938, 400.2185363769531, 123.64511108398438, 219.16082763671875, 162.74591064453125, 407.1939392089844, 543.1015625, 185.56515502929688, -485.22491455078125, -64.91954040527344, -391.3836975097656, 114.08465576171875, -128.51580810546875, 137.12777709960938, 4.03094482421875, 898.9066162109375, 32.02545166015625, 155.76519775390625, 303.6588134765625, 40.890098571777344, -111.31033325195312, 377.9421691894531, -18.309001922607422, 769.4686279296875, 176.88876342773438, -3.8088016510009766, 225.3406219482422, -38.395538330078125, -19.12078857421875, -185.8768768310547, -32.857337951660156, 715.5294799804688, 255.9842987060547, 404.2026062011719, 741.5121459960938, -205.78407287597656, 331.0625305175781, 641.1787109375, 300.0495300292969, 244.13449096679688, 147.1884002685547, 343.3282470703125, 229.6264190673828, 4.088264465332031], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000351.npy"} +{"epoch": 0.5154185022026432, "step": 352, "batch_size": 64, "mean": 157.96469116210938, "std": 297.6238098144531, "min": -654.0145263671875, "p10": -145.4646774291992, "median": 139.5041961669922, "p90": 514.9976440429689, "max": 1034.923583984375, "pos_frac": 0.6875, "sample": [380.0220031738281, 360.68499755859375, 136.82901000976562, 474.4640808105469, -452.8736267089844, 85.3413314819336, 53.441993713378906, 386.66455078125, 11.377182006835938, 285.44769287109375, 700.6704711914062, 471.8548583984375, -54.45341110229492, 132.3516845703125, 279.34661865234375, -142.45619201660156, -95.5513916015625, 1034.923583984375, 275.62713623046875, -146.7540283203125, 167.11947631835938, 706.96435546875, 165.5189666748047, 442.4031982421875, 71.77857971191406, -91.48868560791016, -57.705562591552734, -249.42010498046875, -82.84090423583984, 104.53878784179688, 29.596160888671875, 484.89764404296875, 19.43030548095703, 14.239768981933594, 559.6316528320312, 143.05029296875, -654.0145263671875, 47.080238342285156, 239.5181884765625, 373.0751647949219, 279.54376220703125, 207.48464965820312, 196.83998107910156, -244.31723022460938, -108.62599182128906, -32.870574951171875, -262.6830749511719, 642.6438598632812, 527.8976440429688, -146.87059020996094, 326.876953125, -119.41207122802734, -128.13819885253906, 464.90667724609375, 253.44537353515625, 217.7210693359375, 406.337158203125, -79.23064422607422, 687.964111328125, 110.84136199951172, -97.55368041992188, -27.281173706054688, 281.7110290527344, 142.17938232421875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000352.npy"} +{"epoch": 0.5168869309838473, "step": 353, "batch_size": 64, "mean": 220.58767700195312, "std": 311.4006042480469, "min": -455.0257263183594, "p10": -104.5953239440918, "median": 165.13802337646484, "p90": 648.7142639160156, "max": 1056.7974853515625, "pos_frac": 0.703125, "sample": [384.4422607421875, 400.8611145019531, 55.452857971191406, 537.0238647460938, 189.23883056640625, 459.3312072753906, 9.500574111938477, -89.18077850341797, 609.88720703125, 141.90472412109375, 311.5963134765625, -125.30542755126953, 311.41656494140625, 545.7913818359375, -2.979156494140625, 363.3896179199219, -109.65715026855469, 41.92279052734375, 651.8157958984375, 872.1294555664062, 194.3942413330078, 304.6702575683594, -38.43584060668945, 55.327484130859375, -455.0257263183594, 491.9171142578125, -257.1347351074219, -60.01349639892578, -104.9590835571289, 269.73260498046875, -6.431501388549805, 115.1873550415039, 367.22320556640625, 414.8514404296875, 3.9676513671875, 8.12509536743164, -33.85595703125, -183.64614868164062, 829.5091552734375, 1.673574447631836, -35.09893798828125, 641.4773559570312, -65.67188262939453, 97.29145050048828, 138.5623016357422, 251.5158233642578, 310.5996398925781, 188.37132263183594, -86.39236450195312, 741.8984375, 375.4159240722656, -103.74655151367188, 710.6222534179688, 358.1506652832031, 1056.7974853515625, 840.849853515625, -120.32212829589844, 351.7322082519531, 616.382080078125, -66.15176391601562, 122.12096405029297, 44.98592758178711, -49.78082275390625, 322.343017578125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000353.npy"} +{"epoch": 0.5183553597650514, "step": 354, "batch_size": 64, "mean": 230.987060546875, "std": 241.08615112304688, "min": -203.54681396484375, "p10": -53.62928123474121, "median": 217.23168182373047, "p90": 508.086996459961, "max": 983.4398193359375, "pos_frac": 0.78125, "sample": [400.75286865234375, 509.7227783203125, 322.02288818359375, 54.162567138671875, 426.8870849609375, 174.53152465820312, 654.8538818359375, -54.53932571411133, 280.2593078613281, -132.4493865966797, -89.30814361572266, -203.54681396484375, 263.35015869140625, 983.4398193359375, 416.3987121582031, 243.67446899414062, -25.091270446777344, 346.62408447265625, 397.3494567871094, 233.94000244140625, 18.97620391845703, 413.0497131347656, 179.63580322265625, 144.95147705078125, -177.17535400390625, 118.75643920898438, 357.6929626464844, 230.8433380126953, 91.9809799194336, 430.7423095703125, 595.8591918945312, -66.33747863769531, 292.1188049316406, 260.4262390136719, 123.447265625, 136.4813690185547, -114.8504409790039, 497.4664001464844, 203.62002563476562, 90.71186828613281, 504.2701721191406, -35.383934020996094, 502.28216552734375, 196.77902221679688, -45.17357635498047, 326.59503173828125, 141.850341796875, 694.8593139648438, 411.76751708984375, -42.6407470703125, 8.281209945678711, 388.4369812011719, 390.19775390625, 115.81109619140625, -45.99329376220703, 644.5878295898438, 86.86272430419922, 393.6605224609375, 325.4497985839844, -51.50584411621094, 588.0489501953125, 120.68812561035156, 133.38626098632812, -1.3768959045410156], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000354.npy"} +{"epoch": 0.5198237885462555, "step": 355, "batch_size": 64, "mean": 230.6171875, "std": 241.70083618164062, "min": -322.259033203125, "p10": -71.46696472167969, "median": 211.38511657714844, "p90": 542.2751403808594, "max": 1007.98828125, "pos_frac": 0.828125, "sample": [144.4765167236328, 554.0506591796875, 445.6189270019531, 541.2926635742188, 88.1684799194336, 279.2060546875, 273.54345703125, 1007.98828125, -281.63916015625, 261.74761962890625, 259.7673034667969, 542.4517822265625, 74.54393768310547, -22.757614135742188, 544.444091796875, 460.86614990234375, -131.9478759765625, 588.3616333007812, 196.4893341064453, 63.71735382080078, 388.3619384765625, 210.92672729492188, -71.15443420410156, -71.60090637207031, 296.9839172363281, 497.6390380859375, 489.1353759765625, 295.263916015625, 163.43011474609375, -9.942947387695312, -30.022823333740234, 201.14736938476562, 93.34574890136719, -322.259033203125, 206.26441955566406, -192.23825073242188, 297.85662841796875, 589.6732788085938, 326.9130859375, 653.5601806640625, 74.65106964111328, 203.97540283203125, 127.02598571777344, 211.843505859375, 120.84719848632812, 285.51422119140625, 106.37604522705078, 406.1921691894531, 112.21701049804688, 191.0562744140625, 541.8629760742188, 477.1094970703125, 381.7774658203125, 259.0005187988281, 144.4197235107422, 11.60888671875, 141.77528381347656, -112.96636962890625, 265.19525146484375, -129.3385467529297, 106.40121459960938, 313.9384765625, 389.9219055175781, 225.42254638671875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000355.npy"} +{"epoch": 0.5212922173274597, "step": 356, "batch_size": 64, "mean": 122.51680755615234, "std": 307.0563659667969, "min": -563.7870483398438, "p10": -263.5809585571289, "median": 114.18663024902344, "p90": 524.1317169189455, "max": 976.2353515625, "pos_frac": 0.640625, "sample": [138.3702392578125, -45.06208038330078, 380.95477294921875, 120.84575653076172, 4.676830291748047, -22.363903045654297, -88.11581420898438, 242.75973510742188, -152.29751586914062, 181.96304321289062, 300.6785888671875, -4.092525482177734, 816.4363403320312, 478.8151550292969, -151.4835205078125, 543.5531005859375, -98.83198547363281, -227.3568878173828, -563.7870483398438, -281.6736755371094, 37.59687042236328, 697.3634643554688, -136.41221618652344, 131.7686767578125, 395.2559509277344, 557.4088134765625, -85.83882904052734, 423.6488342285156, -204.44308471679688, 64.8359603881836, -341.185546875, 723.513671875, 139.66806030273438, -279.1055603027344, 370.1048583984375, 84.95037841796875, 278.32049560546875, 411.44818115234375, -296.00701904296875, 52.76911926269531, -96.45170593261719, -175.78570556640625, 169.79714965820312, 217.885009765625, 9.515880584716797, 298.10223388671875, 394.53363037109375, -203.04196166992188, 663.296875, 218.32022094726562, 175.10784912109375, 72.24923706054688, -338.89569091796875, -22.49903106689453, 178.08319091796875, 107.52750396728516, 140.1625213623047, 269.71697998046875, -424.6927185058594, 976.2353515625, 308.421630859375, 263.57000732421875, -47.960609436035156, 88.2283706665039], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000356.npy"} +{"epoch": 0.5227606461086637, "step": 357, "batch_size": 64, "mean": 205.3372802734375, "std": 244.41946411132812, "min": -396.4521484375, "p10": -49.230482482910155, "median": 193.40997314453125, "p90": 564.5627929687502, "max": 874.792236328125, "pos_frac": 0.75, "sample": [417.4891662597656, 634.3504028320312, -8.057937622070312, 642.6854858398438, 486.0721740722656, 27.136093139648438, 110.89900207519531, 421.0452880859375, 91.37106323242188, 596.4764404296875, 268.01251220703125, 88.70955657958984, -139.01351928710938, 577.90576171875, -46.374481201171875, 182.06817626953125, 211.72348022460938, 160.94667053222656, 281.594482421875, -50.45448303222656, 670.4722290039062, -3.6768569946289062, 302.3247985839844, 9.896041870117188, -396.4521484375, 75.13807678222656, 298.7734375, 162.9588623046875, -75.09996795654297, 533.42919921875, 292.04144287109375, -105.67134857177734, 494.9623718261719, -189.98382568359375, 335.17254638671875, 14.141841888427734, 406.35235595703125, 874.792236328125, 236.7718505859375, 303.843994140625, -8.013031005859375, -43.499610900878906, 255.3614501953125, 511.5477294921875, 98.63562774658203, -19.338726043701172, 141.30894470214844, 239.44125366210938, -22.873233795166016, 219.17471313476562, -85.5230712890625, 348.81243896484375, 131.80404663085938, 16.751022338867188, 255.36863708496094, 204.75177001953125, 92.79875183105469, 326.2978820800781, -31.645355224609375, 295.27685546875, 109.27201843261719, 669.0440673828125, -3.57684326171875, 245.63656616210938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000357.npy"} +{"epoch": 0.5242290748898678, "step": 358, "batch_size": 64, "mean": 189.1357879638672, "std": 258.0898742675781, "min": -424.557861328125, "p10": -72.39048919677732, "median": 127.24040222167969, "p90": 558.7547912597659, "max": 933.16259765625, "pos_frac": 0.78125, "sample": [102.16464233398438, 617.4801025390625, 16.021133422851562, 256.9863586425781, 226.32862854003906, 369.6414794921875, 92.80587768554688, 391.4859619140625, 120.18521118164062, 12.328094482421875, 11.443214416503906, 41.7049560546875, -80.09567260742188, -12.852737426757812, 10.24091911315918, 720.52880859375, -111.25096893310547, 584.9230346679688, 86.48416137695312, 145.1741180419922, -19.9610652923584, -19.449966430664062, 27.87213897705078, 49.062042236328125, 158.404541015625, -424.557861328125, 347.53662109375, -140.11993408203125, 933.16259765625, 379.3675537109375, 421.2025146484375, 182.93295288085938, 344.93731689453125, 9.192649841308594, 645.5158081054688, 220.86001586914062, -11.889579772949219, 37.79928970336914, -19.9182071685791, 375.7171936035156, 311.81951904296875, 374.5672302246094, -229.489501953125, 278.8130187988281, 414.87799072265625, -96.91592407226562, 327.1957702636719, 642.1663818359375, -14.214946746826172, 107.64549255371094, 374.09259033203125, 140.80032348632812, 32.2808837890625, -54.41172790527344, 497.695556640625, 906.1441650390625, 89.6832504272461, 110.9447250366211, 250.54319763183594, -92.46542358398438, 152.7262725830078, 134.29559326171875, 244.24009704589844, 102.26264953613281], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000358.npy"} +{"epoch": 0.5256975036710719, "step": 359, "batch_size": 64, "mean": 218.9563446044922, "std": 285.1737976074219, "min": -375.56781005859375, "p10": -120.56499099731444, "median": 177.35588836669922, "p90": 621.5901611328128, "max": 1082.6904296875, "pos_frac": 0.765625, "sample": [-375.56781005859375, 171.0604248046875, 145.01287841796875, 408.0782775878906, -143.51498413085938, 695.0565185546875, -148.64962768554688, 891.0597534179688, 116.42156219482422, -42.23567199707031, -135.8960723876953, -77.0617446899414, 366.0813293457031, 331.4879150390625, -171.08673095703125, 64.06007385253906, 303.44793701171875, -148.54776000976562, 530.3504028320312, -67.9546890258789, 246.54718017578125, 183.86361694335938, 478.220458984375, 154.52679443359375, -9.911022186279297, 66.44508361816406, 33.46460723876953, 418.3779296875, 50.74237060546875, 59.607994079589844, 455.42822265625, 302.77056884765625, 1082.6904296875, 439.56951904296875, -106.92738342285156, 90.4181900024414, 219.45761108398438, 106.3646469116211, -123.75605010986328, 711.9765625, 240.95327758789062, 527.0452880859375, 152.0831756591797, 353.4306945800781, -105.88180541992188, 183.85121154785156, 414.4643859863281, 325.9377746582031, 197.17108154296875, 701.669921875, 84.58395385742188, -113.11918640136719, -76.47807312011719, 297.9638977050781, 183.65135192871094, 652.1406860351562, 28.263641357421875, 146.5926971435547, 707.038818359375, 550.3056030273438, 160.9144287109375, 513.1229858398438, 121.67109680175781, 194.3502197265625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000359.npy"} +{"epoch": 0.527165932452276, "step": 360, "batch_size": 64, "mean": 181.6644287109375, "std": 196.9057159423828, "min": -197.71170043945312, "p10": -50.96458091735839, "median": 154.08783721923828, "p90": 447.8564086914064, "max": 624.6005249023438, "pos_frac": 0.8125, "sample": [44.021087646484375, 460.13421630859375, 115.67208099365234, 205.1282958984375, 5.409124374389648, 381.4024658203125, 29.487159729003906, 252.80441284179688, 289.9591369628906, 179.2616729736328, 158.00802612304688, 399.8450012207031, -88.321533203125, 291.251708984375, 596.2389526367188, 145.0968780517578, 36.339412689208984, -2.1461639404296875, 406.304931640625, 62.005401611328125, 339.8262023925781, 206.78518676757812, -99.38887786865234, 245.5124053955078, 86.23440551757812, 317.60504150390625, 467.74029541015625, 20.052169799804688, 17.224079132080078, 390.1315612792969, 530.45703125, 386.3510437011719, 534.3521118164062, -63.393463134765625, 33.621559143066406, 150.1676483154297, 232.19915771484375, 134.6004638671875, 419.20819091796875, 8.010467529296875, 52.68985366821289, 624.6005249023438, -44.051116943359375, -53.927494049072266, -66.30661010742188, 62.393035888671875, -197.71170043945312, 202.07920837402344, 37.61522674560547, -85.12138366699219, -27.885467529296875, 334.80047607421875, 27.10841941833496, 383.1058044433594, 401.9941101074219, 222.40274047851562, 314.17041015625, -23.48145294189453, 161.81802368164062, 467.60345458984375, 95.88175964355469, -29.773141860961914, 53.450340270996094, 387.8696594238281], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000360.npy"} +{"epoch": 0.5286343612334802, "step": 361, "batch_size": 64, "mean": 178.4139404296875, "std": 262.1260070800781, "min": -458.2357482910156, "p10": -113.91688613891601, "median": 182.4579315185547, "p90": 501.3523620605469, "max": 826.1107788085938, "pos_frac": 0.71875, "sample": [20.714956283569336, 183.95175170898438, 23.156139373779297, 349.853271484375, -32.380775451660156, -207.03189086914062, 31.72490692138672, 246.93289184570312, 522.796875, 103.00190734863281, 180.964111328125, 310.3155212402344, -458.2357482910156, 446.75396728515625, -29.641250610351562, -114.03797149658203, 185.0631103515625, 132.45033264160156, -272.5587463378906, -20.750612258911133, 246.2686767578125, -118.07323455810547, -48.0916748046875, -50.558345794677734, 216.3461456298828, 765.705078125, 340.25213623046875, 529.0146484375, 132.01202392578125, 455.98992919921875, -45.6541748046875, -62.413997650146484, 143.3205108642578, 76.00717163085938, 487.239013671875, 477.1920166015625, -187.2328643798828, 826.1107788085938, 688.3568725585938, 507.40093994140625, 217.93763732910156, 229.13829040527344, 656.8607177734375, 438.3456726074219, 188.57843017578125, 187.72402954101562, 39.27275848388672, 196.80894470214844, -40.943267822265625, -109.75765228271484, 33.08940124511719, -13.61899185180664, 453.8623962402344, 19.89488983154297, 447.22314453125, 330.06427001953125, 158.30303955078125, -113.63435363769531, 272.9727478027344, 12.295394897460938, 197.66595458984375, -218.77688598632812, 442.24493408203125, 410.70556640625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000361.npy"} +{"epoch": 0.5301027900146843, "step": 362, "batch_size": 64, "mean": 279.48834228515625, "std": 280.02850341796875, "min": -186.82791137695312, "p10": -28.619164466857896, "median": 234.64077758789062, "p90": 645.7403625488282, "max": 1349.8778076171875, "pos_frac": 0.859375, "sample": [126.4561767578125, 111.783935546875, 561.5538330078125, 546.2980346679688, 333.5727233886719, 42.249732971191406, 1349.8778076171875, 277.36590576171875, 189.30526733398438, 655.9937744140625, 565.2342529296875, 107.0861587524414, 54.453773498535156, 99.1431884765625, 256.8533935546875, 539.10693359375, 202.1254425048828, -13.564580917358398, 13.3369140625, 317.73779296875, 467.8985595703125, 92.11264038085938, 34.392906188964844, -74.7535400390625, 59.90443420410156, -35.071128845214844, 149.93905639648438, 267.709716796875, -89.84069061279297, 664.4110107421875, 621.8157348632812, 107.76228332519531, -135.48953247070312, 263.8030700683594, 249.8878631591797, 149.4018096923828, 590.3402099609375, -67.74162292480469, 177.2936248779297, 675.4053955078125, 258.3714294433594, 92.06272888183594, 797.4195556640625, 472.7897033691406, 103.61026763916016, 203.50289916992188, 426.3832092285156, 162.12518310546875, 100.74897766113281, 772.9402465820312, 244.60403442382812, 785.4415893554688, 455.9432678222656, 367.52337646484375, 224.67752075195312, 544.7031860351562, 466.042236328125, -186.82791137695312, 151.9394073486328, 451.2165832519531, 259.045654296875, 358.5964660644531, -1.943023681640625, -128.81631469726562], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000362.npy"} +{"epoch": 0.5315712187958884, "step": 363, "batch_size": 64, "mean": 282.3719482421875, "std": 285.9869079589844, "min": -253.890625, "p10": -47.50895004272461, "median": 268.845458984375, "p90": 662.1006774902344, "max": 1294.665283203125, "pos_frac": 0.828125, "sample": [-253.890625, 321.80413818359375, 537.99609375, 268.8577880859375, 183.19740295410156, 222.80227661132812, 315.83599853515625, 296.2449951171875, 259.6812744140625, 109.76653289794922, 372.6910400390625, 240.4804229736328, 369.52044677734375, 43.298072814941406, 314.160400390625, 897.5455322265625, -59.598419189453125, 663.58154296875, 60.49830627441406, 746.7642822265625, 514.640625, 75.8846664428711, 261.735107421875, 417.98504638671875, 288.5292053222656, 427.25103759765625, 1294.665283203125, 616.259765625, -59.8726806640625, 658.6453247070312, 185.5634765625, 248.3297576904297, -4.709075927734375, 269.0164794921875, 443.8840026855469, 34.50995635986328, 298.07623291015625, -49.839935302734375, 680.5208129882812, 521.18505859375, 412.7250671386719, -146.86630249023438, 56.844757080078125, 447.96954345703125, -52.554588317871094, 390.2922668457031, 636.9700927734375, -6.7210693359375, 268.8331298828125, 425.1036682128906, 5.471214294433594, 164.92115783691406, -213.48452758789062, 387.6663818359375, 195.6590576171875, 13.836891174316406, 15.127220153808594, 220.78912353515625, 56.38814926147461, -18.18071746826172, 751.8182373046875, 741.6299438476562, -42.069984436035156, 326.13787841796875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000363.npy"} +{"epoch": 0.5330396475770925, "step": 364, "batch_size": 64, "mean": 213.612060546875, "std": 280.36041259765625, "min": -497.1353454589844, "p10": -77.37354202270508, "median": 190.7569580078125, "p90": 556.3176452636719, "max": 939.0712890625, "pos_frac": 0.78125, "sample": [407.61566162109375, 259.9389343261719, -182.1442108154297, 58.620460510253906, -497.1353454589844, 561.6015014648438, -305.2510681152344, 296.865478515625, 265.0144958496094, 924.9805297851562, 163.50782775878906, 729.4547119140625, 395.2845153808594, 388.38983154296875, -74.20531463623047, 107.149658203125, 99.038330078125, -282.3962707519531, 323.2635192871094, -9.637664794921875, -127.3134765625, 425.5267028808594, 246.17526245117188, 446.6354675292969, 351.10919189453125, 20.850053787231445, -25.301395416259766, 477.10577392578125, 198.511474609375, 217.40603637695312, -63.69371032714844, 646.7448120117188, 149.68576049804688, -0.778350830078125, 107.76358795166016, 135.178955078125, 519.8709106445312, 328.52740478515625, 313.62274169921875, -78.73135375976562, 125.08470916748047, 648.5382080078125, 662.1978149414062, 108.84812927246094, 306.32122802734375, -3.4872512817382812, 3.580617904663086, 371.21905517578125, 543.9886474609375, 377.6355895996094, 46.51975631713867, 177.12046813964844, 40.016998291015625, 108.58667755126953, 939.0712890625, 70.8402099609375, 292.4405517578125, 306.5682067871094, -13.902444839477539, 254.2578125, 38.461700439453125, 183.00244140625, 467.92218017578125, -302.5127868652344], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000364.npy"} +{"epoch": 0.5345080763582967, "step": 365, "batch_size": 64, "mean": 225.06381225585938, "std": 274.67962646484375, "min": -321.7203674316406, "p10": -61.52406463623046, "median": 177.05492401123047, "p90": 539.1546264648438, "max": 1329.197998046875, "pos_frac": 0.84375, "sample": [201.012451171875, 191.27499389648438, 90.71441650390625, 105.58265686035156, 152.96510314941406, 143.81350708007812, 352.49700927734375, 119.68647003173828, 239.94857788085938, 168.28909301757812, 291.6341552734375, 422.821533203125, -77.35700988769531, 220.77517700195312, 34.60084533691406, -57.20831298828125, 105.47862243652344, 751.1678466796875, 613.8645629882812, 268.2218933105469, 132.55657958984375, 56.96698760986328, 73.2624740600586, 33.835052490234375, 207.8607635498047, -71.12066650390625, 113.96670532226562, 30.87582778930664, 177.6499481201172, 176.45989990234375, -63.37367248535156, -47.75802230834961, 112.74967193603516, 41.70669174194336, 508.0335693359375, 693.9349975585938, 362.0249938964844, 82.67617797851562, -112.38638305664062, 539.3478393554688, 253.3571319580078, -239.69558715820312, 195.68638610839844, 18.277658462524414, 18.34479331970215, 255.84620666503906, 258.3592834472656, 404.60614013671875, 354.7101745605469, -111.8599853515625, 39.760169982910156, 479.383056640625, -2.847097396850586, 293.9245910644531, 299.3275451660156, 304.28857421875, 1329.197998046875, 538.7037963867188, 469.8857421875, -321.7203674316406, 846.7615966796875, 781.034912109375, 116.39263916015625, 433.3055419921875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000365.npy"} +{"epoch": 0.5359765051395007, "step": 366, "batch_size": 64, "mean": 233.8548583984375, "std": 307.8041076660156, "min": -372.8016052246094, "p10": -128.4997619628906, "median": 185.15216064453125, "p90": 646.649658203125, "max": 993.1517944335938, "pos_frac": 0.78125, "sample": [-36.52758026123047, 184.86184692382812, 323.1762390136719, 91.18446350097656, -372.8016052246094, 47.05287551879883, -82.4643325805664, 29.53884506225586, -136.40155029296875, 500.33807373046875, 104.87477111816406, -219.87716674804688, 557.5614013671875, 725.8060913085938, 133.35397338867188, -149.1137237548828, 362.4014587402344, 35.549747467041016, 182.34971618652344, -92.42292785644531, 346.2754211425781, 648.312255859375, 404.2390441894531, 65.29685974121094, 618.447509765625, 993.1517944335938, 326.53076171875, 69.56627655029297, 642.770263671875, 221.0067138671875, 185.44247436523438, -48.2957763671875, -254.82186889648438, 672.0313720703125, 340.9697265625, 437.24871826171875, 899.9050903320312, 357.7514953613281, 233.23355102539062, 137.47251892089844, 241.92674255371094, 137.81552124023438, 45.64031982421875, -97.59622192382812, 558.40380859375, -211.3582305908203, 344.0941162109375, 46.43974685668945, 529.0279541015625, 600.7491455078125, 464.52166748046875, 442.61865234375, 732.423583984375, 372.4432678222656, 822.1881103515625, 266.7009582519531, -110.062255859375, 35.42301940917969, -100.19561767578125, 165.01541137695312, 89.29021453857422, 141.93617248535156, 331.275634765625, -366.986572265625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000366.npy"} +{"epoch": 0.5374449339207048, "step": 367, "batch_size": 64, "mean": 186.4353485107422, "std": 249.6841583251953, "min": -627.3193359375, "p10": -83.88497848510742, "median": 150.70303344726562, "p90": 512.0148681640625, "max": 834.0758056640625, "pos_frac": 0.78125, "sample": [82.18826293945312, 128.24697875976562, 513.424560546875, 73.7626953125, 602.053466796875, -30.831331253051758, 414.7564697265625, 103.052734375, 547.4213256835938, -2.6641159057617188, 10.955642700195312, 13.19224739074707, -89.05535888671875, 543.8314208984375, 522.5936889648438, 265.0506591796875, 508.7255859375, 161.91720581054688, 258.0946044921875, -70.30712127685547, 295.69415283203125, 459.5377502441406, -84.32474517822266, -19.685707092285156, 358.42498779296875, 305.65545654296875, 834.0758056640625, -106.66197204589844, 526.138916015625, 86.78564453125, 474.9632873535156, 194.4318084716797, 39.599063873291016, 360.9049072265625, 107.39833068847656, 198.3834228515625, 288.0948486328125, 296.9072570800781, 437.7811279296875, 32.21158218383789, 190.1178741455078, -246.03897094726562, -187.2909698486328, -8.650663375854492, -82.85885620117188, 38.765869140625, 77.92312622070312, 439.1040954589844, 134.6921844482422, 94.75946044921875, -196.10110473632812, -627.3193359375, 107.37940216064453, 148.26658630371094, 276.4044189453125, 366.211669921875, 396.62677001953125, 230.88258361816406, 471.38079833984375, 18.379867553710938, 153.1394805908203, -20.447654724121094, 493.4632568359375, 20.347015380859375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000367.npy"} +{"epoch": 0.5389133627019089, "step": 368, "batch_size": 64, "mean": 299.6893310546875, "std": 221.60476684570312, "min": -117.43881225585938, "p10": 20.005958938598635, "median": 329.7808074951172, "p90": 570.2013366699221, "max": 1018.6226806640625, "pos_frac": 0.921875, "sample": [334.9978332519531, 374.1161804199219, 191.23684692382812, 504.5875549316406, 361.48236083984375, 588.5015869140625, 178.81207275390625, 83.36213684082031, 612.9141235351562, 191.49069213867188, 19.425357818603516, 340.9942626953125, 492.80548095703125, 76.08273315429688, 297.3988037109375, 426.1783142089844, 358.3617858886719, 192.13548278808594, -38.261756896972656, 437.40875244140625, 255.84622192382812, 290.7467041015625, 533.6546630859375, 397.1178283691406, 356.75335693359375, 73.34231567382812, 449.0997314453125, -4.109230041503906, 59.57686996459961, 10.737241744995117, 131.30006408691406, 448.79803466796875, 372.5174865722656, 429.59857177734375, 324.56378173828125, 509.21923828125, 528.6657104492188, 34.46240234375, 175.36878967285156, 21.360694885253906, 346.18048095703125, 585.8641967773438, 469.8833312988281, 603.4652099609375, 153.2107391357422, 455.3567810058594, 693.5733032226562, 54.66380310058594, -117.43881225585938, 85.53567504882812, 111.17078399658203, 520.8683471679688, 436.6349792480469, 481.9814147949219, 372.5146484375, 73.41828918457031, 213.30270385742188, 1018.6226806640625, 197.6688232421875, 245.45236206054688, 319.70355224609375, -63.62577819824219, -88.43156433105469, 587.8906860351562], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000368.npy"} +{"epoch": 0.540381791483113, "step": 369, "batch_size": 64, "mean": 157.07363891601562, "std": 298.8527526855469, "min": -703.2120361328125, "p10": -196.6459289550781, "median": 127.70339584350586, "p90": 552.7728820800783, "max": 908.4901123046875, "pos_frac": 0.734375, "sample": [-256.5043029785156, -112.7221908569336, 908.4901123046875, -36.606971740722656, -288.55816650390625, 27.678098678588867, -41.99077606201172, 338.1289367675781, -14.205387115478516, 763.287109375, 233.35948181152344, -225.46963500976562, 109.73355865478516, 318.3137512207031, 314.130126953125, 758.4705200195312, 524.52734375, 150.70068359375, 92.95602416992188, 382.2663269042969, 279.158447265625, 101.98316955566406, 77.32485961914062, 223.81976318359375, 264.5081481933594, 116.29638671875, -415.50396728515625, 283.7664794921875, 105.68533325195312, 95.28447723388672, 218.3176727294922, 399.23876953125, 144.9403076171875, 177.96852111816406, 238.49063110351562, -18.048805236816406, 346.40643310546875, 125.26103973388672, -166.1798858642578, 265.4517822265625, 564.8781127929688, -209.7028045654297, 746.7491455078125, 65.83363342285156, 482.7207946777344, 212.77593994140625, 511.5352783203125, -456.4707336425781, 6.575969696044922, 653.303955078125, 190.51519775390625, 140.01536560058594, -703.2120361328125, 90.50687408447266, 130.145751953125, -123.47918701171875, -92.81532287597656, 90.72174835205078, 100.83448028564453, 659.9898071289062, 201.08255004882812, -56.767494201660156, 95.0684814453125, -58.24700164794922], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000369.npy"} +{"epoch": 0.5418502202643172, "step": 370, "batch_size": 64, "mean": 150.0509033203125, "std": 279.1884765625, "min": -397.07562255859375, "p10": -248.54268035888668, "median": 166.29345703125, "p90": 499.3950988769532, "max": 795.0435791015625, "pos_frac": 0.734375, "sample": [68.42530059814453, 253.79290771484375, 197.74510192871094, -397.07562255859375, 688.0358276367188, 444.50067138671875, 147.16976928710938, 315.92535400390625, 89.49365234375, -53.771400451660156, 390.6763916015625, 432.6430358886719, 56.777618408203125, 179.64755249023438, -147.60137939453125, 555.4212646484375, 272.927490234375, 134.75271606445312, 245.73358154296875, 795.0435791015625, 762.6981811523438, 228.86880493164062, 330.4173889160156, -108.7466049194336, 253.68099975585938, -161.01290893554688, 36.0985107421875, 274.53173828125, -266.1213684082031, 202.09698486328125, 512.7189331054688, 123.4683837890625, 16.215774536132812, 280.599609375, -26.676788330078125, 375.32000732421875, 269.9004211425781, -156.93649291992188, -364.9283447265625, 39.336524963378906, 188.43289184570312, 274.360595703125, -207.52574157714844, 98.23342895507812, 468.30615234375, 152.93936157226562, 225.11663818359375, 301.81170654296875, 211.7499542236328, -375.72705078125, -96.98719787597656, -64.00369262695312, 122.70089721679688, 111.76754760742188, -383.1382141113281, -317.3891296386719, 0.2311859130859375, -283.1431579589844, -47.071773529052734, 456.4271240234375, 571.6099853515625, 257.1513977050781, 14.20068359375, 631.4116821289062], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000370.npy"} +{"epoch": 0.5433186490455213, "step": 371, "batch_size": 64, "mean": 216.36563110351562, "std": 318.02142333984375, "min": -591.2305297851562, "p10": -102.8248924255371, "median": 164.1659927368164, "p90": 653.2595581054688, "max": 955.7471313476562, "pos_frac": 0.71875, "sample": [398.6492004394531, 276.7782897949219, 502.3851623535156, 70.34844207763672, -591.2305297851562, 160.11636352539062, 605.467041015625, 177.23171997070312, 857.7499389648438, 290.18780517578125, 565.0238037109375, -5.341806411743164, 320.986083984375, 216.29559326171875, -336.5561218261719, 128.7035675048828, -22.936859130859375, 192.52352905273438, 485.9093017578125, -36.4119873046875, -35.08195495605469, -240.0458221435547, 739.1477661132812, -99.60533142089844, -359.81304931640625, 72.50615692138672, 953.0990600585938, 626.9788818359375, -210.72877502441406, 30.859189987182617, 737.2425537109375, -27.941207885742188, 366.3128356933594, 205.98719787597656, -104.20470428466797, -18.594589233398438, 664.522705078125, -120.4153823852539, 302.49072265625, 477.39617919921875, -32.014495849609375, 80.0217056274414, 735.9144287109375, 95.67389678955078, 7.332256317138672, 232.4859619140625, 263.5823059082031, 518.2311401367188, 507.970703125, -77.24855041503906, 396.45556640625, 103.43468475341797, 107.12141418457031, 439.7054443359375, 168.2156219482422, 152.99972534179688, 133.65211486816406, 75.51283264160156, 34.66117858886719, 495.1466064453125, 284.2290344238281, 955.7471313476562, -41.55278015136719, -5.868843078613281], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000371.npy"} +{"epoch": 0.5447870778267254, "step": 372, "batch_size": 64, "mean": 259.337158203125, "std": 330.6878967285156, "min": -411.4425048828125, "p10": -102.1666290283203, "median": 179.40428924560547, "p90": 751.1556945800783, "max": 1287.8125, "pos_frac": 0.78125, "sample": [368.5126953125, 455.605224609375, 728.9817504882812, -264.9508056640625, 54.12016296386719, 641.2584228515625, 635.764404296875, 229.80987548828125, 361.25665283203125, -12.239028930664062, 1287.8125, 350.3360595703125, -73.81549072265625, 142.10897827148438, -32.166107177734375, 124.76680755615234, 93.76826477050781, 65.37328338623047, 832.5433349609375, -154.97987365722656, 103.45909118652344, 161.80325317382812, 760.6588134765625, -106.4953384399414, 555.1137084960938, 175.28565979003906, 92.77352142333984, 231.57022094726562, 65.90255737304688, 170.918701171875, 116.07492065429688, 345.1149597167969, 376.7052001953125, -106.23912811279297, 117.83868408203125, -92.66413116455078, 343.8115234375, 769.4098510742188, 28.848730087280273, 940.0263671875, 577.5221557617188, 351.6142272949219, 183.52291870117188, -30.481170654296875, 434.66534423828125, 498.6766662597656, 707.5621948242188, 224.9094696044922, 16.013504028320312, 377.078369140625, 203.13467407226562, 288.64459228515625, 101.05752563476562, -86.2322006225586, 787.084716796875, 161.19676208496094, -54.852447509765625, 261.34039306640625, -173.5102081298828, 832.3941040039062, -282.10107421875, -411.4425048828125, 603.2147827148438, 142.78207397460938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000372.npy"} +{"epoch": 0.5462555066079295, "step": 373, "batch_size": 64, "mean": 243.22650146484375, "std": 278.54302978515625, "min": -526.1529541015625, "p10": -80.29970397949216, "median": 218.9776611328125, "p90": 635.9063842773437, "max": 811.4934692382812, "pos_frac": 0.859375, "sample": [147.2825927734375, -215.87989807128906, -92.11761474609375, 283.509033203125, 112.66061401367188, 811.4934692382812, 445.9608154296875, 8.623458862304688, 293.0111999511719, 193.11328125, 692.408203125, 250.40065002441406, -13.791778564453125, 195.97113037109375, 371.70159912109375, 40.86096954345703, 21.92135238647461, 689.4935913085938, 293.3289489746094, 64.80370330810547, -331.9909362792969, -52.724578857421875, 218.4921875, -526.1529541015625, 101.84521484375, 73.06938934326172, 202.73040771484375, 102.47128295898438, 583.93115234375, 403.6439208984375, 239.8102569580078, 432.7376708984375, 352.3516845703125, 551.0994873046875, 476.7991027832031, -94.48983764648438, -160.27978515625, 634.5994873046875, 306.96490478515625, 7.7455596923828125, 809.7705078125, 148.99429321289062, 376.84686279296875, 219.463134765625, 385.7311706542969, 430.78790283203125, 300.8832092285156, 186.1226043701172, 635.4724731445312, 711.9764404296875, 235.6207275390625, 23.4068603515625, 9.72552490234375, 151.87994384765625, 7.6125640869140625, 362.078369140625, 125.42243957519531, 104.01641845703125, 724.5114135742188, 513.0811157226562, 439.0451965332031, 636.0923461914062, -112.93315124511719, 23.47875213623047], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000373.npy"} +{"epoch": 0.5477239353891337, "step": 374, "batch_size": 64, "mean": 214.69686889648438, "std": 358.47491455078125, "min": -713.28173828125, "p10": -145.11227874755858, "median": 133.56192016601562, "p90": 588.9072509765626, "max": 1293.4505615234375, "pos_frac": 0.734375, "sample": [-168.07232666015625, 117.20109558105469, -30.227737426757812, 1293.4505615234375, 422.6612548828125, 1205.1094970703125, 257.2848205566406, 619.398193359375, 367.8592529296875, -6.143775939941406, 190.20309448242188, 294.2539978027344, 76.97012329101562, 213.58151245117188, 91.28897094726562, -69.49604034423828, 558.04248046875, 91.63076782226562, 525.0734252929688, 447.6134948730469, -72.64471435546875, 9.667049407958984, 373.60821533203125, 55.78718566894531, -36.34818649291992, 884.1588134765625, -15.031728744506836, 58.41945266723633, -244.8920135498047, 353.6397705078125, 240.10525512695312, 269.1444091796875, -204.0491943359375, 986.3683471679688, -407.4703369140625, 11.647811889648438, -31.875350952148438, 510.0126037597656, 99.96521759033203, -48.379547119140625, 149.92274475097656, 47.07239532470703, 49.053871154785156, 78.76309967041016, 257.8456726074219, 115.50260162353516, -163.42518615722656, 517.3606567382812, 339.01654052734375, 457.20294189453125, 160.6745147705078, 602.135009765625, -128.4422149658203, 392.2560119628906, 364.9200744628906, -17.61986541748047, -152.256591796875, 198.18603515625, 41.18205261230469, 50.52131652832031, 1130.097900390625, -713.28173828125, 177.22210693359375, 497.1730651855469], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000374.npy"} +{"epoch": 0.5491923641703378, "step": 375, "batch_size": 64, "mean": 208.34274291992188, "std": 309.0815734863281, "min": -562.7738647460938, "p10": -169.7778808593749, "median": 179.51897430419922, "p90": 567.3581054687501, "max": 1091.2755126953125, "pos_frac": 0.71875, "sample": [656.868896484375, 300.5002746582031, -26.97528076171875, 302.61639404296875, -67.57508850097656, -208.18331909179688, 449.0313720703125, 271.1913757324219, 209.0895538330078, -23.118915557861328, -9.749612808227539, -26.123062133789062, -11.302925109863281, -562.7738647460938, 144.05908203125, 885.25439453125, 365.4568176269531, 55.74310302734375, 341.0986633300781, 135.320068359375, 31.662445068359375, 187.67831420898438, -486.8074951171875, 93.32636260986328, 440.47265625, -262.01422119140625, 591.2649536132812, 49.46186065673828, 101.84115600585938, 91.2297134399414, -296.3995666503906, -14.477523803710938, 547.009033203125, 557.0496826171875, 39.96147537231445, 265.8471984863281, 256.4856872558594, 461.4808349609375, 516.7763671875, 688.06787109375, 386.7066345214844, 171.35963439941406, 167.43630981445312, -294.53070068359375, 501.04876708984375, 513.1121826171875, 308.1647644042969, -218.32403564453125, 1091.2755126953125, 528.373779296875, 413.06085205078125, -0.8642005920410156, -13.2608642578125, 310.8509216308594, 168.22378540039062, 571.7760009765625, 12.123794555664062, 583.5658569335938, -80.1651840209961, 274.56097412109375, 430.178466796875, 387.55718994140625, 105.45501708984375, -24.095195770263672], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000375.npy"} +{"epoch": 0.5506607929515418, "step": 376, "batch_size": 64, "mean": 261.89935302734375, "std": 315.63818359375, "min": -489.27392578125, "p10": -89.7271614074707, "median": 235.25433349609375, "p90": 709.1742553710938, "max": 902.8450927734375, "pos_frac": 0.78125, "sample": [-77.4776382446289, 739.4509887695312, 285.7680358886719, 43.62736892700195, 797.0443725585938, 84.51710510253906, 523.9530639648438, -291.4991760253906, -51.40931701660156, -58.87580871582031, 379.1873779296875, -1.5290164947509766, 683.8742065429688, 124.30612182617188, -239.64309692382812, 498.39361572265625, 548.0839233398438, -175.04421997070312, -88.39877319335938, 223.45237731933594, 122.04856872558594, 13.093854904174805, -136.34075927734375, 808.9902954101562, 782.2122192382812, 126.93022918701172, 459.927734375, 312.87347412109375, 432.5785217285156, -54.32844924926758, 700.828857421875, 426.44195556640625, 126.33857727050781, 532.7265014648438, 167.66441345214844, 574.205322265625, 494.5365295410156, 255.21109008789062, 456.0147705078125, 291.39447021484375, 131.04302978515625, 519.1505737304688, -49.53471374511719, 189.47259521484375, 617.8614501953125, 77.94444274902344, 902.8450927734375, 3.3702774047851562, -130.69630432128906, 82.00507354736328, 15.149581909179688, 20.843069076538086, 695.8463134765625, 712.7508544921875, 294.74359130859375, 745.271728515625, 173.8307647705078, 291.5758056640625, 247.05628967285156, 279.7093505859375, -90.29647064208984, 83.70484924316406, 596.05517578125, -489.27392578125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000376.npy"} +{"epoch": 0.5521292217327459, "step": 377, "batch_size": 64, "mean": 257.19952392578125, "std": 320.58465576171875, "min": -476.5831298828125, "p10": -96.38984985351563, "median": 228.22030639648438, "p90": 695.5302307128907, "max": 1057.1348876953125, "pos_frac": 0.765625, "sample": [350.5025939941406, 950.0660400390625, 109.4129638671875, -139.32888793945312, 255.6331329345703, -97.76641845703125, 228.73385620117188, -156.79150390625, 289.28948974609375, 30.24707794189453, 899.7529907226562, 698.6029052734375, -17.35790252685547, 1057.1348876953125, 16.659963607788086, 279.5035705566406, 201.5406036376953, 493.1172790527344, 227.70675659179688, -17.172452926635742, 234.47137451171875, 592.7831420898438, 416.3252258300781, -37.0180778503418, 589.5897216796875, 889.6962280273438, 466.6105651855469, -253.7095489501953, 791.8093872070312, 188.303466796875, 312.18695068359375, 220.13307189941406, 27.63512420654297, 115.16543579101562, 42.940975189208984, 367.105712890625, 137.01710510253906, 297.2274169921875, 441.94549560546875, 688.3606567382812, 259.4696960449219, -44.33709716796875, -476.5831298828125, 161.3843994140625, -160.34292602539062, 109.47893524169922, 646.7132568359375, 435.1727294921875, -254.67478942871094, 351.0467224121094, 33.57487487792969, 743.1199951171875, -47.33833312988281, -93.1778564453125, -82.87055206298828, 604.8003540039062, 163.35601806640625, 606.9678955078125, 367.43682861328125, 73.8802261352539, 434.81268310546875, 195.76962280273438, 302.6248474121094, -57.581443786621094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000377.npy"} +{"epoch": 0.55359765051395, "step": 378, "batch_size": 64, "mean": 273.6103515625, "std": 294.0885925292969, "min": -639.6011962890625, "p10": -61.45261840820312, "median": 255.0072784423828, "p90": 626.4262878417969, "max": 1139.0499267578125, "pos_frac": 0.859375, "sample": [705.4520874023438, 295.70941162109375, 647.4818115234375, 630.8013916015625, 512.6058959960938, 100.28579711914062, 179.82855224609375, 400.64910888671875, 146.6995849609375, 198.15579223632812, 393.93829345703125, 338.6074523925781, 584.74609375, 206.47132873535156, 150.80877685546875, -30.498519897460938, 256.1807861328125, 761.785888671875, 233.69033813476562, 305.7530822753906, 226.77186584472656, -63.567665100097656, 12.57046890258789, 20.713600158691406, 295.193115234375, 203.85464477539062, -66.88298034667969, 530.7330322265625, 99.15758514404297, 552.3511962890625, 616.2177124023438, 491.5019226074219, 341.1647644042969, 47.8331298828125, 1.1756057739257812, 484.1814880371094, 253.83377075195312, 1139.0499267578125, 52.30513000488281, -321.45367431640625, 336.5447692871094, -84.96862030029297, 201.0546417236328, 586.6014404296875, 104.82907104492188, -82.86940002441406, 519.2357177734375, 372.5635070800781, 513.7910766601562, 319.0134582519531, 126.44041442871094, 384.071044921875, 473.45355224609375, 276.59124755859375, -639.6011962890625, 214.60487365722656, 221.28091430664062, 571.2871704101562, -56.51750946044922, 689.1371459960938, 139.8147430419922, -405.2398681640625, 143.44842529296875, 650.6396484375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000378.npy"} +{"epoch": 0.5550660792951542, "step": 379, "batch_size": 64, "mean": 271.435302734375, "std": 333.5232238769531, "min": -524.9966430664062, "p10": -89.5949836730957, "median": 230.32736206054688, "p90": 735.2326843261719, "max": 1322.743408203125, "pos_frac": 0.78125, "sample": [202.73699951171875, 500.7691650390625, -3.389862060546875, 240.2457275390625, -55.43071746826172, 86.54907989501953, 156.01962280273438, 742.1674194335938, 205.00967407226562, 198.3551025390625, 450.0140686035156, 835.5679931640625, 6.0405731201171875, 631.01171875, 351.33966064453125, 241.31112670898438, -524.9966430664062, 909.473876953125, 27.222213745117188, 261.88592529296875, 567.4921875, 488.6276550292969, 410.62908935546875, -8.214397430419922, 220.40899658203125, -68.584716796875, -374.5419006347656, 194.92465209960938, 36.78412628173828, 441.33502197265625, 846.51171875, 416.92010498046875, -231.96240234375, 136.0157928466797, 519.80859375, 783.7744140625, 360.376708984375, 329.2065734863281, 719.0516357421875, 161.33494567871094, -82.01830291748047, 579.0501098632812, 217.61178588867188, -11.076610565185547, 336.67864990234375, -177.84365844726562, 151.48031616210938, 466.0909118652344, 290.0324401855469, 216.21385192871094, -92.84213256835938, 369.437744140625, 826.0090942382812, 271.05792236328125, 1322.743408203125, 320.7344970703125, 48.08348846435547, 508.13238525390625, 76.4970932006836, -161.13726806640625, -5.2657623291015625, 634.4700927734375, -159.44418334960938, 15.359939575195312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000379.npy"} +{"epoch": 0.5565345080763583, "step": 380, "batch_size": 64, "mean": 296.16217041015625, "std": 342.9981384277344, "min": -858.6939697265625, "p10": -89.94090881347654, "median": 271.63951110839844, "p90": 738.2009765625, "max": 1210.05322265625, "pos_frac": 0.84375, "sample": [166.091552734375, 170.13063049316406, 177.51951599121094, 1210.05322265625, 109.94127655029297, 106.59404754638672, 140.60250854492188, 335.920166015625, 383.4732971191406, 373.22998046875, 137.76980590820312, 433.4715576171875, -98.08052825927734, 545.1362915039062, 912.4810791015625, 423.6878967285156, 506.7793273925781, 578.0779418945312, 454.9769592285156, 216.92840576171875, 441.2425537109375, -254.99777221679688, 290.1121826171875, 231.9559326171875, 25.815217971801758, -70.9484634399414, 536.263671875, 174.30252075195312, 102.86837768554688, 738.649658203125, 416.90887451171875, 741.1187133789062, 624.8657836914062, 630.19677734375, 724.128173828125, 397.574951171875, 147.7141876220703, -32.08868408203125, 275.7287902832031, 56.67865753173828, 953.370849609375, 148.86293029785156, -249.27227783203125, 237.26632690429688, -193.56591796875, 206.03648376464844, 267.55023193359375, 816.6231689453125, 83.08006286621094, -328.158447265625, 420.3651123046875, -46.884307861328125, 901.338623046875, 362.38189697265625, 138.3380126953125, -143.7186279296875, 421.34722900390625, 379.67608642578125, 174.66946411132812, 142.21234130859375, -858.6939697265625, 463.01153564453125, 737.154052734375, 438.5130615234375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000380.npy"} +{"epoch": 0.5580029368575624, "step": 381, "batch_size": 64, "mean": 195.93124389648438, "std": 361.6699523925781, "min": -758.72314453125, "p10": -179.46407165527344, "median": 180.97200775146484, "p90": 635.4801696777345, "max": 1015.3081665039062, "pos_frac": 0.671875, "sample": [197.6243896484375, -4.916656494140625, 38.236297607421875, 595.64306640625, 521.8677978515625, 809.4224243164062, 642.602294921875, -39.83689880371094, 152.8971405029297, 618.8618774414062, 464.9617614746094, 83.99723815917969, 444.6081848144531, 270.54620361328125, 934.7978515625, -124.8885726928711, 420.1060791015625, 410.1610412597656, 695.2545776367188, 260.1996765136719, 958.0419311523438, 725.3101806640625, -221.10482788085938, -143.1319122314453, -163.6779022216797, 104.75006103515625, 615.822509765625, 53.45188522338867, -352.1942443847656, -147.98077392578125, 164.3196258544922, 232.8297119140625, -70.43641662597656, -35.206661224365234, 288.2034912109375, -135.14442443847656, 256.64093017578125, 104.74271392822266, 222.95327758789062, -84.16885375976562, 58.65172576904297, -23.574920654296875, 1015.3081665039062, 232.10142517089844, 580.3720703125, 410.6849365234375, -18.5069580078125, -175.871337890625, 595.3389892578125, -192.6754608154297, -71.00409698486328, 402.4638671875, -181.00381469726562, 244.8075408935547, 96.49602508544922, -311.7518005371094, 116.46253967285156, 293.3796081542969, 411.8375244140625, 213.30613708496094, 143.0948028564453, -729.3736572265625, -758.72314453125, 421.6134338378906], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000381.npy"} +{"epoch": 0.5594713656387665, "step": 382, "batch_size": 64, "mean": 260.62750244140625, "std": 291.77801513671875, "min": -310.4764404296875, "p10": -79.7708282470703, "median": 215.24088287353516, "p90": 654.4214599609376, "max": 1033.580810546875, "pos_frac": 0.8125, "sample": [1033.580810546875, 624.5112915039062, 126.75762176513672, 109.9176254272461, 105.2049331665039, 915.39208984375, -112.9727554321289, 152.27716064453125, 303.238037109375, -245.2042236328125, 165.2981414794922, 270.6266784667969, 508.4107666015625, 526.1952514648438, 422.92047119140625, 642.5997314453125, 141.22410583496094, 35.91584777832031, 459.80242919921875, 138.01280212402344, 406.4270324707031, -291.9068603515625, 55.43519592285156, 282.0363464355469, 792.8372802734375, -87.3426742553711, 310.00634765625, -20.762115478515625, 587.586181640625, 371.1711120605469, 681.439208984375, -150.74681091308594, -46.513912200927734, 247.0740966796875, 498.0892028808594, -310.4764404296875, -179.65560913085938, 421.9276123046875, -19.759857177734375, 354.7048645019531, 239.14559936523438, 220.5164794921875, 408.22247314453125, 110.2535629272461, 253.47303771972656, 207.23558044433594, 163.70814514160156, 352.2314147949219, 164.21409606933594, 209.9652862548828, 841.5718383789062, 553.6537475585938, 337.26507568359375, -62.103187561035156, 154.06805419921875, 693.3943481445312, 82.19950866699219, -57.23245620727539, 512.3543701171875, 659.4879150390625, 138.89842224121094, 111.65135955810547, 9.160507202148438, 151.54525756835938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000382.npy"} +{"epoch": 0.5609397944199707, "step": 383, "batch_size": 64, "mean": 256.0433654785156, "std": 374.42041015625, "min": -1015.8056640625, "p10": -136.99608917236327, "median": 242.56287384033203, "p90": 797.4354858398439, "max": 990.2091064453125, "pos_frac": 0.71875, "sample": [-13.626976013183594, -73.49978637695312, 139.94509887695312, -312.8189392089844, 722.6483154296875, -247.8207550048828, -132.02099609375, 724.3125, 291.16180419921875, -24.428356170654297, 303.86676025390625, 273.1473388671875, 178.7830810546875, -73.21479797363281, -392.26361083984375, 239.2008056640625, 918.9300537109375, -37.508689880371094, 289.89776611328125, 237.42369079589844, 990.2091064453125, -137.14431762695312, -3.4702415466308594, 738.1552124023438, 211.41302490234375, -0.00626373291015625, 833.514404296875, 66.66419982910156, 12.942626953125, 288.35943603515625, 971.9274291992188, 181.8716583251953, 821.744873046875, 278.84283447265625, 451.81573486328125, 478.865478515625, 395.3587646484375, 41.18446350097656, 245.92494201660156, 286.9102783203125, -190.01675415039062, -1.391998291015625, 258.5957336425781, 9.215167999267578, 403.4291687011719, 770.42626953125, -148.80056762695312, 684.8674926757812, 70.41889953613281, 313.5821228027344, 809.0108642578125, 152.71524047851562, -136.6502227783203, 387.6026306152344, 459.481201171875, -36.000450134277344, 827.810546875, 560.6068115234375, 205.4661865234375, 630.486328125, -1015.8056640625, 109.82510375976562, 532.6453857421875, 562.0570678710938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000383.npy"} +{"epoch": 0.5624082232011748, "step": 384, "batch_size": 64, "mean": 203.0653533935547, "std": 242.71876525878906, "min": -319.3823547363281, "p10": -115.69941024780272, "median": 198.08783721923828, "p90": 461.929104614258, "max": 987.5548095703125, "pos_frac": 0.828125, "sample": [256.5067443847656, 9.252044677734375, 987.5548095703125, -171.85304260253906, 5.5775146484375, 66.79527282714844, -100.60142517089844, -55.60386657714844, 710.4381713867188, 238.13265991210938, 482.0257263183594, 539.9973754882812, 202.8821563720703, -319.3823547363281, 341.60699462890625, 126.72821044921875, -167.6288604736328, 58.15493392944336, 397.64166259765625, 390.8667907714844, 176.01919555664062, 390.1170349121094, 120.83763122558594, 419.6923828125, 360.3032531738281, 287.50634765625, 121.6430892944336, -282.9016418457031, -279.1959533691406, 78.2383804321289, 311.08013916015625, 59.56733322143555, 63.06507110595703, 257.2893371582031, 213.37867736816406, 60.693382263183594, 419.8462829589844, 302.0643005371094, 348.7236328125, 67.373046875, 76.03987121582031, 160.61961364746094, 402.08251953125, 479.964599609375, 397.3680419921875, 289.9814453125, 356.34521484375, 146.28823852539062, 264.9791564941406, -131.773193359375, 193.29351806640625, -79.34728240966797, 124.79805755615234, 407.62677001953125, 98.04269409179688, -122.16997528076172, -19.717708587646484, 581.4786987304688, 417.3861999511719, 92.31700134277344, 566.8233032226562, 386.8340148925781, 135.53018188476562, 276.9591369628906], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000384.npy"} +{"epoch": 0.5638766519823789, "step": 385, "batch_size": 64, "mean": 234.9559326171875, "std": 243.51223754882812, "min": -459.1950378417969, "p10": -32.65851974487303, "median": 241.05801391601562, "p90": 551.7410339355469, "max": 873.5665283203125, "pos_frac": 0.859375, "sample": [-15.44879150390625, -21.98284149169922, 34.72600555419922, 237.0889892578125, 449.7532043457031, 323.02862548828125, 413.1556701660156, 156.63296508789062, -94.16654205322266, 503.9471435546875, 180.43434143066406, 43.837615966796875, 197.03390502929688, 151.81195068359375, 551.3905639648438, 136.5614013671875, 600.7607421875, 276.53228759765625, 54.84351348876953, 59.729347229003906, 314.3780822753906, 155.30160522460938, -200.47230529785156, 333.7220153808594, -137.92642211914062, 272.41607666015625, 106.96635437011719, 488.30059814453125, 162.2001495361328, 24.744537353515625, 455.20281982421875, 313.14398193359375, 197.1962890625, 741.6229858398438, 873.5665283203125, 115.99562072753906, 256.9223327636719, 3.9865875244140625, 587.8592529296875, 275.7242431640625, 68.92024230957031, 117.59107971191406, 519.958984375, 184.49583435058594, 277.296875, 592.97802734375, 404.44195556640625, -102.6597900390625, 551.8912353515625, 316.3037109375, -459.1950378417969, 46.57478332519531, 166.8953857421875, -237.26272583007812, 245.02703857421875, 295.7199401855469, 484.5810852050781, 147.260986328125, 295.08941650390625, 659.5159912109375, -37.23381042480469, 323.98577880859375, 258.9061584472656, 335.5740661621094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000385.npy"} +{"epoch": 0.5653450807635829, "step": 386, "batch_size": 64, "mean": 253.68276977539062, "std": 325.3328552246094, "min": -530.419677734375, "p10": -142.40756988525385, "median": 190.32553100585938, "p90": 705.7121765136721, "max": 1055.4927978515625, "pos_frac": 0.84375, "sample": [-29.060260772705078, 419.5101318359375, -83.12255096435547, 439.946044921875, 748.983642578125, 169.3233184814453, 120.6148910522461, 511.7702331542969, 195.25454711914062, 470.151611328125, 391.9164123535156, 1055.4927978515625, -344.054931640625, 42.126800537109375, 733.6058959960938, 477.9781494140625, 365.4151611328125, 373.0978088378906, 61.39927673339844, 59.34082794189453, 68.04720306396484, 270.45196533203125, 190.34423828125, 184.97756958007812, 264.4273986816406, 254.26065063476562, 66.1309585571289, -232.44012451171875, 1019.0975952148438, 586.6045532226562, -86.90835571289062, 112.78738403320312, 171.90216064453125, -193.5526123046875, 129.4869842529297, 818.377685546875, 928.9088745117188, 349.2291259765625, 640.6268310546875, 157.63682556152344, 238.13723754882812, 587.576416015625, -243.3156280517578, 149.07322692871094, 98.87960815429688, 167.40447998046875, 161.6497344970703, 193.45237731933594, -166.1929473876953, 432.2878112792969, -530.419677734375, 823.4703369140625, 40.747169494628906, 2.9158554077148438, 74.69430541992188, 181.7890625, 573.0675048828125, 402.6528625488281, -413.0618896484375, 399.3601989746094, 512.171875, 189.81532287597656, 289.1484680175781, 190.30682373046875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000386.npy"} +{"epoch": 0.566813509544787, "step": 387, "batch_size": 64, "mean": 211.57708740234375, "std": 260.4521179199219, "min": -374.242431640625, "p10": -112.6588424682617, "median": 208.24935913085938, "p90": 547.8180114746094, "max": 885.6591186523438, "pos_frac": 0.796875, "sample": [281.95989990234375, -128.74905395507812, 224.28428649902344, 31.066879272460938, 256.9017333984375, 885.6591186523438, 131.80178833007812, 114.28901672363281, 352.0165710449219, 553.69384765625, -299.4765319824219, 342.82452392578125, 122.96240234375, 18.883007049560547, -374.242431640625, 204.23275756835938, 379.947509765625, 559.1414184570312, -57.370460510253906, 184.90390014648438, 812.0158081054688, 54.45084762573242, 232.79574584960938, 708.941650390625, 330.75384521484375, 161.09561157226562, 81.21268463134766, 212.26596069335938, 127.89464569091797, 154.79991149902344, 82.32447052001953, 160.64344787597656, 420.0355529785156, 32.30707931518555, 575.0626220703125, 212.99679565429688, 401.809326171875, 91.52116394042969, 302.2258605957031, 262.3833923339844, 103.2143783569336, -11.34144115447998, 698.5556640625, 453.9288024902344, 348.5535583496094, 72.64178466796875, 490.7353210449219, -12.963268280029297, 534.1077270507812, -95.53047180175781, 411.126708984375, -229.94192504882812, 26.460716247558594, 327.288330078125, -193.30654907226562, 236.31443786621094, -15.517593383789062, 476.76226806640625, -119.99957275390625, -41.77369689941406, 275.044921875, -202.9265899658203, 332.9012756347656, 474.33233642578125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000387.npy"} +{"epoch": 0.5682819383259912, "step": 388, "batch_size": 64, "mean": 287.83770751953125, "std": 333.9605407714844, "min": -534.7452392578125, "p10": -46.87202606201171, "median": 241.0502166748047, "p90": 720.998425292969, "max": 1405.402587890625, "pos_frac": 0.84375, "sample": [1405.402587890625, 623.99267578125, -106.09919738769531, 474.90643310546875, 527.162353515625, 835.7321166992188, 143.07806396484375, -255.8812255859375, 98.83708953857422, 283.89044189453125, 83.83419799804688, 905.7626953125, 506.759033203125, 221.2181396484375, -534.7452392578125, 62.05042266845703, 330.90966796875, 194.3651885986328, 514.0322875976562, -61.942039489746094, 150.48947143554688, 764.5795288085938, 153.69073486328125, -15.644447326660156, 240.94740295410156, 270.47308349609375, 79.83858489990234, -48.52525329589844, 172.56988525390625, 454.8621520996094, 54.97820281982422, 136.3210906982422, 325.73077392578125, 62.734527587890625, 484.046142578125, 245.4949951171875, 81.72105407714844, 101.23072052001953, 540.6048583984375, 170.59591674804688, 429.1775817871094, 32.47807693481445, 454.6384582519531, 97.04669952392578, 171.3785400390625, 15.584022521972656, 396.6583251953125, 323.20611572265625, -161.45986938476562, 383.5251159667969, 327.47625732421875, 1232.1767578125, -136.7263946533203, 420.81451416015625, 251.76869201660156, 21.864410400390625, -43.014495849609375, 646.4971313476562, 752.9275512695312, 812.0770263671875, 579.9996948242188, 531.4906616210938, 241.1530303955078, -39.12782287597656], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000388.npy"} +{"epoch": 0.5697503671071953, "step": 389, "batch_size": 64, "mean": 241.7104034423828, "std": 315.6755676269531, "min": -405.191162109375, "p10": -128.82447967529296, "median": 236.34882354736328, "p90": 599.7081115722657, "max": 1017.663330078125, "pos_frac": 0.734375, "sample": [34.00384521484375, 532.293212890625, 405.5044250488281, -337.4859313964844, 82.51480102539062, 395.36407470703125, 219.34645080566406, 555.6168823242188, 439.29730224609375, 319.67034912109375, 206.37112426757812, -101.95612335205078, 328.84552001953125, -191.2366943359375, 234.3036346435547, 532.8515625, 87.29679107666016, 491.6859130859375, 412.319580078125, -88.14700317382812, -305.9508972167969, -22.84353256225586, 605.0233154296875, 7.052791595458984, 288.0342712402344, 699.48828125, -405.191162109375, -55.75312805175781, 116.2129135131836, 131.8056640625, 225.64581298828125, -77.36203002929688, 157.34864807128906, 236.06790161132812, -251.701904296875, 386.40838623046875, 255.8374481201172, 587.3059692382812, 407.60400390625, 863.0087890625, 481.58221435546875, -91.56834411621094, 501.5434265136719, 437.92474365234375, 265.23260498046875, 371.1547546386719, -8.059663772583008, 48.5454216003418, 440.25946044921875, 104.85285186767578, 97.71170806884766, 1017.663330078125, -12.911209106445312, -134.83346557617188, 252.7648162841797, 869.2847900390625, -4.34033203125, 872.7149047851562, 392.60040283203125, -114.80351257324219, 391.9025573730469, -197.5941619873047, 236.62974548339844, 844.70849609375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000389.npy"} +{"epoch": 0.5712187958883994, "step": 390, "batch_size": 64, "mean": 240.36083984375, "std": 375.01568603515625, "min": -581.954833984375, "p10": -107.34869689941407, "median": 246.84619903564453, "p90": 560.8591613769532, "max": 2113.89208984375, "pos_frac": 0.75, "sample": [241.82981872558594, 257.4346008300781, 632.8641357421875, 491.27996826171875, -545.8934936523438, -107.8434066772461, -276.2784423828125, 106.55537414550781, 456.22686767578125, 251.86257934570312, 487.2942199707031, 51.38215637207031, 302.860595703125, 415.63397216796875, 205.313720703125, -20.580474853515625, -61.04650115966797, 2113.89208984375, -61.662010192871094, 387.3798828125, 138.5072479248047, 512.5113525390625, 614.6461181640625, 172.35035705566406, 206.65480041503906, -89.64178466796875, 136.89068603515625, -3.594757080078125, 199.5568084716797, 293.18231201171875, 191.04736328125, -55.24078369140625, -291.343505859375, -106.19437408447266, 208.19664001464844, 538.1304931640625, 808.4886474609375, 517.6419067382812, -422.5931396484375, 314.4542541503906, 569.1041259765625, 644.9509887695312, 367.1042785644531, 162.00274658203125, 593.7970581054688, 140.79197692871094, 149.75881958007812, 142.59397888183594, 541.6209106445312, 363.12249755859375, 463.3838195800781, 310.917724609375, 516.84912109375, 290.53973388671875, 411.7229309082031, -28.47713279724121, -88.37094116210938, -294.5540466308594, -581.954833984375, 265.8975830078125, 446.32159423828125, 81.71029663085938, 294.89837646484375, 407.2072448730469], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000390.npy"} +{"epoch": 0.5726872246696035, "step": 391, "batch_size": 64, "mean": 149.90504455566406, "std": 287.0014953613281, "min": -690.0018920898438, "p10": -144.47246704101562, "median": 120.6194839477539, "p90": 476.970623779297, "max": 1099.7591552734375, "pos_frac": 0.71875, "sample": [-54.31959915161133, 10.089752197265625, 103.4618148803711, -29.140274047851562, 167.43814086914062, 325.77813720703125, 48.175498962402344, 147.2728271484375, 24.875659942626953, 440.3512878417969, -125.31929779052734, 173.9601593017578, -231.17193603515625, -140.5806884765625, 151.0271453857422, 52.06770324707031, 347.01177978515625, 140.98837280273438, 62.70880889892578, -125.0797119140625, 211.66371154785156, 452.6842041015625, 422.7901916503906, 571.3084716796875, -166.85133361816406, 105.91520690917969, 1099.7591552734375, -220.9222869873047, -60.544342041015625, 387.91448974609375, 487.37908935546875, -123.4814682006836, 89.56532287597656, 44.73056411743164, -66.31649017333984, 334.8286437988281, 77.66275787353516, -129.02426147460938, 492.4502868652344, 135.32376098632812, 100.86434173583984, -144.48422241210938, 202.6717529296875, 183.99288940429688, 688.8555908203125, -154.00241088867188, 607.5009155273438, -87.94481658935547, -342.2972412109375, 35.244346618652344, 58.526451110839844, 302.5835876464844, -144.44503784179688, 449.56976318359375, 384.49920654296875, 50.54792785644531, 813.821533203125, 331.53326416015625, 300.49383544921875, -690.0018920898438, 247.3399658203125, 243.26541137695312, 319.6742858886719, 199.68211364746094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000391.npy"} +{"epoch": 0.5741556534508077, "step": 392, "batch_size": 64, "mean": 282.7807922363281, "std": 289.9497985839844, "min": -219.56715393066406, "p10": -45.39572906494138, "median": 225.93966674804688, "p90": 640.1938720703128, "max": 1240.1517333984375, "pos_frac": 0.859375, "sample": [11.720314025878906, 191.61306762695312, -181.25582885742188, 65.60354614257812, 226.67442321777344, -74.9286880493164, 546.3128662109375, 349.150390625, 397.74981689453125, 393.09478759765625, 671.1105346679688, -192.08790588378906, 222.244384765625, 119.78536987304688, 123.70112609863281, 13.446683883666992, 713.079345703125, 806.520751953125, 509.40838623046875, 225.2049102783203, -101.42766571044922, 467.64434814453125, 489.64678955078125, 57.16990661621094, 267.8248291015625, 435.217041015625, 198.3147430419922, 1240.1517333984375, -174.40200805664062, 207.229736328125, 238.92201232910156, 345.87652587890625, 407.9244384765625, 157.84426879882812, 568.0549926757812, 282.54656982421875, 414.9281005859375, 172.30828857421875, 198.81045532226562, 165.82797241210938, 353.35235595703125, 433.45880126953125, 38.90362548828125, -56.28148651123047, 499.29132080078125, 888.37646484375, 426.2669677734375, 538.5012817382812, 423.6173400878906, -219.56715393066406, 161.53268432617188, 170.8005828857422, 48.377315521240234, 420.68603515625, 137.28347778320312, 155.5012664794922, 252.4005889892578, -19.252246856689453, 1036.85595703125, 8.833633422851562, -19.995628356933594, 104.2242431640625, 415.8467102050781, 720.3946533203125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000392.npy"} +{"epoch": 0.5756240822320118, "step": 393, "batch_size": 64, "mean": 223.15362548828125, "std": 355.16412353515625, "min": -355.1872863769531, "p10": -147.37517547607422, "median": 225.32318115234375, "p90": 627.9534362792972, "max": 1648.14404296875, "pos_frac": 0.703125, "sample": [245.1337432861328, 689.5889892578125, 5.392848968505859, -98.48377227783203, 400.64892578125, 559.5588989257812, -297.0993957519531, 427.0780029296875, 274.4305114746094, 263.6940002441406, 70.82916259765625, 230.47964477539062, 194.98236083984375, 318.5833740234375, 484.72296142578125, 79.7335433959961, 1084.038330078125, 971.0831909179688, 696.25732421875, -9.250175476074219, 184.5379638671875, 413.356201171875, -342.2571716308594, -269.2847900390625, -107.75306701660156, -42.48014450073242, -214.0345916748047, 236.47354125976562, 387.0632019042969, 224.14678955078125, 211.49575805664062, 477.14794921875, 428.14837646484375, 226.49957275390625, 488.99090576171875, 341.999755859375, 320.181640625, 218.53231811523438, 387.7417907714844, -74.45692443847656, 45.93574523925781, 163.147216796875, 657.265380859375, -149.00918579101562, -274.20794677734375, 513.609375, 281.58184814453125, -142.96737670898438, 4.811668395996094, 148.262451171875, 92.82617950439453, 252.90415954589844, -143.56248474121094, -123.89276885986328, -130.18936157226562, 337.5316467285156, 469.9491882324219, -355.1872863769531, 385.976318359375, -8.732170104980469, -118.34949493408203, 1648.14404296875, 700.8740234375, -62.341705322265625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000393.npy"} +{"epoch": 0.5770925110132159, "step": 394, "batch_size": 64, "mean": 235.9246368408203, "std": 308.42108154296875, "min": -346.8325500488281, "p10": -121.58486557006836, "median": 196.89334106445312, "p90": 629.7879150390626, "max": 1110.0362548828125, "pos_frac": 0.796875, "sample": [112.20191955566406, 286.42626953125, -222.12542724609375, 393.4421691894531, 192.06900024414062, -205.56192016601562, 6.780506134033203, 130.14915466308594, 583.322021484375, 71.33816528320312, 728.38330078125, -25.18213653564453, -346.8325500488281, 33.81846618652344, -45.29022216796875, -161.42510986328125, -120.1678237915039, 805.9590454101562, 612.0360107421875, -217.15196228027344, 529.0770263671875, -230.8560791015625, 318.677490234375, 350.7254333496094, 217.97337341308594, 405.39752197265625, 300.03826904296875, 7.7826995849609375, -68.5270004272461, 585.4605712890625, 595.6881713867188, 835.6314697265625, -113.63655090332031, 334.4993896484375, 180.19622802734375, 70.65435791015625, 174.85964965820312, -60.07935333251953, 214.498046875, 637.3958740234375, 4.729866027832031, 520.296875, 37.63222122192383, 203.68531799316406, 171.32762145996094, 105.95733642578125, 545.18359375, 835.9795532226562, 261.18634033203125, 184.89053344726562, 421.28509521484375, 345.58697509765625, 1110.0362548828125, 871.6122436523438, 286.30499267578125, 40.890464782714844, 42.38786315917969, -122.19216918945312, 214.87533569335938, 452.9632263183594, 201.71768188476562, 27.118345260620117, 175.4266357421875, 262.6490783691406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000394.npy"} +{"epoch": 0.57856093979442, "step": 395, "batch_size": 64, "mean": 250.00375366210938, "std": 335.9632263183594, "min": -503.7817687988281, "p10": -156.66450500488276, "median": 233.3552703857422, "p90": 755.5499816894533, "max": 1032.6717529296875, "pos_frac": 0.78125, "sample": [466.0976257324219, -222.03271484375, 234.71185302734375, 59.69465637207031, 243.7101287841797, 368.4043273925781, 281.463134765625, 290.83428955078125, -204.12721252441406, 247.05589294433594, -279.59033203125, 470.1961669921875, 481.8899230957031, -20.626182556152344, 165.16287231445312, -78.20619201660156, 721.7036743164062, 365.1492004394531, 910.6357421875, -95.58768463134766, 663.8456420898438, 18.508766174316406, 486.1156005859375, -174.8806610107422, 231.99868774414062, 15.289031982421875, 198.81631469726562, 373.225341796875, -81.79686737060547, 6.259544372558594, 809.0758056640625, 1009.6134643554688, 484.53289794921875, 124.83990478515625, 320.56884765625, 60.141143798828125, 201.7967529296875, 302.73211669921875, 941.486572265625, 174.2528076171875, 497.204345703125, 593.7601318359375, -503.7817687988281, 13.655807495117188, -204.01376342773438, 338.22113037109375, -78.53013610839844, 831.4951782226562, 16.517587661743164, -114.16014099121094, 51.528045654296875, -218.13424682617188, 53.901756286621094, 1032.6717529296875, 770.0555419921875, 256.3250427246094, 312.15338134765625, 544.1304931640625, 163.48268127441406, 517.7936401367188, 385.6343078613281, 82.10630798339844, 95.29627227783203, -10.033618927001953], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000395.npy"} +{"epoch": 0.580029368575624, "step": 396, "batch_size": 64, "mean": 230.993896484375, "std": 269.79644775390625, "min": -606.1815185546875, "p10": -15.484920883178688, "median": 186.64108276367188, "p90": 579.9358367919925, "max": 1071.8424072265625, "pos_frac": 0.890625, "sample": [158.71580505371094, 627.594970703125, 21.368247985839844, 211.65069580078125, 110.40211486816406, 615.4320678710938, 159.81216430664062, 265.62347412109375, 189.67050170898438, 232.04461669921875, 364.8548278808594, 264.45556640625, 259.6545104980469, 408.8010559082031, -169.32171630859375, 138.34613037109375, 183.61166381835938, 63.83088684082031, 86.81922912597656, 196.342041015625, 57.90277862548828, 462.996826171875, 838.0775756835938, 323.6900329589844, 178.90347290039062, 447.3097229003906, 340.6219482421875, 132.68072509765625, 162.48223876953125, 729.8699951171875, 6.185009002685547, 332.52215576171875, 168.69949340820312, 253.66372680664062, 63.00788497924805, 256.5418395996094, 483.5296630859375, -182.080322265625, 438.91156005859375, 735.4256591796875, 356.4592590332031, -24.77203369140625, -36.6502685546875, 497.1112976074219, 56.28472900390625, -72.99966430664062, 438.2633056640625, 22.47107696533203, 73.17880249023438, 192.41934204101562, 900.7810668945312, 80.6917953491211, 58.69927978515625, -149.64688110351562, 40.87565612792969, 267.30914306640625, 274.2832946777344, 191.11477661132812, 183.50811767578125, -606.1815185546875, 107.19902038574219, 1071.8424072265625, 97.87518310546875, 142.84072875976562], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000396.npy"} +{"epoch": 0.5814977973568282, "step": 397, "batch_size": 64, "mean": 294.732421875, "std": 304.8574523925781, "min": -230.92178344726562, "p10": -35.642400550842275, "median": 246.99508666992188, "p90": 690.0265014648438, "max": 1044.325927734375, "pos_frac": 0.859375, "sample": [29.172000885009766, 526.4375, -20.1715087890625, 401.8146057128906, 163.03802490234375, 59.6529426574707, 257.6119384765625, 330.02093505859375, 539.2899780273438, 160.7864227294922, 696.257568359375, 151.1924285888672, 622.9116821289062, -205.07781982421875, 157.72393798828125, 355.5992736816406, 213.86444091796875, -230.92178344726562, 416.64935302734375, 695.8731689453125, 191.43765258789062, 676.38427734375, 321.4603271484375, 429.5032958984375, 1044.325927734375, 1.7894134521484375, 642.7056884765625, 427.4834289550781, -133.90673828125, 65.27711486816406, 571.0752563476562, 326.468017578125, 97.3355712890625, 970.40625, 492.825927734375, 203.33514404296875, 92.87696838378906, 273.24322509765625, 140.49319458007812, 131.66612243652344, -170.77142333984375, -189.71826171875, -27.465768814086914, -206.16836547851562, 560.4569091796875, 206.94190979003906, 104.00006866455078, 133.42303466796875, -39.146671295166016, 597.4191284179688, 4.913494110107422, 782.0635375976562, 163.96554565429688, 1029.3212890625, 260.09649658203125, 995.4345703125, 170.8020782470703, 236.37823486328125, 501.55926513671875, 135.89663696289062, 258.06048583984375, 300.2929382324219, 435.9131774902344, 331.2965087890625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000397.npy"} +{"epoch": 0.5829662261380323, "step": 398, "batch_size": 64, "mean": 270.20220947265625, "std": 339.3094787597656, "min": -398.98126220703125, "p10": -71.88291854858397, "median": 176.06005859375, "p90": 689.2257568359377, "max": 1250.0809326171875, "pos_frac": 0.8125, "sample": [70.20703125, 106.47987365722656, 475.4295959472656, 662.7666625976562, -91.16502380371094, 11.288978576660156, 323.812255859375, -268.60186767578125, 38.73887634277344, -120.65166473388672, 560.239501953125, -398.98126220703125, 700.5653686523438, 389.83502197265625, 498.8801574707031, 1103.040771484375, 169.6628875732422, 164.44866943359375, 545.3502197265625, 287.26904296875, 537.753173828125, 177.35491943359375, 555.5469970703125, 411.56781005859375, 1115.03857421875, -38.09492492675781, 261.99261474609375, 197.99671936035156, 792.40771484375, 393.53985595703125, 48.622344970703125, 174.91000366210938, 77.4715576171875, 5.9272308349609375, 174.25900268554688, 96.93879699707031, 84.87102508544922, 379.8016357421875, 39.49280548095703, 419.15386962890625, 117.97761535644531, 490.8159484863281, 147.37704467773438, 69.84613800048828, 161.94992065429688, -64.79508209228516, 372.6502380371094, 58.350555419921875, -31.019973754882812, 60.31884765625, 177.21011352539062, 1250.0809326171875, 616.5345458984375, 902.1725463867188, -314.9755554199219, 553.0744018554688, 435.6556396484375, 588.041015625, 733.4683837890625, 183.17529296875, -54.17871856689453, -74.92056274414062, -50.49353790283203, -170.541259765625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000398.npy"} +{"epoch": 0.5844346549192364, "step": 399, "batch_size": 64, "mean": 232.86073303222656, "std": 348.07135009765625, "min": -535.3818359375, "p10": -213.0395782470703, "median": 220.87567901611328, "p90": 652.881884765625, "max": 1094.468017578125, "pos_frac": 0.75, "sample": [97.7447738647461, 382.37554931640625, 102.27977752685547, -301.61578369140625, 596.044677734375, -55.97124481201172, 706.9805908203125, 15.552715301513672, 52.238502502441406, -87.89627075195312, 258.7762451171875, -346.1838684082031, 604.8640747070312, -7.32745361328125, 633.363037109375, 174.9290008544922, 424.14251708984375, -53.259239196777344, 0.5933780670166016, 95.03663635253906, 716.1260986328125, 203.20416259765625, 408.226318359375, 261.5340881347656, 628.5548706054688, 835.126220703125, 194.16644287109375, 410.86480712890625, -406.38592529296875, 225.43817138671875, -59.21592712402344, 448.2188415527344, 231.25985717773438, 148.73777770996094, 458.6539001464844, -218.77505493164062, 1094.468017578125, 646.7216796875, 579.56298828125, -321.66387939453125, 377.3785095214844, 542.37109375, 223.0786590576172, -535.3818359375, -64.44607543945312, 354.795166015625, 83.94602966308594, 734.8847045898438, 80.08330535888672, -199.65679931640625, 496.1304626464844, 956.6478881835938, 529.1875, 119.43284606933594, 655.52197265625, 152.6520538330078, 218.67269897460938, 254.72915649414062, -483.6170959472656, 244.62496948242188, 411.1126403808594, -91.80146789550781, 160.90567016601562, -95.65648651123047], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000399.npy"} +{"epoch": 0.5859030837004405, "step": 400, "batch_size": 64, "mean": 339.0614013671875, "std": 304.7965087890625, "min": -412.3353271484375, "p10": -35.5014892578125, "median": 342.85658264160156, "p90": 687.8160705566407, "max": 1172.9298095703125, "pos_frac": 0.828125, "sample": [291.1367492675781, 477.27142333984375, 537.4201049804688, 230.91146850585938, 549.8603515625, -36.092132568359375, 452.16009521484375, 232.61941528320312, -82.29601287841797, 632.1436767578125, 204.18212890625, 162.25006103515625, 591.3641357421875, 285.77276611328125, -412.3353271484375, 591.0698852539062, 702.3568115234375, 232.94229125976562, 415.9012451171875, 311.8809814453125, 807.4368286132812, 584.4735717773438, -34.123321533203125, 371.0823059082031, -2.208841323852539, 324.96563720703125, 42.31449890136719, 490.5101318359375, 377.27716064453125, 357.37371826171875, 334.9610290527344, 107.99725341796875, -107.12207794189453, 328.0616455078125, 320.1820983886719, -207.34083557128906, 21.993919372558594, 695.4159545898438, 354.9126281738281, 536.6262817382812, -27.285354614257812, 321.6119689941406, 359.2885437011719, 1019.8751220703125, 11.805299758911133, 654.2063598632812, 350.75213623046875, 165.38803100585938, 14.065418243408203, 1172.9298095703125, 724.194091796875, -24.046875, 280.38360595703125, 518.5367431640625, 261.8148193359375, 595.7791748046875, -234.3490753173828, 669.8052978515625, 659.0403442382812, 355.5230712890625, 369.30755615234375, 670.0830078125, 793.4271240234375, -57.515380859375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000400.npy"} +{"epoch": 0.5873715124816447, "step": 401, "batch_size": 64, "mean": 187.20950317382812, "std": 395.0918273925781, "min": -817.1409301757812, "p10": -254.2351577758789, "median": 125.24497985839844, "p90": 804.0902832031252, "max": 1134.44580078125, "pos_frac": 0.703125, "sample": [130.19349670410156, 373.71905517578125, -15.925640106201172, 250.22186279296875, 600.3928833007812, 97.45711517333984, -376.2322082519531, -441.3616638183594, -817.1409301757812, -121.96867370605469, 94.37776947021484, 350.8846740722656, -182.70355224609375, 54.429954528808594, -63.216522216796875, 6.426660537719727, 341.9019470214844, 259.7930603027344, 142.55258178710938, 120.29646301269531, -37.82891845703125, -260.91497802734375, -72.00843048095703, 1033.179443359375, 14.840002059936523, 716.55517578125, 877.5582885742188, 450.8248291015625, 834.0294799804688, 202.02857971191406, 73.51687622070312, 334.22686767578125, -49.364315032958984, 4.357460021972656, -78.87215423583984, -488.0901794433594, 993.0338134765625, -58.4874382019043, 192.2738800048828, 495.5409240722656, 193.5678253173828, 548.4740600585938, 332.6802062988281, -51.27006530761719, 301.5123291015625, 135.28045654296875, 150.58241271972656, -238.64891052246094, 1134.44580078125, -531.3451538085938, 595.3128051757812, 486.75970458984375, 299.7712097167969, 107.18309783935547, 910.214111328125, 826.0050048828125, 51.757728576660156, 322.3324890136719, 31.27459716796875, 41.093650817871094, 110.06768798828125, 752.9559326171875, -270.7831726074219, -238.31298828125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000401.npy"} +{"epoch": 0.5888399412628488, "step": 402, "batch_size": 64, "mean": 366.7662353515625, "std": 324.39715576171875, "min": -207.52130126953125, "p10": 13.803729248046881, "median": 320.5401306152344, "p90": 846.4790039062501, "max": 1481.716064453125, "pos_frac": 0.90625, "sample": [412.169677734375, 27.90791130065918, 104.40511322021484, 438.9450988769531, 285.6449279785156, 381.64898681640625, -36.03056335449219, 254.60311889648438, -99.4172134399414, 686.6248779296875, 445.4093017578125, 619.6453247070312, 810.5753784179688, 209.34690856933594, 27.4608154296875, 575.8631591796875, -14.787307739257812, 422.922119140625, 238.29588317871094, 325.9520263671875, 575.2750244140625, 299.5408020019531, 541.1373291015625, -207.52130126953125, 276.9683532714844, 488.36102294921875, 86.17840576171875, 954.63232421875, 20.043380737304688, 996.6810302734375, 222.634521484375, 409.95263671875, 268.8663635253906, 416.51239013671875, 64.83850860595703, 324.6876220703125, 861.8662719726562, 46.759368896484375, 182.16091918945312, 697.1256103515625, 11.129592895507812, -48.15785598754883, 782.4923095703125, 1050.544921875, 284.1683654785156, 460.7815246582031, 879.3197021484375, 161.5389404296875, 346.3011169433594, 1481.716064453125, 471.50311279296875, 583.2674560546875, 55.45695495605469, 936.0079345703125, 297.94580078125, 316.39263916015625, 396.2154541015625, 333.30255126953125, 159.67788696289062, -201.17782592773438, 401.4382629394531, 130.01858520507812, 240.51779174804688, 298.7491760253906], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000402.npy"} +{"epoch": 0.5903083700440529, "step": 403, "batch_size": 64, "mean": 273.35260009765625, "std": 308.1062316894531, "min": -303.9697265625, "p10": -71.56012496948242, "median": 222.5445098876953, "p90": 732.4203063964844, "max": 1201.095703125, "pos_frac": 0.84375, "sample": [131.85537719726562, 41.992652893066406, 128.79183959960938, 176.56195068359375, 355.7851867675781, 5.238010406494141, 220.65200805664062, 350.5179443359375, -64.46344757080078, -269.56329345703125, 155.17648315429688, -70.40719604492188, -303.9697265625, 496.0190734863281, 209.7420196533203, 829.378662109375, 311.6113586425781, 33.27123260498047, 374.5491943359375, 188.21925354003906, 373.67608642578125, 53.60893630981445, 739.7606201171875, -72.05423736572266, 6.566934585571289, 579.8363647460938, 527.5090942382812, 744.9550170898438, 276.4792785644531, 892.6600341796875, -113.95405578613281, 531.9029541015625, 715.2929077148438, 3.5929527282714844, 31.95287322998047, 574.072265625, -154.85232543945312, 429.1263122558594, 629.4918823242188, 527.933349609375, -217.2263641357422, 258.4054260253906, 1201.095703125, 37.25718688964844, 521.6539916992188, 128.46351623535156, 415.1238098144531, 768.6668701171875, 388.4042053222656, 847.53662109375, 331.1045837402344, 451.9814147949219, 301.3352355957031, 132.78431701660156, -19.293092727661133, 39.50190734863281, 59.90888214111328, 386.73712158203125, 127.397705078125, 87.95658874511719, 342.1736755371094, 224.43701171875, -87.16384887695312, 167.80908203125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000403.npy"} +{"epoch": 0.591776798825257, "step": 404, "batch_size": 64, "mean": 269.9066162109375, "std": 381.5378112792969, "min": -684.0192260742188, "p10": -251.47975158691403, "median": 244.2093963623047, "p90": 721.6809326171875, "max": 1423.4857177734375, "pos_frac": 0.78125, "sample": [-273.05322265625, 290.151123046875, 57.36278533935547, 642.0062255859375, 523.1925659179688, 216.6696014404297, 420.2720947265625, 908.805908203125, 178.82342529296875, 812.747802734375, 1423.4857177734375, 171.34033203125, 246.89329528808594, 836.34375, 88.92015838623047, -341.2397155761719, 880.3390502929688, 212.54376220703125, -422.4305419921875, 162.92869567871094, -147.33139038085938, -12.806724548339844, -117.36924743652344, 479.76727294921875, 318.402587890625, 226.27706909179688, -99.60092163085938, 313.7262878417969, 230.8934783935547, 464.5797424316406, 521.3045654296875, -262.2615966796875, 436.66998291015625, 156.06759643554688, 615.5096435546875, 105.02873992919922, -154.49632263183594, 241.52549743652344, 432.88104248046875, 644.9520263671875, -684.0192260742188, 450.8821716308594, 68.32807922363281, 530.113037109375, 239.44183349609375, -362.8841552734375, -226.32211303710938, 598.5556030273438, 611.4739990234375, -85.47950744628906, 375.4361267089844, 39.118736267089844, 465.0877990722656, 347.5313720703125, 564.8466796875, 729.3192138671875, 296.5708923339844, 97.18946838378906, 11.150304794311523, -308.2635498046875, 914.8181762695312, 15.543205261230469, 703.8582763671875, 451.9049072265625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000404.npy"} +{"epoch": 0.593245227606461, "step": 405, "batch_size": 64, "mean": 219.40936279296875, "std": 402.1845397949219, "min": -494.6555480957031, "p10": -321.4615905761719, "median": 161.90501403808594, "p90": 773.0519836425782, "max": 1138.3775634765625, "pos_frac": 0.671875, "sample": [927.1019897460938, 751.476318359375, 309.4891662597656, -113.24763488769531, -452.66424560546875, -98.51138305664062, -414.38671875, 73.60104370117188, 482.6322021484375, -394.60113525390625, 55.193504333496094, 998.9348754882812, 549.570068359375, 166.10400390625, 103.24994659423828, 1138.3775634765625, -92.59307861328125, -9.698402404785156, -15.762847900390625, -127.21310424804688, 124.03156280517578, 235.5226593017578, 179.6584014892578, 577.6359252929688, 141.19073486328125, 168.0880584716797, -323.0899658203125, 776.7298583984375, -317.66204833984375, -121.9260482788086, 438.0561218261719, 91.32125854492188, 451.47265625, -83.01000213623047, 395.324951171875, 143.40377807617188, 269.7410888671875, -445.27789306640625, -66.4899673461914, 115.69607543945312, 512.2516479492188, 157.70602416992188, 724.70458984375, 345.5540466308594, -50.00082778930664, 154.60943603515625, 639.09423828125, -288.3902282714844, 348.8661804199219, 335.88446044921875, 470.54718017578125, -355.382080078125, 282.06158447265625, 908.8504638671875, 146.15335083007812, -494.6555480957031, 493.05816650390625, 346.67474365234375, -150.93701171875, 975.0057373046875, 938.97509765625, -217.91619873046875, 467.54400634765625, 764.4702758789062], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000405.npy"} +{"epoch": 0.5947136563876652, "step": 406, "batch_size": 64, "mean": 284.07568359375, "std": 357.8520202636719, "min": -502.72711181640625, "p10": -93.3913955688476, "median": 243.05237579345703, "p90": 708.3157348632814, "max": 1336.565185546875, "pos_frac": 0.796875, "sample": [462.38720703125, 160.73818969726562, 243.87545776367188, 74.87162780761719, 128.17236328125, 257.3855285644531, 248.72427368164062, -460.3804931640625, 1121.35107421875, 156.5694580078125, 576.2767944335938, 252.58900451660156, 1336.565185546875, 453.67291259765625, 328.6390380859375, -37.87871170043945, 371.9920654296875, 319.18389892578125, 528.2931518554688, -136.28929138183594, 865.1044921875, 409.7197570800781, 313.236572265625, -114.72901916503906, 67.92823028564453, 211.8005828857422, 12.303253173828125, -123.05270385742188, 549.3563232421875, -43.603607177734375, -29.172256469726562, 242.2292938232422, 634.2750244140625, -317.2239990234375, -149.048583984375, 515.3319091796875, 855.709228515625, 174.99285888671875, 199.89808654785156, 13.875093460083008, 220.4341583251953, 164.06503295898438, 411.42108154296875, -42.54175567626953, 52.997528076171875, -4.193199157714844, 684.0306396484375, 334.93096923828125, 49.928253173828125, 239.84442138671875, 718.7236328125, 653.0961303710938, 375.08416748046875, 87.29722595214844, -502.72711181640625, 19.936649322509766, 956.2638549804688, 639.9661254882812, 1077.5164794921875, 351.6293029785156, 304.3055419921875, 618.1387329101562, 120.32402038574219, -25.296653747558594], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000406.npy"} +{"epoch": 0.5961820851688693, "step": 407, "batch_size": 64, "mean": 194.8406982421875, "std": 341.65191650390625, "min": -900.896728515625, "p10": -224.8832305908203, "median": 150.16474151611328, "p90": 592.8716796875, "max": 885.0695190429688, "pos_frac": 0.734375, "sample": [-319.27020263671875, 534.6441650390625, 553.2940673828125, -348.98553466796875, -288.6561279296875, -80.56270599365234, 173.5597381591797, -212.25466918945312, -120.07278442382812, 278.5543212890625, 556.358642578125, 136.5691375732422, 398.2083740234375, 863.5379028320312, 636.7147827148438, 37.53216552734375, -98.50479125976562, 590.8201293945312, 121.75556182861328, 5.9379730224609375, 402.0997314453125, 142.9923095703125, 76.96638488769531, 134.54229736328125, 101.98100280761719, 561.2598876953125, 268.703369140625, 264.54010009765625, 61.06721496582031, 87.89201354980469, -176.88150024414062, 187.12728881835938, -3.3968429565429688, -136.79483032226562, 83.37960052490234, 98.36979675292969, 462.8149108886719, 811.76220703125, 92.99290466308594, -900.896728515625, 780.9198608398438, 394.8367919921875, 435.73760986328125, 57.860694885253906, 396.00628662109375, 208.7926788330078, 292.77099609375, 885.0695190429688, 526.0269165039062, 280.36614990234375, 593.7509155273438, -230.29547119140625, -29.667400360107422, 587.5682373046875, -26.96710968017578, 89.9526138305664, 157.33717346191406, 588.4739379882812, -112.96971130371094, -375.99285888671875, 673.392578125, 301.979736328125, -266.47235107421875, 221.62530517578125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000407.npy"} +{"epoch": 0.5976505139500734, "step": 408, "batch_size": 64, "mean": 201.551025390625, "std": 275.1313171386719, "min": -331.74371337890625, "p10": -165.34100494384765, "median": 188.1929473876953, "p90": 552.5892150878907, "max": 791.1576538085938, "pos_frac": 0.78125, "sample": [158.56846618652344, 618.2251586914062, 732.7880859375, -85.65185546875, 480.6546630859375, 368.39276123046875, -67.2987060546875, 168.263916015625, -172.66815185546875, -139.0583953857422, 26.41710090637207, -287.48663330078125, -22.277549743652344, 19.97747802734375, 13.404090881347656, -193.59678649902344, 78.20237731933594, 281.01995849609375, 355.2868957519531, 26.428144454956055, 118.22251892089844, 431.885009765625, -131.9908447265625, 313.7124328613281, -169.31797790527344, 213.64382934570312, -19.033212661743164, 215.51617431640625, 77.7904281616211, 791.1576538085938, 174.6196746826172, 204.67544555664062, 554.5697021484375, 50.7972412109375, -331.74371337890625, -319.7526550292969, 240.47869873046875, 208.6339569091797, 46.69889831542969, 190.79855346679688, 473.9967956542969, 496.2760925292969, 374.10577392578125, 380.3115234375, 166.14439392089844, -156.0614013671875, 775.1561889648438, 135.57081604003906, 6.185993194580078, 51.983428955078125, 185.58734130859375, 547.9680786132812, 276.90472412109375, -171.60369873046875, 297.45428466796875, 340.4999694824219, 84.23908233642578, 532.0550537109375, 270.3668212890625, 354.3951110839844, 724.2979736328125, 660.955322265625, 459.25054931640625, 412.2721862792969], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000408.npy"} +{"epoch": 0.5991189427312775, "step": 409, "batch_size": 64, "mean": 196.5434112548828, "std": 315.7867736816406, "min": -833.4735107421875, "p10": -139.1386306762695, "median": 189.46060943603516, "p90": 576.9329284667969, "max": 1067.30419921875, "pos_frac": 0.734375, "sample": [-70.93243408203125, -308.32440185546875, 378.1640930175781, 219.42958068847656, 172.1090850830078, 342.14398193359375, -86.6004409790039, -236.9063720703125, -118.44790649414062, 193.7703094482422, 509.6694030761719, 467.8240661621094, -266.78369140625, 185.15090942382812, 575.5882568359375, 125.4706039428711, 233.58663940429688, 249.1338653564453, 138.39154052734375, 786.1893310546875, 438.3092956542969, -159.82907104492188, 352.0496520996094, 40.79240036010742, 651.9801635742188, -105.76580047607422, -17.22534942626953, 119.29400634765625, -120.90420532226562, 181.6020050048828, 303.0626220703125, 656.4725952148438, 543.7303466796875, -833.4735107421875, 36.45343780517578, 1067.30419921875, -70.0630111694336, 111.28659057617188, -305.7100830078125, 200.86672973632812, 496.0109558105469, -102.42723846435547, 577.5092163085938, -30.271656036376953, 212.28662109375, 227.0340576171875, 83.15117645263672, 202.9640655517578, 735.2702026367188, 545.56591796875, -88.44019317626953, 174.2865753173828, -146.95338439941406, 195.02137756347656, 81.60841369628906, 473.1543273925781, 344.9891357421875, 390.61627197265625, 199.85134887695312, 155.6649627685547, 788.358154296875, 301.3734130859375, 54.50445556640625, 128.79061889648438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000409.npy"} +{"epoch": 0.6005873715124816, "step": 410, "batch_size": 64, "mean": 235.8787384033203, "std": 290.82684326171875, "min": -527.23583984375, "p10": -74.96406936645506, "median": 256.0280456542969, "p90": 567.0216369628906, "max": 869.446044921875, "pos_frac": 0.8125, "sample": [156.84152221679688, 309.5304870605469, -78.56709289550781, 73.68335723876953, 554.1390991210938, 319.06317138671875, 473.6495666503906, 460.1228942871094, 510.3213806152344, 26.979543685913086, 121.60166931152344, 218.45974731445312, 6.5851898193359375, 473.9327392578125, -46.387733459472656, -124.2781982421875, 574.8896484375, -489.93890380859375, -86.87942504882812, 402.79876708984375, 483.17218017578125, 764.5584716796875, -39.46485900878906, 108.44789123535156, 552.355224609375, 643.271240234375, 415.9500732421875, -17.03055191040039, 304.25482177734375, 869.446044921875, 279.0323181152344, 32.936893463134766, 323.2472229003906, 36.497772216796875, 419.04730224609375, -62.397857666015625, -66.55701446533203, 18.37339210510254, 107.658203125, -212.9278564453125, 212.3544158935547, 405.74884033203125, 167.0377197265625, 395.5616455078125, 233.02377319335938, -527.23583984375, 290.2525939941406, 108.24678039550781, 696.755859375, 368.7322998046875, 132.64739990234375, 22.907629013061523, 480.69647216796875, 572.542724609375, 285.9205322265625, 771.82373046875, 121.44526672363281, 490.85931396484375, -405.52996826171875, 209.77207946777344, 366.4223327636719, 0.6447658538818359, 509.2724609375, 369.91766357421875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000410.npy"} +{"epoch": 0.6020558002936858, "step": 411, "batch_size": 64, "mean": 230.70909118652344, "std": 343.71893310546875, "min": -632.426025390625, "p10": -90.77662200927735, "median": 184.63556671142578, "p90": 671.6718750000002, "max": 1408.5560302734375, "pos_frac": 0.75, "sample": [200.74700927734375, -64.64273834228516, -129.64498901367188, -5.228065490722656, 591.8232421875, 260.0802307128906, 154.10028076171875, 185.51602172851562, 183.75511169433594, 379.4820556640625, 227.45303344726562, -11.851480484008789, 127.92052459716797, 16.938907623291016, 92.64800262451172, -53.57373046875, 408.1510314941406, 254.96620178222656, 100.06805419921875, 424.8401794433594, 880.0967407226562, 279.2120666503906, -90.68069458007812, -29.47075843811035, 69.5765151977539, 205.49676513671875, 246.70216369628906, 317.81488037109375, 102.11639404296875, 1093.0447998046875, 94.36529541015625, 396.7589416503906, 930.8134155273438, -124.71724700927734, 577.1226196289062, -15.181671142578125, 270.62176513671875, 694.0955200195312, 619.3500366210938, 150.92962646484375, 39.337425231933594, 369.7733154296875, 97.80298614501953, 276.16943359375, 143.78680419921875, 334.77301025390625, -35.84238815307617, -632.426025390625, 1408.5560302734375, 282.09832763671875, 163.88270568847656, 271.2453918457031, 556.2384643554688, 788.2689208984375, 590.8577880859375, -256.718994140625, -59.483070373535156, -414.65008544921875, 199.09466552734375, 21.066312789916992, 787.176513671875, -134.2936248779297, 47.86936950683594, -90.81773376464844], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000411.npy"} +{"epoch": 0.6035242290748899, "step": 412, "batch_size": 64, "mean": 253.1304931640625, "std": 328.23455810546875, "min": -595.6657104492188, "p10": -106.30849990844726, "median": 255.26239013671875, "p90": 694.6133666992188, "max": 1117.9539794921875, "pos_frac": 0.765625, "sample": [-63.82079315185547, 161.49134826660156, 474.83929443359375, 529.036865234375, 297.86737060546875, -99.97478485107422, 682.7488403320312, -64.9773941040039, 267.25555419921875, 690.0323486328125, 270.871337890625, 351.8661193847656, 71.65966796875, 455.40081787109375, 1.6921215057373047, 700.0023803710938, 407.1143798828125, 78.50440216064453, -564.8444213867188, 243.04794311523438, 75.48316192626953, 5.651985168457031, 586.7341918945312, 263.362060546875, 90.15812683105469, 385.6208801269531, -25.448699951171875, 389.857177734375, -113.53093719482422, 247.1627197265625, -36.64940643310547, 72.0461196899414, 194.40896606445312, 279.1739196777344, 158.6228485107422, 15.10279655456543, -190.3847198486328, -154.6533966064453, 110.79564666748047, 884.9666748046875, 37.67242431640625, -595.6657104492188, 464.03472900390625, 623.9534301757812, 673.0371704101562, -172.3134307861328, 469.6165771484375, 700.2470703125, -54.40673828125, 498.8423156738281, 328.8270568847656, -19.071840286254883, 78.65339660644531, 712.7257080078125, 698.793701171875, 1117.9539794921875, -55.3880500793457, 386.191162109375, 403.7828369140625, -109.02294921875, 445.3824462890625, 237.95233154296875, 696.57666015625, 503.6831970214844], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000412.npy"} +{"epoch": 0.604992657856094, "step": 413, "batch_size": 64, "mean": 244.2897186279297, "std": 311.13800048828125, "min": -940.4296264648438, "p10": -118.79284896850584, "median": 253.27735900878906, "p90": 615.6925537109377, "max": 998.5614624023438, "pos_frac": 0.875, "sample": [110.17117309570312, 29.206256866455078, 401.68621826171875, 443.1388244628906, 150.72308349609375, 119.66252136230469, 125.25103759765625, 254.59262084960938, 57.249122619628906, 214.02072143554688, 712.7627563476562, 980.7496337890625, -95.18964385986328, 251.96209716796875, 48.239723205566406, 16.26512336730957, 395.75543212890625, 39.724666595458984, 87.28334045410156, 627.087646484375, 72.16835021972656, 646.4810180664062, 410.9121398925781, -940.4296264648438, 134.75506591796875, 308.27679443359375, 302.86419677734375, 7.073234558105469, 428.00933837890625, 998.5614624023438, 230.5380401611328, 387.8749084472656, 589.10400390625, 314.046630859375, -227.53787231445312, 467.157958984375, 294.03863525390625, 222.3209228515625, 325.8384704589844, 352.3487548828125, 160.9163055419922, 137.7389678955078, 485.6929626464844, -197.80972290039062, 560.1712036132812, 471.37615966796875, 277.9142150878906, -295.7372131347656, -351.6568603515625, -264.61834716796875, 101.27876281738281, 482.09307861328125, 411.36700439453125, 641.5069580078125, 31.726238250732422, 491.6624450683594, 401.9386291503906, 644.29833984375, 202.7418975830078, 347.5897216796875, 155.7283935546875, 251.3335723876953, 321.45391845703125, -128.90850830078125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000413.npy"} +{"epoch": 0.6064610866372981, "step": 414, "batch_size": 64, "mean": 269.3136901855469, "std": 322.754150390625, "min": -749.0953979492188, "p10": -70.28399887084959, "median": 256.5883331298828, "p90": 673.1127136230471, "max": 1096.38037109375, "pos_frac": 0.828125, "sample": [376.245361328125, 257.2325134277344, 383.25726318359375, 251.50489807128906, -24.17898178100586, 926.4356079101562, 298.959228515625, -749.0953979492188, 111.95652770996094, 180.7137451171875, 365.80352783203125, 234.07647705078125, 471.1199951171875, 877.6083984375, 270.1459045410156, -41.27992248535156, 430.17083740234375, 60.35467529296875, -82.71431732177734, 186.82398986816406, 314.67132568359375, 458.5489501953125, -159.86180114746094, 205.99276733398438, 540.8424682617188, 341.44549560546875, -286.0024108886719, 8.29737663269043, 472.35498046875, -19.064743041992188, 545.2491455078125, 489.9697265625, 10.433460235595703, 319.34332275390625, 724.5046997070312, 116.021240234375, 475.82574462890625, -82.93400573730469, -324.56610107421875, 215.7863006591797, 341.1746826171875, 255.94415283203125, 692.5812377929688, -171.33493041992188, 92.60150146484375, 532.2159423828125, 36.15606689453125, 627.6861572265625, 116.49250030517578, -19.315723419189453, 226.8127899169922, 211.0054168701172, 153.30499267578125, 63.63134765625, 265.30194091796875, 289.4210205078125, 893.009765625, 13.597381591796875, 479.9442138671875, 992.9955444335938, 1096.38037109375, 408.0601806640625, 131.00376892089844, 355.40692138671875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000414.npy"} +{"epoch": 0.6079295154185022, "step": 415, "batch_size": 64, "mean": 208.97039794921875, "std": 313.83612060546875, "min": -809.3323974609375, "p10": -153.60065612792968, "median": 222.4354248046875, "p90": 637.7241333007813, "max": 1014.389404296875, "pos_frac": 0.765625, "sample": [336.0704040527344, -163.10882568359375, 413.484619140625, 837.0416259765625, -157.2664031982422, 711.917236328125, 572.9298095703125, 127.8598861694336, -8.839622497558594, 217.6315460205078, 489.0483703613281, 335.2336120605469, 499.1878662109375, 371.8639221191406, -125.96382904052734, 60.05693435668945, 229.6783447265625, -255.69284057617188, 290.54888916015625, 227.2393035888672, 8.178953170776367, 77.02630615234375, 3.35260009765625, 649.0050048828125, -157.19454956054688, 67.74088287353516, 760.7784423828125, -101.66593170166016, 123.11383819580078, 781.5568237304688, -145.21490478515625, 369.3182373046875, 328.9309387207031, 50.41619873046875, 1014.389404296875, -241.06227111816406, -101.74781799316406, 120.1988525390625, 383.31854248046875, 784.0543212890625, 150.46742248535156, 275.39300537109375, 246.088134765625, 79.81524658203125, 131.22476196289062, 171.2732391357422, -13.101232528686523, 309.5242919921875, 398.4093017578125, -79.2219009399414, 248.8077392578125, 446.5937194824219, 258.5271301269531, 37.300048828125, 338.53070068359375, -6.154966354370117, 286.0832824707031, 17.353668212890625, 378.8543701171875, -271.7456359863281, 611.402099609375, 97.38114929199219, -809.3323974609375, 287.2178955078125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000415.npy"} +{"epoch": 0.6093979441997063, "step": 416, "batch_size": 64, "mean": 317.17578125, "std": 296.93975830078125, "min": -185.34222412109375, "p10": -86.94971923828125, "median": 297.5453338623047, "p90": 675.4453002929688, "max": 1063.880615234375, "pos_frac": 0.84375, "sample": [811.9292602539062, 81.74085235595703, 264.5537109375, 416.46923828125, -98.59493255615234, 532.9894409179688, 259.46575927734375, 37.89842987060547, 542.3731079101562, 616.3674926757812, -91.77433013916016, -4.429351806640625, 89.93946075439453, 352.1640625, 263.9356994628906, 582.0007934570312, 532.3751831054688, 0.00521087646484375, 583.7206420898438, 198.65757751464844, 422.1567687988281, 497.5319519042969, 90.41255187988281, 209.05032348632812, 655.23583984375, 191.90907287597656, 343.3585205078125, 745.9104614257812, 71.96829223632812, 795.2688598632812, 336.9678955078125, 541.0617065429688, 1063.880615234375, -185.34222412109375, -153.465576171875, 60.909568786621094, 588.1246337890625, 493.922119140625, 29.16545867919922, 832.7872314453125, -39.39636993408203, 978.9902954101562, 316.27593994140625, 199.6333770751953, 548.2632446289062, 483.9456787109375, 331.12872314453125, 609.6666870117188, 278.8147277832031, 97.42151641845703, 52.91770553588867, 246.3002471923828, -87.40823364257812, 681.11962890625, 344.0788269042969, -95.23211669921875, 358.916748046875, -85.87985229492188, 129.86468505859375, -130.2972869873047, 98.23336029052734, 213.1627197265625, 503.92315673828125, 662.2052001953125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000416.npy"} +{"epoch": 0.6108663729809104, "step": 417, "batch_size": 64, "mean": 265.19757080078125, "std": 273.15289306640625, "min": -359.24462890625, "p10": -94.64552612304686, "median": 274.92132568359375, "p90": 668.8948852539063, "max": 871.2491455078125, "pos_frac": 0.84375, "sample": [86.40538787841797, 39.56288146972656, -5.412115097045898, 270.56817626953125, 14.186443328857422, 558.7474365234375, 33.61585998535156, 279.27447509765625, -161.81716918945312, 674.4728393554688, 1.9660987854003906, 668.4320068359375, 385.5684814453125, 69.75900268554688, 181.5325164794922, -114.07545471191406, -143.05934143066406, 737.8098754882812, 703.7086181640625, 90.28909301757812, 669.09326171875, 283.2391052246094, 823.2508544921875, 117.2589111328125, 610.6478881835938, 871.2491455078125, 337.85552978515625, 252.30599975585938, 238.06137084960938, 383.0673828125, 77.73145294189453, 76.47157287597656, -46.35511016845703, 121.82816314697266, 448.7768859863281, 595.3901977539062, 137.44277954101562, 303.85089111328125, 460.3649597167969, 285.8050537109375, 75.83047485351562, 373.6109924316406, 302.199951171875, 302.2003173828125, -102.60955810546875, 343.40081787109375, 163.578369140625, 116.70345306396484, 459.03704833984375, 330.3131103515625, 419.5108642578125, -359.24462890625, 396.1072692871094, 392.1716003417969, -80.53304290771484, -243.87478637695312, 218.96820068359375, 144.68359375, 674.0611572265625, 484.735107421875, 558.7608032226562, 153.59739685058594, 531.2586669921875, -100.69373321533203], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000417.npy"} +{"epoch": 0.6123348017621145, "step": 418, "batch_size": 64, "mean": 268.684326171875, "std": 371.0915832519531, "min": -403.1644287109375, "p10": -170.99675292968746, "median": 225.13748168945312, "p90": 643.6613098144531, "max": 1292.5797119140625, "pos_frac": 0.765625, "sample": [213.89105224609375, -231.24583435058594, 236.3839111328125, -49.90547561645508, 82.50776672363281, 1063.75146484375, 88.2916030883789, 69.72149658203125, 572.4496459960938, 591.3245849609375, -28.10479736328125, 14.48016357421875, 262.92120361328125, 48.228004455566406, 1120.4989013671875, 448.2515869140625, 463.98333740234375, -186.72901916503906, 359.8679504394531, -52.75848388671875, -260.3280334472656, 630.3984375, -234.54302978515625, 586.5914306640625, 191.32179260253906, 40.35337829589844, 306.34735107421875, 643.4213256835938, 643.76416015625, 262.2257995605469, 580.5283203125, 69.20118713378906, 167.34820556640625, 794.0166015625, -367.49517822265625, -59.32280731201172, -110.7776870727539, 378.906494140625, 239.37937927246094, 594.6493530273438, 1292.5797119140625, -68.6961898803711, 17.54791259765625, 154.32168579101562, 336.7107849121094, 160.62428283691406, 188.10455322265625, 258.8110046386719, -341.68310546875, -403.1644287109375, 391.3863525390625, 99.47145080566406, 611.6832275390625, -134.2881317138672, 619.1914672851562, 264.5476379394531, 643.2738037109375, 1039.6099853515625, 157.12783813476562, 37.65571212768555, 535.4320068359375, 908.7089233398438, 272.1887512207031, -29.14410400390625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000418.npy"} +{"epoch": 0.6138032305433186, "step": 419, "batch_size": 64, "mean": 299.801513671875, "std": 312.16278076171875, "min": -437.69415283203125, "p10": -36.94102859497068, "median": 266.0982971191406, "p90": 715.4903381347657, "max": 935.152099609375, "pos_frac": 0.859375, "sample": [731.5507202148438, -346.6502685546875, 387.8135681152344, 601.555908203125, 71.85001373291016, 710.7171020507812, 6.230747222900391, -5.557483673095703, 45.26324462890625, 691.8975830078125, 778.126708984375, 194.84274291992188, 329.5378112792969, 538.1503295898438, 526.417724609375, 487.7884216308594, 227.5186767578125, 169.93374633789062, -437.69415283203125, 81.08080291748047, -44.69438934326172, 774.444580078125, -250.8469696044922, 258.0342102050781, 591.29833984375, -292.52288818359375, 194.60659790039062, 189.2682342529297, 386.9149169921875, 851.758056640625, 365.1871337890625, 63.27886962890625, 433.43011474609375, 256.64312744140625, 433.35137939453125, 129.82940673828125, 187.19435119628906, 66.51275634765625, 430.9371032714844, 276.71527099609375, 585.685546875, 189.61380004882812, 887.140625, 67.03733825683594, 87.79774475097656, 95.05570983886719, 26.295900344848633, -184.75686645507812, 703.69970703125, 710.6718139648438, 497.0769348144531, 491.16937255859375, 54.0667724609375, 935.152099609375, 402.45733642578125, 415.3501281738281, 430.8172607421875, 274.1623840332031, 107.23110961914062, 717.5360107421875, -76.84501647949219, -18.849853515625, 243.47802734375, 454.5395812988281], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000419.npy"} +{"epoch": 0.6152716593245228, "step": 420, "batch_size": 64, "mean": 177.66058349609375, "std": 397.99420166015625, "min": -911.9642333984375, "p10": -234.21712036132809, "median": 151.47918701171875, "p90": 606.9648620605469, "max": 1210.8336181640625, "pos_frac": 0.640625, "sample": [-143.67449951171875, 194.99990844726562, 34.627464294433594, -76.60226440429688, 557.6695556640625, -140.76699829101562, -208.13742065429688, -6.575225830078125, 276.50030517578125, 641.231201171875, -55.14472961425781, -29.25701141357422, -16.680194854736328, 174.6321258544922, -40.23004150390625, 91.88349151611328, 569.9344482421875, 338.9622497558594, 62.86130905151367, 209.35855102539062, -17.865602493286133, 266.4817199707031, -245.39413452148438, 348.8012390136719, -146.8155517578125, 151.16262817382812, -64.35676574707031, 512.5111083984375, 221.9090576171875, -132.16757202148438, 612.77392578125, 477.78656005859375, 423.83843994140625, -314.80047607421875, 151.79574584960938, 216.71994018554688, -279.6064453125, 40.88813018798828, 597.0673217773438, 784.61669921875, -35.877586364746094, -911.9642333984375, 611.2066650390625, 428.6180114746094, 587.977783203125, 168.89816284179688, 501.4404296875, 445.8638000488281, 432.19866943359375, 352.063720703125, 49.988975524902344, -537.9786376953125, 285.1934814453125, 48.397281646728516, -37.3798713684082, 1136.2059326171875, -749.2393188476562, 107.64338684082031, 140.79820251464844, 465.513671875, 1210.8336181640625, -148.62411499023438, -407.70953369140625, 1185.2711181640625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000420.npy"} +{"epoch": 0.6167400881057269, "step": 421, "batch_size": 64, "mean": 210.0072021484375, "std": 307.3924255371094, "min": -845.4775390625, "p10": -174.60533599853514, "median": 264.8617858886719, "p90": 594.3308959960938, "max": 843.0255126953125, "pos_frac": 0.765625, "sample": [313.2799072265625, 95.05496215820312, 105.11328887939453, -94.73328399658203, 212.26010131835938, 369.6043395996094, 800.0160522460938, -50.716949462890625, 106.22676086425781, 87.78965759277344, 328.55633544921875, 15.95309066772461, 566.94482421875, 681.075927734375, 487.8538818359375, 601.109375, 362.1134033203125, -179.11741638183594, 117.48455047607422, 275.2109069824219, 778.64599609375, 399.08929443359375, 57.49652099609375, -266.4364013671875, 265.73956298828125, 260.27105712890625, 459.64715576171875, 311.99810791015625, -12.406455993652344, 263.9840087890625, 596.6322021484375, 385.7740783691406, 588.961181640625, 311.1318664550781, 272.3389892578125, -363.72784423828125, 465.84075927734375, 353.2044677734375, -408.3171691894531, 642.048828125, 313.9130859375, -19.66249656677246, 370.38739013671875, 843.0255126953125, 145.78622436523438, 423.412109375, 187.44566345214844, -21.77116584777832, 34.064613342285156, 455.51092529296875, 81.58699035644531, -26.398887634277344, 283.01715087890625, -845.4775390625, -205.1733856201172, 133.28038024902344, 128.9715576171875, -164.0771484375, -93.7083740234375, 304.1456298828125, 131.75497436523438, -266.9074401855469, 353.5947570800781, 330.74468994140625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000421.npy"} +{"epoch": 0.618208516886931, "step": 422, "batch_size": 64, "mean": 244.8833465576172, "std": 342.9683837890625, "min": -800.907470703125, "p10": -150.4199264526367, "median": 248.4316177368164, "p90": 678.8565002441406, "max": 1254.5877685546875, "pos_frac": 0.765625, "sample": [226.0070037841797, 685.8941040039062, 816.1500244140625, 498.69189453125, 369.61822509765625, -285.722900390625, -444.25054931640625, 734.7052001953125, -76.5217514038086, 1254.5877685546875, -83.94721984863281, 201.9371795654297, 219.64585876464844, 63.05377197265625, 363.35955810546875, 250.31448364257812, 36.119590759277344, -155.17068481445312, -7.532985687255859, 213.97813415527344, 31.636672973632812, 723.5133056640625, 397.82220458984375, 106.00045776367188, 568.8616943359375, 207.42526245117188, 173.8013916015625, 675.716796875, 274.0245666503906, 635.876708984375, 724.4490966796875, 394.64678955078125, 463.2384948730469, 261.8489990234375, 341.7018127441406, 373.00091552734375, 315.8038024902344, -396.8264465332031, 325.5680236816406, -18.42681884765625, -39.37953567504883, 414.0025939941406, 188.49734497070312, -800.907470703125, -41.820369720458984, 680.2020874023438, 516.3511352539062, 85.71455383300781, 636.3677368164062, -21.371076583862305, 419.80914306640625, 246.5487518310547, 69.69000244140625, 36.07429885864258, 417.24737548828125, 470.8505554199219, 266.482421875, -147.40187072753906, 470.5764465332031, 503.4898376464844, -151.71337890625, 119.31925964355469, 230.6632080078125, -357.3594970703125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000422.npy"} +{"epoch": 0.6196769456681351, "step": 423, "batch_size": 64, "mean": 269.41815185546875, "std": 303.3704528808594, "min": -473.7216796875, "p10": -95.74510345458984, "median": 254.33689880371094, "p90": 687.9962280273438, "max": 1001.848876953125, "pos_frac": 0.78125, "sample": [287.9732666015625, 111.29814910888672, 228.27696228027344, 32.44390869140625, -96.3218994140625, -309.9267272949219, 798.8297119140625, 265.7041320800781, 527.609375, 472.1963806152344, 596.9456787109375, -191.1962890625, 795.1917724609375, 286.6270751953125, 504.5910339355469, -43.63762283325195, 758.527099609375, 815.3391723632812, 840.0972900390625, 696.6553955078125, -55.8238525390625, -18.62158203125, 393.698486328125, 173.981201171875, 642.03857421875, 212.217041015625, 175.0056915283203, 66.91421508789062, -473.7216796875, -176.68161010742188, 410.1368103027344, -134.69544982910156, 261.2948913574219, 350.1736145019531, 667.79150390625, 248.99588012695312, -28.086076736450195, 439.16009521484375, -164.7019805908203, 89.744140625, 369.9179382324219, 561.4287109375, 345.0556945800781, 302.1653137207031, 1001.848876953125, 176.1471710205078, 497.6313171386719, -21.249801635742188, 466.77001953125, 117.90321350097656, 72.30952453613281, 214.45506286621094, 476.36724853515625, 407.41326904296875, 194.41436767578125, 259.67791748046875, -94.39924621582031, 192.79312133789062, 177.41583251953125, -31.862945556640625, 524.1112060546875, 101.2615737915039, 129.98818969726562, 345.1542053222656], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000423.npy"} +{"epoch": 0.6211453744493393, "step": 424, "batch_size": 64, "mean": 242.24057006835938, "std": 344.1079406738281, "min": -457.30865478515625, "p10": -216.79139404296876, "median": 236.2024154663086, "p90": 584.0891174316407, "max": 1203.0673828125, "pos_frac": 0.78125, "sample": [-219.37750244140625, 290.315673828125, 48.29527282714844, -34.264766693115234, -124.86947631835938, 547.268798828125, 427.4880065917969, 304.2240295410156, 83.48917388916016, -457.30865478515625, 236.89224243164062, 300.5502014160156, -70.35417175292969, -225.6914520263672, 261.06005859375, 668.4671630859375, 469.2994384765625, 767.8129272460938, 116.46421813964844, 472.7410583496094, 568.9357299804688, 235.51258850097656, 588.5146484375, 510.69097900390625, 295.12451171875, -144.44683837890625, 224.09414672851562, 66.72830963134766, -214.17575073242188, -217.91238403320312, 214.6417694091797, 1187.7340087890625, -243.71963500976562, 294.2734680175781, 354.5552978515625, -95.9892578125, 358.5516662597656, -298.48431396484375, 206.42974853515625, 52.514434814453125, -113.7832260131836, 136.79306030273438, -360.0296630859375, 22.78714370727539, 276.9828796386719, 359.3005676269531, 184.70297241210938, 50.427772521972656, 413.3405456542969, 1203.0673828125, 309.69873046875, 573.7628784179688, 424.3365173339844, 240.77035522460938, 390.84454345703125, 101.60923767089844, 858.2034301757812, 188.03616333007812, 185.07644653320312, 1131.831787109375, 547.5645141601562, 352.4435729980469, 100.57283020019531, 118.98150634765625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000424.npy"} +{"epoch": 0.6226138032305433, "step": 425, "batch_size": 64, "mean": 208.72262573242188, "std": 315.2806396484375, "min": -711.0782470703125, "p10": -121.58017272949218, "median": 217.22988891601562, "p90": 567.4303649902345, "max": 878.7616577148438, "pos_frac": 0.8125, "sample": [-125.28684997558594, 53.14288330078125, 388.8359680175781, -357.00274658203125, -539.0200805664062, 213.5606689453125, 117.99809265136719, 305.5693664550781, 71.26589965820312, 224.14830017089844, 89.38101959228516, 517.4711303710938, -711.0782470703125, 2.0341453552246094, 194.4473419189453, 99.77044677734375, 89.1875991821289, 253.08843994140625, 549.9943237304688, 101.26841735839844, 268.453125, 112.70770263671875, -32.3347282409668, 365.3387451171875, 600.7796020507812, 131.65557861328125, 531.435302734375, 97.87672424316406, -112.93125915527344, 878.7616577148438, 117.23084259033203, 578.78076171875, 691.1563720703125, -97.82920837402344, 220.89910888671875, 337.5702209472656, 574.9029541015625, 173.6051788330078, 106.26077270507812, 767.4244384765625, -535.8037719726562, 478.95635986328125, 100.88832092285156, -340.1159973144531, 532.7077026367188, 370.88153076171875, 266.87298583984375, 254.1695556640625, 9.206329345703125, 462.4136657714844, 232.01422119140625, 288.12994384765625, -93.49720764160156, -109.51251220703125, 303.0634765625, 461.195068359375, 537.7303466796875, 867.4364013671875, 432.4246520996094, 271.06488037109375, 499.767333984375, -126.7957992553711, 211.39794921875, 133.13259887695312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000425.npy"} +{"epoch": 0.6240822320117474, "step": 426, "batch_size": 64, "mean": 238.2259979248047, "std": 315.68780517578125, "min": -795.3321533203125, "p10": -88.8562759399414, "median": 183.45751190185547, "p90": 687.6267395019534, "max": 844.1328735351562, "pos_frac": 0.796875, "sample": [233.0924072265625, 115.82929229736328, 112.92462921142578, 160.11300659179688, 235.15301513671875, -86.29582214355469, 150.19189453125, 229.8550567626953, 726.2189331054688, 72.28187561035156, 148.27674865722656, 202.41656494140625, -71.04498291015625, 2.743356704711914, 617.9389038085938, -188.49859619140625, 68.39878845214844, 99.12179565429688, 516.8665161132812, 187.29574584960938, -102.83451080322266, 715.0498046875, -32.602928161621094, 272.9970397949219, 141.3324432373047, 141.7291259765625, -151.39942932128906, 396.0875244140625, 821.636474609375, 549.8748168945312, 244.4306182861328, 623.6395874023438, 70.88880157470703, 264.1723937988281, 179.61927795410156, 780.6666259765625, 53.26824951171875, -360.83917236328125, 84.5564193725586, 463.7042236328125, -89.95361328125, 313.41839599609375, 844.1328735351562, 179.33795166015625, 470.6812744140625, 88.6015853881836, 353.9872741699219, 471.685791015625, -76.72308349609375, 98.13346862792969, -345.67620849609375, 376.2607727050781, -12.471996307373047, 435.4573974609375, 532.2640991210938, 349.356689453125, -4.984979629516602, 502.8016357421875, 791.31103515625, -795.3321533203125, 588.5814819335938, 784.9524536132812, 132.11007690429688, 569.6756591796875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000426.npy"} +{"epoch": 0.6255506607929515, "step": 427, "batch_size": 64, "mean": 182.03079223632812, "std": 289.1174621582031, "min": -403.00799560546875, "p10": -162.90760498046876, "median": 166.22940063476562, "p90": 463.24920043945326, "max": 1073.2586669921875, "pos_frac": 0.75, "sample": [296.9417724609375, 1.8198051452636719, 185.799072265625, 432.0650634765625, 893.3880615234375, 42.36711883544922, -10.105951309204102, 64.58043670654297, 409.7970886230469, 183.82546997070312, -10.7117919921875, 97.65348815917969, 133.31961059570312, 162.53895568847656, 169.9198455810547, 270.7332763671875, 373.6275329589844, 963.673095703125, 42.100685119628906, -224.5692138671875, 25.886131286621094, 107.64581298828125, 297.19024658203125, 371.6007385253906, 245.39443969726562, 202.6820068359375, 283.69390869140625, 867.9140625, -25.476608276367188, 219.8484344482422, -163.29296875, 531.8018798828125, 279.58544921875, -141.92568969726562, -87.39537048339844, 301.955810546875, 333.92156982421875, -80.44882202148438, 578.0888061523438, 193.99819946289062, 304.637939453125, 29.712297439575195, 81.39812469482422, 157.19166564941406, -403.00799560546875, 203.94424438476562, 413.60015869140625, 370.768798828125, -254.3062744140625, -277.3401184082031, 476.61383056640625, 133.54986572265625, 424.6072082519531, -16.520790100097656, 1073.2586669921875, -162.0084228515625, 90.57379150390625, 291.9731750488281, 295.6476135253906, 87.04772186279297, -123.4530258178711, -206.97032165527344, 10.728065490722656, -173.10765075683594], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000427.npy"} +{"epoch": 0.6270190895741556, "step": 428, "batch_size": 64, "mean": 265.2386474609375, "std": 382.43719482421875, "min": -751.4483642578125, "p10": -119.9306900024414, "median": 244.24639129638672, "p90": 718.5638122558595, "max": 1392.82373046875, "pos_frac": 0.734375, "sample": [-97.47929382324219, -172.18264770507812, -28.736608505249023, 509.0354309082031, 883.580322265625, 388.4215393066406, 157.9692840576172, 600.0313110351562, 468.00250244140625, 1165.6265869140625, -153.06570434570312, 114.16021728515625, 503.3454284667969, 327.9858703613281, 447.5610046386719, -90.4468994140625, -617.720947265625, 665.0590209960938, 424.89471435546875, 606.6265869140625, 473.0047912597656, 234.9937744140625, -64.54473876953125, 28.87417221069336, 488.8581237792969, 701.0665893554688, 342.58294677734375, 273.30072021484375, -103.89089965820312, 210.93385314941406, -97.2027816772461, 395.91180419921875, 811.648681640625, -72.97154998779297, 316.1092834472656, 1.2057056427001953, 592.799072265625, 525.7796020507812, 39.69856643676758, 81.86236572265625, 91.770751953125, 169.037109375, -1.0479202270507812, -71.58560943603516, -471.6654968261719, -59.54109191894531, 123.43034362792969, 166.57369995117188, 1392.82373046875, 179.08209228515625, 528.7490844726562, 458.46820068359375, 522.6433715820312, -751.4483642578125, 557.022216796875, 253.49900817871094, 735.9093017578125, 726.0626220703125, 359.259765625, 169.3567657470703, 741.216552734375, -126.80488586425781, 207.22569274902344, -207.45065307617188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000428.npy"} +{"epoch": 0.6284875183553598, "step": 429, "batch_size": 64, "mean": 195.0811767578125, "std": 338.138671875, "min": -734.2245483398438, "p10": -214.3719314575195, "median": 222.55995178222656, "p90": 603.3088378906251, "max": 932.4102783203125, "pos_frac": 0.71875, "sample": [87.1622543334961, 145.22015380859375, -392.1302490234375, 367.5560607910156, -90.4283218383789, 657.7444458007812, -163.5472412109375, 147.71759033203125, 123.24029541015625, 367.80377197265625, -468.2447509765625, 223.90170288085938, 88.85478973388672, 61.093589782714844, -357.01055908203125, 313.4775695800781, 256.0910949707031, -224.1045684814453, 749.6429443359375, -189.6459503173828, 819.2623901367188, 932.4102783203125, 53.47576904296875, 443.8105773925781, 226.98040771484375, -2.2164268493652344, 345.78106689453125, -21.962238311767578, 588.9076538085938, 447.61102294921875, 221.21820068359375, 336.6234130859375, 380.78302001953125, -734.2245483398438, -92.4108657836914, -164.9452667236328, -87.3460922241211, 215.25917053222656, 289.541015625, -14.760061264038086, 609.4807739257812, 411.43096923828125, 342.70562744140625, 17.548919677734375, 803.1990966796875, 297.8974914550781, 555.3660278320312, -191.66244506835938, 200.9593048095703, -324.4574890136719, 366.0334777832031, 157.43251037597656, 561.4308471679688, 268.214111328125, 24.824270248413086, -448.5682067871094, -18.678604125976562, 296.8233642578125, 129.21853637695312, 550.04052734375, 340.7672119140625, 698.297607421875, 517.6622314453125, 431.03515625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000429.npy"} +{"epoch": 0.6299559471365639, "step": 430, "batch_size": 64, "mean": 214.42791748046875, "std": 282.29608154296875, "min": -333.74090576171875, "p10": -100.32765045166015, "median": 196.34734344482422, "p90": 663.7582336425781, "max": 1098.390869140625, "pos_frac": 0.8125, "sample": [303.9425354003906, 211.61448669433594, 12.393226623535156, -125.59494018554688, 715.3797607421875, 400.2921142578125, 64.86923217773438, 227.86123657226562, 218.3258056640625, 516.0690307617188, 33.03804397583008, -59.880760192871094, 56.45612335205078, -60.131935119628906, 221.71275329589844, 670.9155883789062, -299.37835693359375, 429.03173828125, 51.23750305175781, 87.40432739257812, 210.52967834472656, 114.8582534790039, 200.0557861328125, 160.64407348632812, -82.44512176513672, 148.20668029785156, 300.5207824707031, 344.5289001464844, -146.57894897460938, 209.5851593017578, 128.08872985839844, -137.4314727783203, 1098.390869140625, 16.987533569335938, 130.516845703125, 699.4576416015625, 75.05626678466797, -333.74090576171875, 27.017736434936523, 238.26649475097656, 414.04803466796875, 647.0577392578125, 388.7251281738281, 101.39765167236328, 188.61184692382812, 708.2091674804688, 55.97795104980469, -88.81521606445312, 467.5126647949219, 192.63890075683594, 207.6791534423828, -105.26155090332031, 406.95733642578125, 237.22422790527344, -59.50258255004883, 481.88623046875, 332.22503662109375, -244.93002319335938, 388.5020751953125, 825.1929931640625, 750.9254760742188, 256.7681884765625, 52.56995391845703, 39.7130012512207], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000430.npy"} +{"epoch": 0.631424375917768, "step": 431, "batch_size": 64, "mean": 280.59796142578125, "std": 310.6165466308594, "min": -517.050537109375, "p10": -79.44561233520506, "median": 259.46690368652344, "p90": 582.9068054199219, "max": 1030.6866455078125, "pos_frac": 0.828125, "sample": [-248.5239715576172, 241.2506866455078, -206.65069580078125, 534.5101318359375, 209.87619018554688, 135.09603881835938, 884.481689453125, 132.29611206054688, 375.59527587890625, 502.015380859375, 242.66384887695312, 410.3586730957031, 15.162874221801758, -46.73500061035156, -60.44251251220703, 145.9878387451172, -28.656585693359375, -517.050537109375, 457.15936279296875, -211.3233184814453, 580.7640380859375, 400.64532470703125, 1005.744384765625, 857.9122924804688, 90.95643615722656, 421.5048522949219, 378.8199462890625, 452.9486083984375, 219.88865661621094, 428.06109619140625, 661.904541015625, 215.55081176757812, 232.80914306640625, 153.70484924316406, 436.53753662109375, -326.3669128417969, 954.9066162109375, -87.58979797363281, 284.4272155761719, 2.2412796020507812, 479.1850280761719, 277.614501953125, 1030.6866455078125, -24.383041381835938, 174.6314239501953, 184.21328735351562, 438.042236328125, 92.56705474853516, 291.2950134277344, 262.00299072265625, 402.0540466308594, 485.66070556640625, 570.9036254882812, 348.82830810546875, 497.0406188964844, 462.47076416015625, 163.095703125, 81.68732452392578, -188.201171875, 256.9308166503906, 583.8251342773438, 113.26220703125, 129.2552032470703, 513.1593627929688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000431.npy"} +{"epoch": 0.6328928046989721, "step": 432, "batch_size": 64, "mean": 292.85833740234375, "std": 292.06341552734375, "min": -225.74261474609375, "p10": -36.51548385620117, "median": 260.55615234375, "p90": 702.3488647460939, "max": 1279.490966796875, "pos_frac": 0.84375, "sample": [186.82191467285156, 217.86331176757812, 283.79925537109375, 271.6606140136719, 183.90309143066406, 154.6298065185547, 130.63848876953125, -79.89097595214844, 257.2055358886719, -35.84062194824219, 43.31584548950195, 21.351970672607422, 635.8367919921875, 491.9966125488281, -49.97542190551758, 1279.490966796875, 724.9214477539062, 280.9206848144531, 30.919082641601562, 188.4403076171875, 131.28219604492188, 147.18093872070312, 247.62228393554688, 50.089202880859375, 541.0496215820312, 266.0900573730469, 674.9412841796875, 153.67381286621094, 560.7288818359375, 167.34130859375, 296.4990539550781, -36.804710388183594, -183.63035583496094, 287.1009521484375, -225.74261474609375, 523.195556640625, 230.55433654785156, -129.24549865722656, 390.791748046875, 117.86997985839844, 245.85720825195312, -10.473846435546875, 263.9067687988281, 451.97137451171875, 296.75787353515625, 324.41302490234375, 499.00360107421875, 499.58984375, 452.7248840332031, 275.81378173828125, 192.3488006591797, 444.3405456542969, -2.3134918212890625, 929.4752197265625, 325.7867736816406, 138.1455078125, 753.532958984375, 392.16571044921875, 611.2478637695312, 863.2999267578125, -179.4434814453125, 815.75390625, 16.33661651611328, 714.094970703125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000432.npy"} +{"epoch": 0.6343612334801763, "step": 433, "batch_size": 64, "mean": 191.52005004882812, "std": 328.77325439453125, "min": -838.126220703125, "p10": -184.88922882080078, "median": 209.92008209228516, "p90": 565.7128234863281, "max": 726.2178955078125, "pos_frac": 0.765625, "sample": [75.04507446289062, 641.4650268554688, 539.5816650390625, 492.657470703125, 411.1673583984375, -58.63154220581055, 269.47308349609375, -762.7946166992188, 505.5869445800781, 569.8771362304688, -838.126220703125, 61.46604919433594, 600.17236328125, 334.4927062988281, 171.44219970703125, -85.67015075683594, 195.15745544433594, 303.69000244140625, 532.6184692382812, -735.9493408203125, -19.16101837158203, 487.6461486816406, 527.414794921875, 344.2138671875, 155.194091796875, 114.08018493652344, 68.29878234863281, 129.28530883789062, 81.88398742675781, 324.1197204589844, 120.58624267578125, 702.8126220703125, -338.8083190917969, 412.2633056640625, 351.16790771484375, 438.40057373046875, 575.8711547851562, -240.21514892578125, 300.57757568359375, 27.85419464111328, 726.2178955078125, 555.99609375, 227.50567626953125, -15.929985046386719, 208.787353515625, 82.5337905883789, 286.6702575683594, 4.590309143066406, -180.58627319335938, -219.31365966796875, 673.9096069335938, 365.1448669433594, 124.75750732421875, -6.133586883544922, 130.17352294921875, -186.7333526611328, 156.19503784179688, 373.4624938964844, -38.19561767578125, -6.720314025878906, 287.7784423828125, 497.554443359375, 212.35841369628906, 211.0528106689453], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000433.npy"} +{"epoch": 0.6358296622613803, "step": 434, "batch_size": 64, "mean": 212.74732971191406, "std": 336.6778259277344, "min": -400.7961730957031, "p10": -114.0089500427246, "median": 161.5762481689453, "p90": 624.4440246582035, "max": 1802.190185546875, "pos_frac": 0.703125, "sample": [451.1660461425781, 157.64016723632812, 1802.190185546875, 216.3661346435547, -162.7773895263672, 28.399322509765625, 250.67254638671875, 299.0197448730469, -85.64788055419922, -110.20115661621094, 272.558837890625, -32.76103210449219, 115.12163543701172, 691.3070068359375, 39.88258743286133, 41.889766693115234, -113.38323211669922, -400.7961730957031, 493.7046813964844, 459.1390380859375, 759.024658203125, -189.46340942382812, 96.81108093261719, 225.5914306640625, 441.616455078125, 169.52627563476562, -6.438924789428711, -106.02743530273438, 312.96820068359375, 342.7245178222656, -15.86322021484375, 206.53768920898438, 426.0871887207031, -119.19767761230469, 113.91273498535156, 373.5914001464844, 134.23846435546875, 29.561677932739258, 161.55950927734375, 303.1524353027344, -76.85476684570312, 418.80230712890625, 545.3955078125, -0.1351165771484375, 161.59298706054688, 248.08157348632812, 319.1053161621094, 131.7533721923828, 164.11550903320312, 658.3219604492188, 336.781982421875, -55.66900634765625, 899.767578125, -114.27711486816406, 146.0717010498047, 125.45913696289062, 673.7916870117188, 184.0911865234375, -84.4383544921875, -184.9871826171875, 769.8551025390625, 544.2269897460938, -89.06932067871094, -179.35789489746094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000434.npy"} +{"epoch": 0.6372980910425844, "step": 435, "batch_size": 64, "mean": 250.076416015625, "std": 328.145263671875, "min": -667.89794921875, "p10": -82.24583129882812, "median": 224.92897033691406, "p90": 670.8610656738284, "max": 1229.4644775390625, "pos_frac": 0.828125, "sample": [1229.4644775390625, 345.594970703125, -667.89794921875, 332.22772216796875, 514.4021606445312, 163.97900390625, 225.23239135742188, 440.7730712890625, 366.1736755371094, 87.72526550292969, 494.224609375, 84.18428039550781, 55.12744140625, 335.8899230957031, -76.84977722167969, 587.2782592773438, 887.4771118164062, 784.3777465820312, 244.2744140625, -175.69822692871094, 27.19610595703125, -33.2076416015625, -14.477935791015625, 125.63462829589844, -154.43536376953125, 122.07659912109375, 231.4198455810547, 135.36355590820312, -179.4185791015625, 224.62554931640625, 289.5871887207031, 89.42607116699219, 195.93310546875, 332.9072570800781, 556.3641967773438, 287.8741760253906, 433.700439453125, -455.25921630859375, 477.2914123535156, -326.123291015625, 290.28143310546875, 312.0920715332031, 777.8831787109375, -8.183563232421875, 160.10235595703125, 60.29436111450195, 521.9536743164062, 580.5619506835938, 27.79434585571289, 13.060544967651367, 1019.1351318359375, 50.51261901855469, 248.72262573242188, 83.97232055664062, 193.90200805664062, 700.405517578125, 752.9070434570312, 222.0914306640625, 241.54383850097656, 601.9240112304688, 154.9735565185547, -84.55842590332031, 276.55755615234375, 182.52218627929688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000435.npy"} +{"epoch": 0.6387665198237885, "step": 436, "batch_size": 64, "mean": 222.81613159179688, "std": 301.95294189453125, "min": -693.5039672851562, "p10": -154.90482177734376, "median": 202.8336944580078, "p90": 581.0900634765626, "max": 952.40234375, "pos_frac": 0.78125, "sample": [-25.041975021362305, 167.1038055419922, -178.34707641601562, -115.0752944946289, 596.5682983398438, 211.07899475097656, 352.0684814453125, 769.4129638671875, 544.9741821289062, -177.35720825195312, 158.8594207763672, 930.4783935546875, -95.46560668945312, -150.0510711669922, 287.982421875, 276.1766052246094, -124.83018493652344, 45.510955810546875, 812.5518188476562, -186.84112548828125, 452.9344482421875, 445.1624450683594, 330.49786376953125, 395.9388732910156, 483.8936462402344, 112.73565673828125, 471.93157958984375, 470.8193359375, 98.71568298339844, 194.58839416503906, 952.40234375, 107.47272491455078, 503.9772033691406, 307.24981689453125, 222.87936401367188, 249.92236328125, -62.05848693847656, -4.689109802246094, 242.62213134765625, 176.4798583984375, 344.72576904296875, 56.37192153930664, 292.3399658203125, 150.9342041015625, 285.903076171875, 335.19403076171875, 172.1569366455078, -197.19400024414062, 146.52011108398438, 152.033935546875, 311.97796630859375, 214.25625610351562, 373.7174377441406, 609.853515625, 893.3709716796875, 19.907577514648438, 103.0567398071289, 39.89045715332031, -693.5039672851562, 188.9399871826172, -156.98500061035156, 75.34909057617188, 480.8575439453125, -192.67434692382812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000436.npy"} +{"epoch": 0.6402349486049926, "step": 437, "batch_size": 64, "mean": 218.2496337890625, "std": 286.41717529296875, "min": -539.3932495117188, "p10": -114.19660263061523, "median": 206.50069427490234, "p90": 581.4403381347656, "max": 882.2144775390625, "pos_frac": 0.734375, "sample": [155.4421844482422, -239.26388549804688, 310.91998291015625, 656.7100830078125, 364.9540100097656, -110.79859161376953, 525.6907348632812, 139.31723022460938, 339.6036376953125, -25.026519775390625, -539.3932495117188, 161.59719848632812, -244.92593383789062, 391.3388977050781, -55.842864990234375, 200.94444274902344, 228.20761108398438, -357.9136962890625, 106.98788452148438, 581.6299438476562, 119.3048324584961, 555.0670166015625, 215.1581268310547, -64.2772445678711, -155.1551513671875, 621.4991455078125, 134.23886108398438, 107.9127426147461, 427.7369384765625, 478.5438232421875, 672.2971801757812, 382.38824462890625, 742.9637451171875, -58.55253601074219, 882.2144775390625, -115.65289306640625, -39.855552673339844, 2.595853805541992, 212.05694580078125, -12.708953857421875, 15.210899353027344, 9.572601318359375, 489.3377990722656, 361.8697814941406, 377.7879333496094, 139.83587646484375, 442.17486572265625, 492.5243835449219, 627.956298828125, 580.9979248046875, 145.20851135253906, 361.6630554199219, 351.2039794921875, -106.3739013671875, -0.5424423217773438, 555.4466552734375, 249.16006469726562, 329.83233642578125, -131.97007751464844, 346.73199462890625, 62.456302642822266, 410.4426574707031, 180.2208251953125, -20.726463317871094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000437.npy"} +{"epoch": 0.6417033773861968, "step": 438, "batch_size": 64, "mean": 253.57174682617188, "std": 315.9264221191406, "min": -564.3928833007812, "p10": -115.16457290649413, "median": 254.70114135742188, "p90": 688.829022216797, "max": 896.9725341796875, "pos_frac": 0.78125, "sample": [117.31117248535156, -84.88704681396484, 354.6501770019531, -134.2050323486328, 338.5748291015625, 113.31434631347656, 834.1717529296875, -55.49617004394531, 321.8816833496094, -78.31591796875, 250.64157104492188, 413.1730041503906, 51.124324798583984, 179.30401611328125, 430.4503173828125, 543.212890625, 439.5322265625, 896.9725341796875, 32.67341232299805, 193.990966796875, 218.03158569335938, 623.006103515625, 443.9156494140625, 629.621826171875, 212.40997314453125, 229.31829833984375, 39.550941467285156, 376.5952453613281, -451.5044250488281, -22.355316162109375, -202.08651733398438, -12.762815475463867, 452.4285583496094, 799.593994140625, 662.3585205078125, 314.8299255371094, 399.7057800292969, -564.3928833007812, 576.6655883789062, 273.7194519042969, 778.6239013671875, 217.1436767578125, -106.20326232910156, 707.8590698242188, -43.00421142578125, 282.49725341796875, 507.38665771484375, 258.7607116699219, 465.4625244140625, 100.37487030029297, 44.804561614990234, 303.12408447265625, 799.5723266601562, 107.17353820800781, -128.38888549804688, 119.27853393554688, 168.61636352539062, -419.5784912109375, -119.00513458251953, 581.315673828125, 113.31761169433594, 700.1735229492188, 316.1651611328125, 316.3976745605469], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000438.npy"} +{"epoch": 0.6431718061674009, "step": 439, "batch_size": 64, "mean": 217.5601348876953, "std": 358.1687927246094, "min": -550.6954345703125, "p10": -186.35728607177734, "median": 154.15056610107422, "p90": 752.5790832519532, "max": 1147.1622314453125, "pos_frac": 0.734375, "sample": [197.74331665039062, 749.3853759765625, 163.20985412597656, 259.97662353515625, -38.431243896484375, 40.941131591796875, 703.499755859375, -10.016555786132812, 73.45584106445312, 143.19525146484375, -288.10040283203125, 753.9478149414062, 116.40956115722656, -58.92839813232422, -176.73834228515625, 365.8906555175781, 412.59075927734375, 676.115234375, 788.0164794921875, 145.09127807617188, 97.98851013183594, 906.7166137695312, 266.2742614746094, 11.964508056640625, 225.6357421875, -210.65350341796875, 192.15931701660156, -85.94822692871094, -4.668415069580078, 22.57393455505371, 308.0790100097656, -218.05645751953125, 411.0279846191406, 618.11474609375, 1105.8524169921875, 362.9154052734375, 625.3723754882812, 207.8929443359375, -49.21672058105469, 206.7115478515625, 127.67998504638672, -115.73970794677734, -550.6954345703125, -127.912353515625, 0.8804702758789062, 229.79229736328125, 140.98532104492188, 249.43785095214844, -436.36407470703125, 615.9822387695312, 769.1498413085938, 55.4371452331543, 225.0787811279297, 813.6913452148438, 35.949851989746094, -33.7359619140625, 460.80889892578125, 346.7674560546875, 41.64663314819336, 131.48509216308594, 1147.1622314453125, 357.8061218261719, -388.95526123046875, -190.4796905517578], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000439.npy"} +{"epoch": 0.644640234948605, "step": 440, "batch_size": 64, "mean": 244.39990234375, "std": 358.09417724609375, "min": -268.7191467285156, "p10": -141.29472045898436, "median": 159.4653778076172, "p90": 740.3676086425783, "max": 1531.07763671875, "pos_frac": 0.75, "sample": [277.6470642089844, 270.66009521484375, 696.9066162109375, 475.0643310546875, -257.80413818359375, -64.57105255126953, 646.1402587890625, 179.05181884765625, 91.76698303222656, 749.7680053710938, 29.412099838256836, 190.61465454101562, 308.52685546875, 70.25180053710938, 772.9540405273438, -166.27667236328125, 111.67233276367188, -188.9606475830078, -6.01116943359375, 7.651496887207031, 139.87893676757812, 758.281005859375, 82.94918823242188, 19.564674377441406, 258.48028564453125, 234.5188751220703, -134.37564086914062, 264.11663818359375, 400.35650634765625, 125.3310546875, 186.41253662109375, 750.7584228515625, -144.26004028320312, 282.4444580078125, -110.53414916992188, -40.426910400390625, 1235.2374267578125, -268.7191467285156, 219.335205078125, 113.59162139892578, 692.726318359375, 182.74954223632812, 984.3541259765625, 477.2537841796875, 267.49090576171875, 33.486244201660156, -202.13882446289062, 126.75645446777344, 422.0910339355469, 582.1918334960938, 129.43408203125, 718.433349609375, -44.51752471923828, 121.60657501220703, 16.916500091552734, 494.27197265625, -6.931133270263672, -42.75501251220703, -167.55841064453125, 204.58724975585938, 1531.07763671875, 581.3651733398438, -44.20452117919922, 15.501190185546875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000440.npy"} +{"epoch": 0.6461086637298091, "step": 441, "batch_size": 64, "mean": 235.54818725585938, "std": 437.3805236816406, "min": -943.0171508789062, "p10": -228.7045806884765, "median": 190.34264373779297, "p90": 699.1506652832031, "max": 1961.8343505859375, "pos_frac": 0.71875, "sample": [308.55377197265625, 213.3315887451172, 157.58477783203125, 595.460205078125, 504.8810729980469, 776.3720703125, -71.71273803710938, 34.76344299316406, 134.46597290039062, -393.20770263671875, -245.5448455810547, -54.15898895263672, 691.14599609375, 961.6328125, 348.0260009765625, -365.6063537597656, 145.56507873535156, 662.6023559570312, 15.501953125, -19.540679931640625, 50.89037322998047, 152.1167449951172, 90.96231079101562, 834.8189697265625, 702.5812377929688, 52.812774658203125, 272.4388427734375, 457.1248779296875, 484.6460266113281, 1961.8343505859375, 151.95901489257812, -500.4958190917969, 203.2974090576172, 525.5355224609375, -484.6715087890625, -180.30929565429688, 137.21969604492188, 250.772216796875, 558.0874633789062, -476.7267150878906, -90.4464111328125, 465.0772705078125, -41.10154724121094, -943.0171508789062, 494.56201171875, 372.14208984375, 247.9627685546875, 455.0841979980469, 383.6065979003906, -189.41062927246094, 133.9983673095703, -141.66867065429688, -71.48565673828125, 430.65814208984375, 267.73565673828125, 177.38787841796875, 627.947998046875, 353.031494140625, 714.143310546875, 1133.284912109375, -12.59854507446289, -73.34761810302734, 132.69937133789062, 603.8275146484375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000441.npy"} +{"epoch": 0.6475770925110133, "step": 442, "batch_size": 64, "mean": 276.0215148925781, "std": 389.77142333984375, "min": -528.3689575195312, "p10": -84.74557037353514, "median": 189.04432678222656, "p90": 830.7071716308599, "max": 1366.442626953125, "pos_frac": 0.75, "sample": [-91.8108139038086, 174.89300537109375, 554.3257446289062, -49.83282470703125, 332.4168701171875, 212.160888671875, -68.26000213623047, 324.08782958984375, 64.47062683105469, -0.9163246154785156, 99.25462341308594, 441.254638671875, -57.96143341064453, 551.2418823242188, 389.3544006347656, 653.8114624023438, 878.5465698242188, 294.7212219238281, -67.60299682617188, 170.91636657714844, 161.59234619140625, 568.9268798828125, 108.19584655761719, 222.10275268554688, -264.9467468261719, 203.19564819335938, 716.6177978515625, 245.5118865966797, 958.597900390625, 209.32098388671875, 100.36102294921875, -528.3689575195312, 77.04812622070312, -160.2528839111328, -10.438362121582031, 495.7960205078125, -62.60712814331055, 488.2001037597656, 48.881065368652344, 1366.442626953125, -274.76483154296875, 308.9152526855469, 981.5975341796875, 22.770742416381836, -18.8973388671875, 1213.077392578125, 719.0819091796875, 270.2186279296875, 604.6867065429688, 56.654869079589844, 70.4066162109375, -294.3393249511719, 85.98886108398438, 78.10969543457031, -30.921585083007812, -212.48130798339844, 357.17156982421875, 324.52069091796875, 1057.1138916015625, 599.4776611328125, 1181.057861328125, 650.9306030273438, 104.73497009277344, 61.01716613769531], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000442.npy"} +{"epoch": 0.6490455212922174, "step": 443, "batch_size": 64, "mean": 174.32579040527344, "std": 408.4460144042969, "min": -813.8309326171875, "p10": -254.620979309082, "median": 145.67864990234375, "p90": 745.9463562011724, "max": 1235.5169677734375, "pos_frac": 0.734375, "sample": [1235.5169677734375, 71.49559783935547, 943.64501953125, 25.614486694335938, 352.73785400390625, 116.75581359863281, -93.58914947509766, -63.07606506347656, -366.1143798828125, 97.08055877685547, 79.56242370605469, -3.1212005615234375, 137.35267639160156, 186.2694091796875, -57.161773681640625, -783.463623046875, 171.6858673095703, -205.1117706298828, -106.3541488647461, 149.69558715820312, 1138.396728515625, 330.647705078125, 608.5524291992188, 355.39959716796875, -636.5487060546875, -659.8345336914062, 346.3237609863281, 290.43707275390625, 804.8294677734375, -813.8309326171875, 292.4999694824219, 418.712158203125, 215.1318817138672, 112.54905700683594, 96.64466857910156, -184.036376953125, 89.15796661376953, 509.955322265625, 387.03759765625, 8.830429077148438, -229.41233825683594, -147.24925231933594, 453.5433044433594, -304.8403015136719, 310.63812255859375, 36.76298522949219, 331.5129089355469, 1016.7611083984375, 387.6996154785156, 82.9067611694336, 602.2136840820312, 93.34942626953125, 183.14476013183594, 222.3013458251953, -203.3558807373047, 262.2283020019531, -265.4246826171875, 254.37791442871094, 117.50965118408203, 141.66171264648438, 813.581298828125, 886.165771484375, 288.675537109375, 221.82272338867188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000443.npy"} +{"epoch": 0.6505139500734214, "step": 444, "batch_size": 64, "mean": 297.13909912109375, "std": 379.1122741699219, "min": -573.9755249023438, "p10": -143.8123809814453, "median": 262.30469512939453, "p90": 731.1402954101563, "max": 1503.5443115234375, "pos_frac": 0.796875, "sample": [232.8704376220703, -235.3209228515625, 76.1663589477539, 518.4422607421875, 47.38835906982422, 270.59747314453125, -176.7271270751953, 1397.0943603515625, 789.996826171875, -32.54829788208008, -134.09524536132812, 319.3655090332031, 185.50689697265625, 609.6796264648438, 580.6771850585938, 159.3271484375, 719.67822265625, 369.59503173828125, -208.56649780273438, 147.41737365722656, -178.65032958984375, 4.343938827514648, 549.849365234375, 846.0147705078125, 139.7980499267578, 371.817138671875, 345.5624084472656, -41.6888427734375, 290.5101623535156, -365.02801513671875, -61.46331024169922, 1503.5443115234375, 338.40252685546875, 390.02783203125, 214.6339569091797, 485.35601806640625, 1078.227294921875, 364.1923522949219, 459.9864501953125, 249.4049072265625, -147.97686767578125, 1017.2687377929688, 611.00146484375, 213.48123168945312, 522.654541015625, 524.7907104492188, 536.0365600585938, 150.7089080810547, 94.99458312988281, 159.5161590576172, 254.0119171142578, 241.6095428466797, 342.4637145996094, 273.737548828125, 438.9430236816406, 205.3702392578125, 21.032733917236328, -97.09718322753906, -77.04686737060547, 53.803009033203125, 736.0526123046875, -573.9755249023438, 312.6318359375, 581.5050659179688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000444.npy"} +{"epoch": 0.6519823788546255, "step": 445, "batch_size": 64, "mean": 265.5284423828125, "std": 366.5321044921875, "min": -616.5929565429688, "p10": -176.89013519287104, "median": 233.64215087890625, "p90": 652.3448791503906, "max": 1258.872314453125, "pos_frac": 0.78125, "sample": [248.28250122070312, 65.71102142333984, 61.38823699951172, 928.5137939453125, 41.90480041503906, 586.5791015625, 411.0168762207031, 152.29522705078125, 27.65826416015625, 199.48033142089844, 20.363704681396484, 604.4210205078125, 3.0612335205078125, 397.2758483886719, 155.72171020507812, -616.5929565429688, 94.7469482421875, 469.8730773925781, 276.949462890625, 227.5276336669922, 446.8539123535156, 311.204833984375, 500.62451171875, 771.4017333984375, -115.72465515136719, 165.17474365234375, 1147.7906494140625, 553.6417236328125, 473.91357421875, 620.4686279296875, 593.0662231445312, 647.1989135742188, 239.7566680908203, 703.7340698242188, 62.48234558105469, -277.48236083984375, 151.93417358398438, 250.97540283203125, -203.10391235351562, 551.8280639648438, -14.412208557128906, 1196.5638427734375, 496.2503967285156, -215.90870666503906, 654.55029296875, 412.6903076171875, 247.43577575683594, -274.2104187011719, -1.9011459350585938, -36.27870178222656, 182.95767211914062, 22.829063415527344, -68.84666442871094, 486.6545715332031, 315.850341796875, 187.27540588378906, -48.595062255859375, -218.117431640625, 1258.872314453125, -89.69407653808594, 513.6243286132812, 200.28158569335938, 255.54681396484375, -421.5158996582031], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000445.npy"} +{"epoch": 0.6534508076358296, "step": 446, "batch_size": 64, "mean": 296.2715759277344, "std": 320.4066162109375, "min": -479.4041748046875, "p10": -11.112357330322261, "median": 257.35892486572266, "p90": 690.4735046386719, "max": 1089.2591552734375, "pos_frac": 0.859375, "sample": [27.53791046142578, 319.99114990234375, 82.81843566894531, 742.609619140625, 858.2925415039062, 314.9518737792969, 12.635669708251953, -479.4041748046875, 138.12098693847656, 604.9462890625, 900.6951293945312, 551.153564453125, -6.785545349121094, 694.7843017578125, 566.9241943359375, 567.5570068359375, 663.7041015625, 371.1494140625, 185.4420928955078, 368.9723205566406, -218.15777587890625, 187.44656372070312, 67.61050415039062, 212.0910186767578, 280.98651123046875, -423.90570068359375, 123.97911071777344, 167.93606567382812, 498.59283447265625, 227.54486083984375, 575.7578125, -99.76127624511719, 36.558563232421875, 202.81915283203125, 11.125543594360352, 112.19913482666016, -12.966705322265625, 404.5264892578125, 431.1041564941406, 664.262451171875, 334.802978515625, 130.86888122558594, 680.4149780273438, 1089.2591552734375, 308.00811767578125, 87.92642974853516, 905.27734375, 506.3498229980469, 254.4550018310547, 260.2628479003906, 441.4943542480469, -3.5721588134765625, -84.16510009765625, 119.89566040039062, 127.15381622314453, 248.97067260742188, 160.6056365966797, 386.5343017578125, 874.024169921875, 232.3067626953125, -349.88714599609375, 679.4522094726562, 312.6593017578125, 322.4360656738281], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000446.npy"} +{"epoch": 0.6549192364170338, "step": 447, "batch_size": 64, "mean": 303.7477722167969, "std": 386.8155517578125, "min": -462.3143310546875, "p10": -124.67441406249998, "median": 276.64093017578125, "p90": 697.7910949707032, "max": 2024.87646484375, "pos_frac": 0.84375, "sample": [296.2462158203125, 62.96814727783203, 127.49529266357422, 293.7400817871094, 14.263580322265625, -462.3143310546875, 433.4135437011719, 114.28568267822266, 373.98382568359375, -94.89736938476562, -23.488460540771484, 637.345947265625, 266.13916015625, 120.81294250488281, 715.3084716796875, 666.0932006835938, 809.09619140625, 6.700666427612305, 2024.87646484375, 801.2718505859375, 153.92355346679688, 1148.284912109375, 521.2391967773438, 452.50555419921875, -219.54591369628906, 174.45269775390625, 573.279296875, -137.43600463867188, 268.94305419921875, 213.04847717285156, -148.943115234375, 52.17481994628906, 44.709205627441406, -171.68423461914062, 456.5389404296875, -66.66017150878906, 414.34197998046875, 499.8538513183594, 128.8416748046875, 36.20471954345703, 567.897705078125, 37.85845184326172, 103.10877990722656, -312.4510498046875, 699.7314453125, 493.3782958984375, 469.6734313964844, -248.3943328857422, 288.64141845703125, 36.042030334472656, 404.20623779296875, 421.1641845703125, 417.53729248046875, 284.33880615234375, 207.53045654296875, 602.4657592773438, 344.0467224121094, 86.80296325683594, 1007.40185546875, 610.7908935546875, 129.54501342773438, 693.2636108398438, 511.7164001464844, 6.147369384765625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000447.npy"} +{"epoch": 0.6563876651982379, "step": 448, "batch_size": 64, "mean": 292.04010009765625, "std": 324.2012939453125, "min": -382.71258544921875, "p10": -116.52373809814452, "median": 252.284423828125, "p90": 678.246844482422, "max": 1260.9903564453125, "pos_frac": 0.8125, "sample": [70.82067108154297, 84.49411010742188, -156.41781616210938, 190.2288818359375, 709.6187744140625, 102.73382568359375, 119.43434143066406, -69.26554107666016, 344.1647644042969, -382.71258544921875, 515.4573974609375, 143.7606201171875, 694.6702880859375, 101.67977905273438, 253.13125610351562, 504.88885498046875, 61.44249725341797, 92.70594787597656, 619.7752075195312, -126.89462280273438, -123.46595764160156, 104.7406005859375, 639.9254760742188, 489.2664794921875, 223.0799102783203, 251.43759155273438, 295.531005859375, 227.42919921875, 556.5744018554688, 889.3785400390625, 334.8107604980469, 603.7492065429688, 168.01942443847656, 373.0670471191406, 1260.9903564453125, -35.07560729980469, 4.6068267822265625, 239.85887145996094, -62.742088317871094, 380.0144958496094, 467.9180908203125, 516.8067626953125, 267.240478515625, -131.3187255859375, 995.9739379882812, 341.714111328125, 228.42572021484375, -100.32522583007812, 368.5531311035156, 121.29751586914062, 257.22247314453125, -234.7677459716797, 1009.356201171875, -172.05889892578125, 639.4306030273438, 795.7236938476562, -14.227996826171875, 400.3519592285156, 115.5364761352539, 270.955078125, 552.7078857421875, 634.32861328125, 490.90704345703125, 173.90338134765625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000448.npy"} +{"epoch": 0.657856093979442, "step": 449, "batch_size": 64, "mean": 277.12255859375, "std": 425.6699523925781, "min": -824.804443359375, "p10": -193.40676422119137, "median": 232.85426330566406, "p90": 758.1552612304691, "max": 1852.3631591796875, "pos_frac": 0.734375, "sample": [-141.1320343017578, 57.326332092285156, 438.7968444824219, -58.41859436035156, 557.9100952148438, 633.68701171875, 233.64532470703125, -60.02356719970703, 275.9185791015625, 232.06320190429688, 803.5789794921875, -334.7852783203125, -73.73587799072266, 310.3486633300781, 631.4102783203125, 25.75313949584961, 653.53857421875, 116.68014526367188, 142.7786407470703, -234.03208923339844, 797.5770263671875, 257.6565246582031, 515.0067749023438, 831.0673828125, 285.072021484375, 628.43017578125, -303.9974670410156, 94.88516235351562, 364.22607421875, 425.4805603027344, 170.87539672851562, 224.00088500976562, -47.49249267578125, 624.504638671875, 626.8821411132812, 666.171142578125, -110.18709564208984, 75.37287902832031, -164.5523681640625, -313.9222412109375, 1852.3631591796875, 651.8867797851562, -69.93428039550781, 327.7568054199219, 275.690673828125, 149.31314086914062, -112.39480590820312, -824.804443359375, -72.88742065429688, 1285.563232421875, 228.47247314453125, 913.6730346679688, 653.4478759765625, -205.77293395996094, 456.866455078125, 186.15835571289062, 482.13470458984375, 993.2620849609375, 312.77130126953125, 2.5618133544921875, -237.6048126220703, 52.91766357421875, 121.48699951171875, 454.54925537109375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000449.npy"} +{"epoch": 0.6593245227606461, "step": 450, "batch_size": 64, "mean": 323.22100830078125, "std": 306.4082946777344, "min": -615.139404296875, "p10": 14.3903621673584, "median": 293.5113220214844, "p90": 740.9820861816407, "max": 1106.5325927734375, "pos_frac": 0.90625, "sample": [400.16424560546875, 622.9398803710938, 757.1278076171875, 435.1595153808594, 88.3681640625, 623.4625244140625, 1052.58056640625, -131.14991760253906, 381.64642333984375, 49.18958282470703, 589.3367309570312, 581.8331298828125, 624.6812744140625, 297.3888244628906, 433.4029541015625, 692.703369140625, 93.03744506835938, 17.1529541015625, 290.8973388671875, 123.76193237304688, 382.78448486328125, 759.2243041992188, 520.7789306640625, 762.77001953125, 239.36070251464844, -26.049495697021484, 118.19239807128906, 869.2850341796875, 265.16973876953125, 641.314208984375, 65.24990844726562, 382.99859619140625, 228.31748962402344, 228.7578582763672, 310.0921630859375, 13.20639419555664, 259.8193664550781, 404.50433349609375, 222.63641357421875, 137.44142150878906, 750.7422485351562, -201.4644317626953, 121.75774383544922, 429.2923278808594, 135.1089324951172, 295.602294921875, 89.45333099365234, 718.2083740234375, 300.34326171875, 72.27735900878906, 535.7094116210938, -138.04769897460938, 540.5747680664062, 278.5023193359375, 63.994911193847656, -615.139404296875, 370.810302734375, 291.42034912109375, 64.52460479736328, 339.5350341796875, 192.12857055664062, 1106.5325927734375, -70.50475311279297, 205.24176025390625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000450.npy"} +{"epoch": 0.6607929515418502, "step": 451, "batch_size": 64, "mean": 227.8025360107422, "std": 387.2896728515625, "min": -1107.637451171875, "p10": -201.02688903808593, "median": 228.78921508789062, "p90": 630.1599365234376, "max": 1052.87353515625, "pos_frac": 0.78125, "sample": [20.249603271484375, -100.22109985351562, -39.417694091796875, 322.98565673828125, 518.0488891601562, 232.47268676757812, 343.1279296875, 519.24072265625, -51.493247985839844, 231.84002685546875, 535.68310546875, 984.4944458007812, 551.7741088867188, 824.3981323242188, 136.63665771484375, 225.7384033203125, 85.9944839477539, -1107.637451171875, -510.90985107421875, -206.135009765625, 211.560546875, 57.59302520751953, 426.2408752441406, 563.4637451171875, 597.1762084960938, 213.12986755371094, 299.91912841796875, 218.9686737060547, 375.57171630859375, 643.6041259765625, 169.13198852539062, 1052.87353515625, 924.248291015625, 368.3932800292969, 401.9414367675781, 592.9408569335938, 444.125, -238.8563995361328, 300.0490417480469, 281.08746337890625, -40.95258331298828, -106.05648803710938, 452.0843811035156, -938.4703369140625, 150.75314331054688, 114.21895599365234, 177.19166564941406, 216.203125, 303.744140625, 87.5843734741211, 598.7901611328125, -105.94084167480469, -189.10794067382812, 705.2528686523438, 540.1401977539062, 126.92178344726562, 359.602294921875, -404.2597351074219, 186.1610107421875, 8.618456840515137, 669.2890625, -234.3157196044922, 328.95404052734375, 152.922607421875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000451.npy"} +{"epoch": 0.6622613803230544, "step": 452, "batch_size": 64, "mean": 293.60125732421875, "std": 287.8074645996094, "min": -346.0213317871094, "p10": -84.8662673950195, "median": 266.9399719238281, "p90": 676.8442687988281, "max": 942.7673950195312, "pos_frac": 0.84375, "sample": [717.2789306640625, -162.28268432617188, 643.3192749023438, 606.773193359375, -93.5779037475586, 594.3490600585938, 484.10882568359375, 431.5977783203125, 441.8940734863281, -64.53911590576172, 567.6090087890625, 291.8272705078125, 204.96377563476562, 130.6414337158203, 378.153076171875, 256.633544921875, 678.3223876953125, 353.585693359375, 415.89349365234375, 31.715131759643555, 328.81939697265625, 37.42481231689453, 404.4844970703125, 62.35443878173828, 225.36622619628906, 796.0260620117188, 77.41069030761719, -273.01300048828125, 541.241943359375, 208.56396484375, -108.8231201171875, 427.533935546875, 148.26812744140625, 195.11102294921875, 783.037841796875, 126.89231872558594, 277.24639892578125, 630.64013671875, 165.89691162109375, 240.24005126953125, 222.6390380859375, -167.891845703125, 203.60455322265625, 854.240966796875, 289.02166748046875, 402.4049072265625, 193.89715576171875, 837.019775390625, 942.7673950195312, 85.77702331542969, 174.2375946044922, 333.67767333984375, 394.37811279296875, 57.00931930541992, -346.0213317871094, 343.41351318359375, -111.94998168945312, 521.488525390625, 455.000732421875, 673.3953247070312, -8.380332946777344, 200.64822387695312, -25.106094360351562, 62.21953582763672], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000452.npy"} +{"epoch": 0.6637298091042585, "step": 453, "batch_size": 64, "mean": 251.96585083007812, "std": 332.6508483886719, "min": -370.9652099609375, "p10": -125.4331214904785, "median": 190.73213958740234, "p90": 621.2883178710938, "max": 1572.2236328125, "pos_frac": 0.765625, "sample": [600.1127319335938, 623.1992797851562, -65.54045867919922, 115.47881317138672, 219.59783935546875, 64.38642883300781, 224.24310302734375, 159.6730194091797, 126.68594360351562, 45.79010009765625, 119.2868881225586, 382.0733642578125, 560.8529663085938, 577.5508422851562, 29.38473892211914, 120.28907775878906, 297.15802001953125, 252.48709106445312, -97.12895202636719, 199.43116760253906, 605.9253540039062, -148.9853515625, 247.28326416015625, 338.8505554199219, 738.19775390625, -245.48712158203125, -55.32017517089844, 616.8294067382812, -370.9652099609375, 354.9612731933594, -76.03724670410156, 182.03311157226562, 428.8619689941406, 825.895263671875, 742.941162109375, 18.836994171142578, -94.39686584472656, 180.91845703125, 1572.2236328125, 594.4951171875, 347.4555969238281, -179.4187774658203, 758.3800048828125, 16.834922790527344, 311.15594482421875, 152.68663024902344, 535.9821166992188, 150.2672119140625, 493.06597900390625, 484.12774658203125, -140.87391662597656, 435.41259765625, -158.115234375, 496.42333984375, 0.631988525390625, -104.3559341430664, -134.46620178222656, 201.47793579101562, 824.9912109375, 392.348388671875, -16.987751007080078, -44.704734802246094, 159.62066650390625, 131.80020141601562], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000453.npy"} +{"epoch": 0.6651982378854625, "step": 454, "batch_size": 64, "mean": 297.4459228515625, "std": 369.9325256347656, "min": -634.5756225585938, "p10": -74.1918891906738, "median": 255.79815673828125, "p90": 741.6256225585939, "max": 1430.589599609375, "pos_frac": 0.828125, "sample": [237.12457275390625, -24.11328125, 950.7445678710938, 225.78358459472656, 562.11474609375, 19.908767700195312, 755.634033203125, 307.3973388671875, -37.82424545288086, 163.9800567626953, -593.2594604492188, 523.28076171875, 306.5178527832031, 439.17498779296875, 260.51495361328125, 251.08135986328125, 487.7652893066406, 348.1781005859375, -47.39678192138672, -634.5756225585938, 90.607177734375, 503.7330322265625, -85.67550659179688, 810.3516845703125, -132.50546264648438, -115.08486938476562, 653.7442626953125, 29.787572860717773, 58.97273254394531, 249.58151245117188, 504.5579528808594, 97.99229431152344, 361.626220703125, 99.30791473388672, 418.345703125, 98.4959945678711, 281.6638488769531, 195.09591674804688, 510.45635986328125, 215.76023864746094, 484.9649658203125, 465.2188415527344, 29.246292114257812, 452.10308837890625, -14.17523193359375, 708.9393310546875, 1430.589599609375, 317.4941101074219, 19.04313087463379, 115.59689331054688, 1335.8048095703125, 1040.7862548828125, 306.5810241699219, 479.61260986328125, 84.19017791748047, 409.3146057128906, 693.7560424804688, -136.98568725585938, 405.2530212402344, -88.10325622558594, 240.5426483154297, 41.32444381713867, 840.896484375, 25.698352813720703], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000454.npy"} +{"epoch": 0.6666666666666666, "step": 455, "batch_size": 64, "mean": 362.0201416015625, "std": 401.32427978515625, "min": -593.9193115234375, "p10": -80.87234344482417, "median": 280.1631164550781, "p90": 1037.4494934082038, "max": 1179.255615234375, "pos_frac": 0.859375, "sample": [181.4441680908203, 178.990234375, 780.4216918945312, 1099.0023193359375, -374.17877197265625, 341.5760192871094, 699.4007568359375, -24.595197677612305, 1161.803955078125, -182.6181182861328, 1102.870849609375, 200.54612731933594, 893.8262329101562, -271.4051818847656, 1129.25830078125, -128.36669921875, 276.52191162109375, 845.1041259765625, 210.97744750976562, 226.5226593017578, 9.050323486328125, 576.6802978515625, 323.7691650390625, 17.630258560180664, 376.62152099609375, 1129.85693359375, 721.58935546875, -154.91259765625, 315.123046875, 250.04795837402344, 242.9654541015625, 575.921875, 173.85406494140625, 191.99319458007812, 176.5689239501953, 201.85687255859375, 297.32281494140625, 415.3847351074219, 99.384033203125, -593.9193115234375, 427.6044921875, 94.05429077148438, 283.8043212890625, 591.6510620117188, 67.42625427246094, 37.141334533691406, -104.99111938476562, 487.646484375, 869.10693359375, 225.60885620117188, 729.1078491210938, 103.003173828125, 452.9942626953125, 545.677001953125, 125.33429718017578, 122.29061889648438, -12.824169158935547, 865.8023071289062, 382.57159423828125, 356.66961669921875, 1179.255615234375, 380.1614685058594, 112.80146789550781, 1153.5006103515625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000455.npy"} +{"epoch": 0.6681350954478708, "step": 456, "batch_size": 64, "mean": 347.93292236328125, "std": 340.36724853515625, "min": -226.57638549804688, "p10": 7.8407041549682726, "median": 271.0441131591797, "p90": 865.8963623046877, "max": 1289.8060302734375, "pos_frac": 0.90625, "sample": [191.852294921875, 232.6650848388672, 822.0548095703125, 107.031982421875, 18.814208984375, 354.2228088378906, 165.00881958007812, 28.141265869140625, 311.3789367675781, 775.8161010742188, 297.1036376953125, 29.55107307434082, 92.63002014160156, 1222.4259033203125, 346.5072021484375, -18.88379669189453, 48.99073791503906, 178.68051147460938, 300.3680419921875, 535.0596923828125, 357.8692626953125, 77.40469360351562, -88.819580078125, 1072.15478515625, -94.82806396484375, 437.32012939453125, 1289.8060302734375, 438.3520812988281, 566.6539916992188, 39.54559326171875, 355.197265625, 523.5418701171875, 672.47216796875, 189.6832733154297, 135.47509765625, 229.36618041992188, 3.1377735137939453, 467.8868408203125, 1055.51220703125, 154.94711303710938, 785.8892211914062, 442.8792724609375, 316.8399658203125, 72.30974578857422, -226.57638549804688, 216.742919921875, 75.7500991821289, -8.863372802734375, 891.6287841796875, 164.86041259765625, 884.293701171875, 1017.0117797851562, 335.1500549316406, -12.19915771484375, 189.09909057617188, 93.61045837402344, 522.0618896484375, 28.159698486328125, 454.45654296875, 221.612548828125, 353.5105895996094, 459.4282531738281, 822.96923828125, 244.98458862304688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000456.npy"} +{"epoch": 0.6696035242290749, "step": 457, "batch_size": 64, "mean": 340.5579833984375, "std": 333.78692626953125, "min": -325.73370361328125, "p10": -41.361806869506836, "median": 397.14854431152344, "p90": 709.6432861328126, "max": 1282.03466796875, "pos_frac": 0.828125, "sample": [-37.783714294433594, 489.1412048339844, 496.68402099609375, 455.7715759277344, 450.55596923828125, 527.1509399414062, 654.6589965820312, 648.9677124023438, 95.16085052490234, 47.2335205078125, 1042.2303466796875, 387.878173828125, 117.27154541015625, 204.39405822753906, 73.81471252441406, 249.3786163330078, 541.0594482421875, 252.1487579345703, -19.891136169433594, 603.333984375, 760.9124145507812, -11.73537826538086, 274.66119384765625, 449.47723388671875, 445.90728759765625, 976.6224975585938, -325.73370361328125, -258.1269226074219, 717.8095703125, 533.2464599609375, 86.00648498535156, 441.4417419433594, 510.2033386230469, 543.55224609375, 313.951904296875, -4.4673614501953125, 646.474853515625, 528.2582397460938, 189.31253051757812, -68.4869384765625, 522.3917846679688, 900.0125122070312, 414.90472412109375, 98.06797790527344, 15.022663116455078, 69.85932922363281, 579.5730590820312, 16.90979766845703, 538.1943969726562, 376.1943054199219, -167.18621826171875, 690.588623046875, 292.9913330078125, 831.326904296875, -278.5621337890625, 417.333740234375, 5.9141082763671875, -146.93182373046875, 406.4189147949219, -42.8952751159668, 290.35577392578125, 608.6675415039062, 46.077415466308594, 1282.03466796875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000457.npy"} +{"epoch": 0.671071953010279, "step": 458, "batch_size": 64, "mean": 306.0711975097656, "std": 374.14990234375, "min": -448.5268859863281, "p10": -201.47963409423826, "median": 268.1305236816406, "p90": 661.0192810058595, "max": 1561.439697265625, "pos_frac": 0.796875, "sample": [-448.5268859863281, 598.1360473632812, 483.134765625, 211.2749786376953, 578.9970703125, -145.41197204589844, 1561.439697265625, -227.50523376464844, 340.2298583984375, 579.4564208984375, 80.18944549560547, 261.9588623046875, 587.201171875, 395.1252136230469, 783.0755004882812, -106.3829345703125, -223.05519104003906, -2.1890506744384766, 222.85513305664062, 618.3678588867188, 383.7616882324219, 509.2445373535156, 81.5790023803711, 603.9749145507812, 265.58026123046875, 409.09356689453125, 291.0860290527344, 163.51898193359375, 378.8486328125, 529.296630859375, 179.40277099609375, 827.2698974609375, 505.3582458496094, 908.3046875, 110.27423095703125, -39.4310302734375, 499.5700988769531, 89.07572937011719, 501.2822265625, 186.4969482421875, 946.6663818359375, 375.5282287597656, -54.883811950683594, -357.1360778808594, 261.90106201171875, -253.61669921875, 242.0904541015625, -196.74554443359375, 245.06346130371094, 1429.1241455078125, 561.2601318359375, 234.27926635742188, 255.0084686279297, 98.82841491699219, 173.0418701171875, 679.2984619140625, 288.0750427246094, 442.6339111328125, 307.58270263671875, 270.6807861328125, -203.50852966308594, 393.73828125, -314.2724304199219, 231.9601593017578], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000458.npy"} +{"epoch": 0.6725403817914831, "step": 459, "batch_size": 64, "mean": 320.7283935546875, "std": 420.7595520019531, "min": -627.6669921875, "p10": -163.54122772216797, "median": 285.9645080566406, "p90": 849.1275878906253, "max": 1415.83642578125, "pos_frac": 0.796875, "sample": [86.69477844238281, 39.03143310546875, 397.11566162109375, 276.52484130859375, 206.04046630859375, 590.0259399414062, -416.05072021484375, 138.1925506591797, 517.64892578125, -403.5251159667969, 539.6583251953125, 287.4879150390625, 475.0908508300781, 614.2340087890625, -32.41618347167969, 547.9271850585938, -165.06753540039062, 519.33837890625, 327.281005859375, 945.164306640625, 27.68951416015625, 875.2542114257812, 770.7869873046875, 5.717906951904297, 379.4080505371094, -159.97984313964844, 580.2699584960938, -324.8031005859375, 11.553417205810547, -33.664207458496094, 254.74220275878906, -39.105918884277344, 912.28857421875, -627.6669921875, 1415.83642578125, 350.64923095703125, 1073.0198974609375, 407.240478515625, 279.9703369140625, -429.1092529296875, 186.27059936523438, 138.84149169921875, 552.6209716796875, 184.39801025390625, 72.872314453125, 357.57830810546875, -439.7454528808594, 183.6712646484375, 427.3292541503906, 643.2113037109375, 675.2853393554688, 89.45536804199219, 283.3404541015625, 1142.8489990234375, 284.44110107421875, 107.15071868896484, 1168.596923828125, -135.87022399902344, 768.6244506835938, 759.523193359375, 788.1654663085938, 370.85601806640625, -13.43679428100586, 710.0950927734375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000459.npy"} +{"epoch": 0.6740088105726872, "step": 460, "batch_size": 64, "mean": 355.7840576171875, "std": 395.54437255859375, "min": -306.54083251953125, "p10": -148.16010513305662, "median": 341.1694641113281, "p90": 840.4096435546876, "max": 1448.912841796875, "pos_frac": 0.78125, "sample": [732.880859375, 692.1453247070312, 551.0185546875, -81.44525146484375, 130.02330017089844, 826.314453125, 846.450439453125, 285.8681945800781, -160.37435913085938, 245.6835479736328, -119.6601791381836, 724.6344604492188, 99.29350280761719, 559.8763427734375, 183.70408630371094, 236.22515869140625, 268.3830871582031, -306.54083251953125, 110.92280578613281, 1448.912841796875, 1330.71875, 318.3748779296875, 418.1047668457031, -13.47186279296875, 1036.5279541015625, 227.21890258789062, 1328.7943115234375, 390.04241943359375, -99.6172103881836, 35.671539306640625, 387.0032653808594, -169.66122436523438, 583.4144897460938, -182.84437561035156, 579.135498046875, 591.5968017578125, -79.9342269897461, 452.8235168457031, 290.1300964355469, -66.95995330810547, 933.91552734375, 438.3100891113281, 623.7263793945312, 492.4163818359375, 363.96405029296875, 147.01577758789062, 132.8737030029297, 19.863540649414062, 384.01300048828125, 256.20086669921875, -219.32188415527344, 74.34212493896484, 175.726806640625, 535.515380859375, -303.570068359375, 589.069580078125, 778.1556396484375, 494.84808349609375, -176.75930786132812, 873.9390869140625, 597.3633422851562, 492.91595458984375, -9.783790588378906, 444.0538330078125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000460.npy"} +{"epoch": 0.6754772393538914, "step": 461, "batch_size": 64, "mean": 280.931640625, "std": 417.7883605957031, "min": -650.696044921875, "p10": -111.27392501831054, "median": 221.50686645507812, "p90": 738.5091186523438, "max": 1525.363037109375, "pos_frac": 0.71875, "sample": [142.74655151367188, -58.14686584472656, 741.2542724609375, 381.3770446777344, -113.77381134033203, 642.3790283203125, -650.696044921875, 1525.363037109375, 113.35092163085938, 597.5567626953125, 76.47090911865234, -69.12149810791016, 314.90863037109375, 510.8891906738281, 1095.03466796875, 457.135498046875, 715.0709228515625, -29.242124557495117, 248.5790557861328, 461.09814453125, 424.918701171875, 732.103759765625, 682.4309692382812, 1136.281982421875, -262.45660400390625, -34.099090576171875, 967.0717163085938, 1293.8172607421875, 171.13165283203125, 218.21405029296875, 432.0984802246094, 189.85569763183594, 275.52264404296875, 4.092376708984375, 59.9080810546875, 198.09425354003906, -212.62750244140625, -2.560546875, -99.18255615234375, -94.29367065429688, 891.8041381835938, -19.84449005126953, 544.9193725585938, 679.77197265625, -441.3001708984375, 372.6969909667969, 265.91387939453125, 579.9852294921875, 16.53155517578125, -105.44085693359375, 580.5147705078125, -19.238805770874023, 73.41360473632812, 134.05084228515625, 584.0394287109375, -100.7803726196289, -331.4525451660156, 190.70932006835938, 340.51824951171875, 224.7996826171875, 132.9307861328125, -444.9661865234375, 338.58453369140625, 308.90826416015625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000461.npy"} +{"epoch": 0.6769456681350955, "step": 462, "batch_size": 64, "mean": 222.7821807861328, "std": 427.90301513671875, "min": -583.0388793945312, "p10": -264.3329162597656, "median": 168.7634506225586, "p90": 714.0878051757813, "max": 1460.1707763671875, "pos_frac": 0.703125, "sample": [378.5354309082031, 19.753517150878906, 33.06671905517578, -224.7887420654297, 413.4027404785156, 789.1065673828125, 25.587846755981445, 726.455078125, 499.62103271484375, 946.3748779296875, 520.1229248046875, -244.87994384765625, -432.892333984375, 135.90524291992188, 175.08657836914062, 428.388671875, 504.939453125, 226.62112426757812, -219.16864013671875, 611.9450073242188, -216.41934204101562, 15.10888671875, 312.4705505371094, -426.91644287109375, -143.3492889404297, 65.39126586914062, 685.2308349609375, -583.0388793945312, 24.761234283447266, -87.41670227050781, 1359.6806640625, 525.22021484375, -257.0340576171875, 202.96446228027344, 530.448974609375, 1004.6546020507812, -267.46099853515625, -24.75296974182129, 1460.1707763671875, 124.60844421386719, 162.44032287597656, -132.19932556152344, -196.90638732910156, 150.81976318359375, -295.2225646972656, -71.44300842285156, 397.1698303222656, 490.95745849609375, -383.7947692871094, 136.74118041992188, -81.51510620117188, 266.94775390625, 1231.474609375, 402.1609191894531, 373.33868408203125, -309.84527587890625, 217.2855224609375, 647.1258544921875, 443.93878173828125, 531.523193359375, 35.63417053222656, 128.34530639648438, 224.7661895751953, 270.8108825683594], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000462.npy"} +{"epoch": 0.6784140969162996, "step": 463, "batch_size": 64, "mean": 293.15423583984375, "std": 440.3570251464844, "min": -577.8319091796875, "p10": -258.4828506469726, "median": 255.0908203125, "p90": 746.1726196289062, "max": 1429.7469482421875, "pos_frac": 0.75, "sample": [89.97219848632812, 540.642578125, 142.02891540527344, 96.7247314453125, -119.81981658935547, 471.76922607421875, -145.08245849609375, 380.927001953125, 523.6556396484375, -25.563339233398438, 547.4712524414062, -308.2776794433594, 562.448974609375, 730.5924072265625, 165.27423095703125, 364.3276062011719, 317.5959777832031, 736.7554931640625, 689.495849609375, 142.0238800048828, 58.24407958984375, 1395.0087890625, 366.0285949707031, 310.0613708496094, 893.8206787109375, 584.45361328125, 137.43710327148438, 258.1310119628906, -110.06838989257812, 239.22854614257812, -262.3121032714844, -98.22794342041016, -577.8319091796875, 161.6419677734375, 744.0882568359375, -354.32647705078125, -108.70906066894531, -156.28302001953125, 490.7217102050781, -249.5479278564453, 548.4832763671875, 298.9754333496094, 692.900634765625, 252.05062866210938, 747.06591796875, 1110.517578125, -191.28729248046875, 132.8924560546875, 887.08740234375, -514.3088989257812, 241.7187042236328, 1429.7469482421875, 204.92909240722656, 741.4830322265625, -456.975830078125, 27.123565673828125, 370.09698486328125, 106.13656616210938, -400.71466064453125, 431.5265808105469, 501.9712829589844, 488.2995910644531, 1291.46337890625, 196.16688537597656], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000463.npy"} +{"epoch": 0.6798825256975036, "step": 464, "batch_size": 64, "mean": 470.54376220703125, "std": 461.91754150390625, "min": -540.6650390625, "p10": -50.74246444702145, "median": 497.5836639404297, "p90": 960.7768981933594, "max": 2051.390625, "pos_frac": 0.875, "sample": [-282.200439453125, 976.0267333984375, 424.0769958496094, 274.9817199707031, 18.81536865234375, 820.9678955078125, 498.658203125, 106.24300384521484, 21.594024658203125, 617.8001098632812, 772.7891235351562, 656.4207763671875, -358.3007507324219, 939.5657958984375, 322.3157653808594, 693.0889282226562, 527.2799072265625, 167.57957458496094, 385.38702392578125, 733.2118530273438, 778.5632934570312, 579.0105590820312, 788.2122192382812, -179.7229766845703, 999.0249633789062, 226.43099975585938, 496.5091247558594, 468.7484130859375, -446.1500549316406, 241.83038330078125, 839.144775390625, 195.1737060546875, 576.5550537109375, 37.166629791259766, 340.4009094238281, 573.9061889648438, 824.7545776367188, 262.55181884765625, 356.3006591796875, 285.54595947265625, 218.05975341796875, -66.2867431640625, 815.288818359375, 578.9928588867188, 260.6178894042969, 567.3223266601562, -14.472480773925781, 130.83596801757812, 14.622886657714844, 540.705078125, 583.2286987304688, 1410.92822265625, 2051.390625, 1327.9561767578125, 509.77899169921875, 969.8673706054688, 877.7020263671875, -71.8462905883789, 1515.7237548828125, 781.5982055664062, 624.5858154296875, -540.6650390625, 353.19500732421875, 115.4117202758789], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000464.npy"} +{"epoch": 0.6813509544787077, "step": 465, "batch_size": 64, "mean": 396.10296630859375, "std": 499.48736572265625, "min": -1346.3699951171875, "p10": -214.08016052246091, "median": 408.97564697265625, "p90": 914.915966796875, "max": 1837.8199462890625, "pos_frac": 0.84375, "sample": [912.99951171875, 1447.1923828125, 500.6614074707031, 201.34104919433594, -219.1689453125, 29.461456298828125, 589.8316040039062, 252.69277954101562, -9.661643981933594, 57.788604736328125, 934.2687377929688, 245.07220458984375, 302.48345947265625, 202.03485107421875, 636.573486328125, 635.3890991210938, 356.81097412109375, 494.3453674316406, 227.7351837158203, 58.69799041748047, -658.1807250976562, 992.1699829101562, 703.6043701171875, 232.58419799804688, 1837.8199462890625, -169.4985809326172, 438.36383056640625, 257.63330078125, 379.58746337890625, 490.5828857421875, 708.2203979492188, 339.865966796875, -229.38766479492188, 101.72128295898438, 292.00360107421875, 499.5809326171875, 49.611366271972656, 340.6844482421875, -202.20632934570312, 902.6553955078125, 888.729248046875, 260.0908508300781, 520.8978271484375, -681.8444213867188, 763.767822265625, 690.5928955078125, 134.3833465576172, 915.7373046875, 214.4389190673828, 742.658447265625, 890.5111083984375, -234.84124755859375, 1027.9681396484375, -1346.3699951171875, 1000.4232177734375, 786.670166015625, -269.65185546875, 557.5234985351562, 701.1934204101562, 482.3627624511719, 96.40489196777344, 488.981689453125, 834.9829711914062, 721.0136108398438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000465.npy"} +{"epoch": 0.6828193832599119, "step": 466, "batch_size": 64, "mean": 338.7154541015625, "std": 483.32550048828125, "min": -908.1202392578125, "p10": -305.4308380126953, "median": 356.973876953125, "p90": 963.8281616210938, "max": 1228.3673095703125, "pos_frac": 0.78125, "sample": [543.4033203125, -601.8351440429688, 355.42510986328125, -295.6103515625, 950.8656005859375, 358.52264404296875, 597.6998291015625, 202.3474578857422, 654.4549560546875, 54.83116149902344, 244.9322509765625, 1007.1543579101562, -449.68115234375, 848.6710205078125, -35.20821762084961, 110.3828125, 397.12274169921875, 60.93657302856445, 560.4052734375, 153.63983154296875, 566.0338134765625, 155.44046020507812, 606.1590576171875, 456.33148193359375, 803.5492553710938, 62.62240982055664, 12.510108947753906, 969.383544921875, 255.0850067138672, 569.1093139648438, 485.55535888671875, 120.27135467529297, 748.44775390625, 1228.3673095703125, -908.1202392578125, 990.7473754882812, 363.3971862792969, 699.7169189453125, 1171.1602783203125, 504.677734375, 931.0022583007812, -1.1652336120605469, 162.8721923828125, 166.47116088867188, 659.7247924804688, 1175.6583251953125, -153.58975219726562, -549.3624267578125, 3.4711532592773438, 70.8040771484375, 1085.845458984375, 245.2025604248047, 824.2222900390625, -19.814857482910156, 896.315673828125, 416.8685302734375, -563.4241943359375, -10.96624755859375, 899.462890625, -547.083251953125, 485.1271057128906, -6.322196960449219, -309.6396179199219, 237.20278930664062], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000466.npy"} +{"epoch": 0.684287812041116, "step": 467, "batch_size": 64, "mean": 344.94549560546875, "std": 691.1635131835938, "min": -1372.1434326171875, "p10": -391.53819885253904, "median": 302.49952697753906, "p90": 1324.9438232421876, "max": 1915.69580078125, "pos_frac": 0.75, "sample": [861.5558471679688, 1660.9798583984375, 620.1206665039062, 1297.8321533203125, -822.6565551757812, -1372.1434326171875, 587.369873046875, 510.9756164550781, 467.4655456542969, 299.2591857910156, 222.93783569335938, 481.3094482421875, 1915.69580078125, 178.9298858642578, 483.745849609375, 575.9805908203125, 57.24842071533203, 791.3795166015625, 1370.4136962890625, 839.119873046875, 305.7398681640625, 46.98960494995117, 47.277652740478516, 536.1378173828125, -390.5595703125, -391.9576110839844, -643.779052734375, 494.3034362792969, 1387.0712890625, 380.18829345703125, 1102.2579345703125, 136.27146911621094, 479.5159912109375, -502.5356750488281, 1345.94482421875, 16.544830322265625, 1336.5631103515625, 111.51849365234375, -202.37908935546875, 868.6725463867188, 178.7734375, -1183.2933349609375, 166.48675537109375, -337.2974853515625, 28.667556762695312, 373.5972900390625, 12.3199462890625, -333.64019775390625, -276.15240478515625, 682.1768188476562, 953.9675903320312, 901.924560546875, 1238.7930908203125, 193.31979370117188, 1008.0152587890625, -3.4680862426757812, 654.1277465820312, -91.19192504882812, -314.68450927734375, 1793.45556640625, -1176.6011962890625, -75.10498046875, 50.03972625732422, 140.9755859375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000467.npy"} +{"epoch": 0.6857562408223201, "step": 468, "batch_size": 64, "mean": 424.33441162109375, "std": 558.58251953125, "min": -1014.62744140625, "p10": -114.35799713134766, "median": 327.26109313964844, "p90": 1306.3058227539063, "max": 2087.812744140625, "pos_frac": 0.859375, "sample": [438.8289489746094, 116.30626678466797, 1323.299072265625, 397.5126953125, 1341.75927734375, 1143.4393310546875, 421.9466552734375, 521.589599609375, 485.4667663574219, 366.8551330566406, 320.2395935058594, 573.243896484375, 166.5986328125, 355.6669006347656, 194.82894897460938, 69.7962646484375, 177.34800720214844, -186.41986083984375, 390.3830871582031, 280.5137939453125, 234.314208984375, 23.050548553466797, 82.51891326904297, 324.6318664550781, 417.40130615234375, 1266.6549072265625, 233.698486328125, 1429.1617431640625, 1137.080078125, 203.92477416992188, -517.2420043945312, 37.19715881347656, 727.3363647460938, 223.7152099609375, 541.9698486328125, 654.6627197265625, 171.21099853515625, 190.59378051757812, 556.05712890625, -1014.62744140625, 1863.8177490234375, 89.55535888671875, 463.0738525390625, 323.3361511230469, 329.89031982421875, 41.715484619140625, -141.7899169921875, 480.4083557128906, 17.234329223632812, 1347.2994384765625, 359.5461120605469, 2053.654541015625, 262.9426574707031, 647.77978515625, -30.88933563232422, 507.878662109375, 533.103759765625, 2087.812744140625, 469.1103820800781, -110.42401123046875, 316.2529296875, -276.0986328125, -116.04399108886719, -184.2786865234375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000468.npy"} +{"epoch": 0.6872246696035242, "step": 469, "batch_size": 64, "mean": 404.11572265625, "std": 476.52508544921875, "min": -861.827392578125, "p10": -105.64086074829099, "median": 443.3179473876953, "p90": 1108.3641601562501, "max": 1536.84521484375, "pos_frac": 0.828125, "sample": [743.66748046875, 845.771484375, 177.3033447265625, 496.9371643066406, 1536.84521484375, 465.4997863769531, 1124.212890625, 110.69121551513672, -41.99320983886719, 182.90350341796875, 628.005615234375, -35.99446105957031, -336.80621337890625, 138.43252563476562, 1208.30859375, -8.811531066894531, 467.6631774902344, 574.099609375, 560.8328857421875, 495.0758361816406, 1071.3837890625, 233.16390991210938, 91.99522399902344, 136.7269744873047, 1031.670166015625, 15.31640625, 609.4945678710938, 342.7602844238281, 1386.431884765625, -346.50103759765625, 665.531982421875, -114.34944152832031, 712.76708984375, 269.5197448730469, 509.8594970703125, 237.47410583496094, 12.443687438964844, -199.44436645507812, 37.412811279296875, -413.68109130859375, 500.8202209472656, 575.0912475585938, 876.9534912109375, 72.26329040527344, -358.9095458984375, 181.43643188476562, 239.76809692382812, 919.4379272460938, 352.44915771484375, 91.47583770751953, 1193.5999755859375, 649.8646240234375, 601.2635498046875, 1149.452880859375, 1356.120361328125, 641.8193969726562, 494.6690368652344, 480.5345153808594, 74.43003845214844, -85.32083892822266, 123.32794189453125, 421.1361083984375, 550.9292602539062, -861.827392578125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000469.npy"} +{"epoch": 0.6886930983847284, "step": 470, "batch_size": 64, "mean": 372.99566650390625, "std": 469.57769775390625, "min": -1081.9097900390625, "p10": -188.1822036743164, "median": 398.5979766845703, "p90": 916.4060974121095, "max": 1448.4193115234375, "pos_frac": 0.78125, "sample": [218.0120849609375, 474.954345703125, -76.72552490234375, 195.38064575195312, 267.3999938964844, -128.3125, 262.8813781738281, 129.32467651367188, -107.38104248046875, -520.1324462890625, 512.0711669921875, 752.996826171875, 513.573974609375, 389.3021240234375, 620.4729614257812, 853.4658203125, -189.16917419433594, 874.6764526367188, -1081.9097900390625, 758.7300415039062, -230.06027221679688, 656.3097534179688, 151.71429443359375, -318.79864501953125, 279.59722900390625, 228.40927124023438, -158.92800903320312, 321.3338623046875, 450.79693603515625, 407.8938293457031, 613.952392578125, -347.21478271484375, 1024.6861572265625, 140.97023010253906, 2.4620914459228516, 898.911865234375, 460.51446533203125, 493.2960205078125, 1448.4193115234375, 675.6357421875, 1232.690185546875, 151.15145874023438, 686.829833984375, 796.1162109375, -115.10298156738281, 1279.3187255859375, 489.7653503417969, 308.455810546875, 655.3307495117188, 866.0183715820312, 479.9697265625, -185.8792724609375, -120.45086669921875, 283.3850402832031, 542.9323120117188, 949.2647705078125, -339.70733642578125, 1167.4266357421875, 760.4599609375, 66.95597839355469, 923.9036254882812, 190.29171752929688, 380.01715087890625, 503.0661926269531], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000470.npy"} +{"epoch": 0.6901615271659325, "step": 471, "batch_size": 64, "mean": 326.601806640625, "std": 506.4250793457031, "min": -807.8607177734375, "p10": -232.05289459228516, "median": 236.25523376464844, "p90": 820.9632141113282, "max": 1969.1571044921875, "pos_frac": 0.75, "sample": [1198.0877685546875, -35.109100341796875, 100.0465087890625, 855.5552978515625, 178.45263671875, 661.6415405273438, 775.0750732421875, 1969.1571044921875, 824.21240234375, 239.75070190429688, -11.007949829101562, 453.3077392578125, -145.6298370361328, 180.68612670898438, 392.6742248535156, -163.90435791015625, -236.65728759765625, -191.5249786376953, 232.759765625, 54.11642074584961, 1767.5791015625, -221.30931091308594, 304.5442810058594, 346.8484802246094, 680.2628173828125, 182.9970703125, -251.76055908203125, -54.21405029296875, -389.2109069824219, 433.60858154296875, -352.6220397949219, 938.001220703125, 668.6854858398438, 813.3817749023438, 457.7619323730469, 724.15673828125, 573.5331420898438, 203.03968811035156, 409.53546142578125, 475.65216064453125, 98.92020416259766, 113.0593490600586, 157.89183044433594, -21.589282989501953, 688.4217529296875, 1636.0657958984375, 229.00238037109375, -807.8607177734375, 702.564208984375, 56.29266357421875, -399.7242431640625, 277.8621826171875, 7.635934829711914, 351.02618408203125, 105.37757873535156, 673.6925659179688, 27.255685806274414, -202.10240173339844, 505.72589111328125, 203.50405883789062, 594.6183471679688, 688.5936889648438, -402.9371032714844, 577.0574340820312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000471.npy"} +{"epoch": 0.6916299559471366, "step": 472, "batch_size": 64, "mean": 393.58203125, "std": 594.5914306640625, "min": -914.2252807617188, "p10": -223.21406402587886, "median": 252.62144470214844, "p90": 1211.814013671875, "max": 2170.464599609375, "pos_frac": 0.796875, "sample": [-914.2252807617188, -362.80303955078125, 866.248779296875, -364.3938293457031, 392.28680419921875, 134.99142456054688, 714.4512939453125, -244.49951171875, 23.945863723754883, -492.1836853027344, 324.27325439453125, 27.152591705322266, 163.44224548339844, 360.6819763183594, 32.846832275390625, 979.9588623046875, -54.83876037597656, 575.5971069335938, 249.665283203125, 135.461669921875, 1208.947021484375, 39.074981689453125, -0.552978515625, 25.10980224609375, 192.816162109375, 457.49163818359375, 378.6042785644531, -127.3227767944336, 314.02435302734375, 776.8436279296875, 45.101776123046875, 781.3541259765625, 244.51138305664062, 832.7808837890625, 2170.464599609375, 1629.48388671875, 203.9841766357422, 1498.553466796875, 205.1111297607422, 492.6615295410156, 343.0831298828125, 1159.6700439453125, 109.04971313476562, 287.60052490234375, -329.0391845703125, 1290.2701416015625, 1676.43603515625, 152.0454864501953, 1213.042724609375, 211.60804748535156, 2042.59912109375, -173.5480194091797, -117.81440734863281, -92.99251556396484, 820.6223754882812, 737.0366821289062, 658.4908447265625, 255.57760620117188, 330.8310852050781, -363.25421142578125, 218.48963928222656, 429.4863586425781, 278.9627380371094, 133.89405822753906], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000472.npy"} +{"epoch": 0.6930983847283406, "step": 473, "batch_size": 64, "mean": 189.72039794921875, "std": 703.5259399414062, "min": -1805.8011474609375, "p10": -433.12962646484374, "median": 180.0978012084961, "p90": 825.2387939453125, "max": 3113.601806640625, "pos_frac": 0.734375, "sample": [-343.1841735839844, 770.1514892578125, 145.1547088623047, 58.56932067871094, -351.6322937011719, 708.2892456054688, 22.30950927734375, 248.05233764648438, 432.1905517578125, 417.8824768066406, 811.8304443359375, 688.1359252929688, 98.59324645996094, 75.14935302734375, -451.1961669921875, -443.1643371582031, 207.76644897460938, -219.14622497558594, 172.18299865722656, 1234.28662109375, -305.6956481933594, 27.719707489013672, 598.5740356445312, 308.4053039550781, 121.27261352539062, 135.814697265625, 329.06768798828125, 359.47772216796875, 167.67953491210938, 108.34445190429688, -1805.8011474609375, 86.14478302001953, -409.7153015136719, 126.63063049316406, -1300.57421875, -1435.411865234375, 3113.601806640625, 268.82586669921875, 389.217529296875, 1262.572509765625, 301.7537841796875, 357.4267578125, 830.9852294921875, 614.444091796875, 280.12933349609375, 625.7827758789062, -917.3389892578125, -384.0097351074219, 263.30645751953125, 953.266357421875, 386.9578857421875, 902.6998291015625, -279.96234130859375, -134.27911376953125, 60.02469253540039, -37.404273986816406, 750.5451049804688, -1223.193359375, 422.97509765625, 188.01260375976562, -214.04344177246094, 317.1441650390625, 168.39578247070312, 1480.1146240234375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000473.npy"} +{"epoch": 0.6945668135095447, "step": 474, "batch_size": 64, "mean": 314.1850891113281, "std": 572.1278076171875, "min": -1310.5313720703125, "p10": -283.52698669433585, "median": 342.2422790527344, "p90": 898.8933532714844, "max": 2194.150390625, "pos_frac": 0.75, "sample": [-1022.4097290039062, 428.2294616699219, 281.13580322265625, 965.0123291015625, 773.0157470703125, -314.6352233886719, 838.9632568359375, 54.13050079345703, 350.5345764160156, 613.5150756835938, 909.5010375976562, 2194.150390625, 41.33287048339844, 298.3906555175781, 32.582481384277344, 800.4033203125, 445.35223388671875, 7.318809509277344, -481.42218017578125, 485.79827880859375, -9.228744506835938, 577.1978759765625, -210.94110107421875, 39.94045639038086, 238.49722290039062, 700.0223999023438, 676.34130859375, 240.11090087890625, 475.26446533203125, 401.56304931640625, 412.27923583984375, -14.846797943115234, -13.718940734863281, 183.21685791015625, 553.8742065429688, 315.4132080078125, 587.0240478515625, 98.989013671875, -149.28526306152344, -351.55535888671875, 793.6676025390625, -63.840576171875, 569.64013671875, 357.43402099609375, 124.55620574951172, 308.0097961425781, 1318.4698486328125, 1122.235107421875, -61.57316970825195, -778.0673828125, -122.10877227783203, 874.14208984375, 333.9499816894531, 604.2796020507812, 359.9425964355469, 145.79335021972656, 425.9959716796875, -872.449462890625, 567.5470581054688, -169.4007568359375, 392.0313720703125, 1255.447021484375, -1310.5313720703125, 1481.6182861328125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000474.npy"} +{"epoch": 0.6960352422907489, "step": 475, "batch_size": 64, "mean": 404.2965087890625, "std": 443.9430847167969, "min": -357.0819396972656, "p10": -173.84071960449216, "median": 359.6017303466797, "p90": 975.1050476074223, "max": 1554.591552734375, "pos_frac": 0.8125, "sample": [-12.91241455078125, 103.06199645996094, 364.3113708496094, 218.84417724609375, -321.52459716796875, 717.2357177734375, 1433.4144287109375, 325.6181945800781, 1110.3675537109375, 1019.565673828125, 266.5218811035156, 292.2640686035156, 436.5059814453125, 25.6676025390625, 377.281005859375, -357.0819396972656, 662.3427734375, 32.29356384277344, 748.6704711914062, -271.1585693359375, 817.6766967773438, 653.9957885742188, 759.39453125, 128.2215576171875, 120.42935180664062, 460.0540466308594, 354.89208984375, 482.7149963378906, 80.41651916503906, 744.787109375, 243.99427795410156, 588.243408203125, -59.542022705078125, 303.0664367675781, 712.095947265625, 761.92333984375, 333.2668762207031, 603.2077026367188, -140.05055236816406, 1226.56884765625, 348.2635803222656, -31.49749755859375, 247.88607788085938, -188.3222198486328, 871.3635864257812, 87.98066711425781, 520.4466552734375, -111.02345275878906, 198.84063720703125, 575.68505859375, -302.4326477050781, 719.9254150390625, 598.223876953125, 5.608856201171875, 1422.3629150390625, -265.3990478515625, 185.04505920410156, 1554.591552734375, 682.0758056640625, 1050.5040283203125, 395.2925109863281, 762.95068359375, -245.0103302001953, 444.9683532714844], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000475.npy"} +{"epoch": 0.697503671071953, "step": 476, "batch_size": 64, "mean": 315.4133605957031, "std": 442.4879150390625, "min": -546.1112670898438, "p10": -203.12283325195312, "median": 290.4779357910156, "p90": 829.1184509277344, "max": 1789.1854248046875, "pos_frac": 0.78125, "sample": [589.7576904296875, 196.2613983154297, -45.02764129638672, 515.5025024414062, 293.2555847167969, 448.6192932128906, 1239.530517578125, 163.39071655273438, 104.57882690429688, -255.6197509765625, 1269.8968505859375, 103.61536407470703, 291.79937744140625, 401.6829833984375, 816.8474731445312, 430.5565185546875, 602.652099609375, 98.28414154052734, 226.41995239257812, -0.22377777099609375, 563.5470581054688, 388.03338623046875, -57.27400207519531, 1214.86767578125, 620.4302978515625, 612.1275634765625, -546.1112670898438, -204.9140625, 498.558349609375, 97.68444061279297, -2.2145156860351562, 189.9852294921875, -355.2910461425781, -206.47695922851562, 488.13018798828125, -187.33238220214844, -198.94329833984375, -519.6286010742188, 576.1776123046875, 113.64604949951172, 47.486270904541016, 684.2570190429688, 289.156494140625, -173.97950744628906, 160.0582733154297, 156.75950622558594, 758.4389038085938, 834.37744140625, 1789.1854248046875, 1107.1092529296875, 310.366943359375, 16.163040161132812, 292.9595642089844, 294.0903625488281, 154.40916442871094, 116.65255737304688, 504.7777404785156, 478.84710693359375, 232.11000061035156, 307.8750915527344, 138.380126953125, 1070.84228515625, 403.03741455078125, -363.6882019042969], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000476.npy"} +{"epoch": 0.6989720998531571, "step": 477, "batch_size": 64, "mean": 321.58892822265625, "std": 544.1513671875, "min": -1552.1502685546875, "p10": -260.7897094726562, "median": 240.69829559326172, "p90": 989.8192199707032, "max": 1998.415771484375, "pos_frac": 0.734375, "sample": [-425.7591552734375, 378.73388671875, 885.4978637695312, -587.2241821289062, 1344.44677734375, 238.47984313964844, 231.12496948242188, 872.8174438476562, 644.65380859375, 1047.6685791015625, 109.3090591430664, 128.96591186523438, 190.29576110839844, 768.386474609375, 999.1319580078125, -73.83370208740234, 831.176513671875, 485.0956115722656, 1064.9659423828125, -157.3829345703125, 567.5443115234375, 67.07330322265625, -34.131591796875, 284.8597717285156, -397.0165100097656, 582.23193359375, 430.47698974609375, 209.84396362304688, -32.131195068359375, -41.2655029296875, -578.692138671875, 520.8443603515625, -1552.1502685546875, -459.9894714355469, 1095.2552490234375, 60.855804443359375, 225.51492309570312, 182.79052734375, 563.5765380859375, 105.27590942382812, 511.93719482421875, -150.49298095703125, 504.942626953125, 940.8212280273438, 100.83114624023438, 242.916748046875, 281.8410949707031, 959.870361328125, 464.0434265136719, 255.48995971679688, 970.9224243164062, -219.54473876953125, -56.835105895996094, 235.1207275390625, -19.801559448242188, 997.9178466796875, 485.0158996582031, -41.063812255859375, 23.990509033203125, -278.46612548828125, 153.88046264648438, 676.6693725585938, 1998.415771484375, 765.9496459960938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000477.npy"} +{"epoch": 0.7004405286343612, "step": 478, "batch_size": 64, "mean": 362.6502685546875, "std": 455.58203125, "min": -596.995849609375, "p10": -169.39709472656247, "median": 330.9967803955078, "p90": 1028.4007080078125, "max": 1301.0604248046875, "pos_frac": 0.8125, "sample": [-144.8256378173828, 159.35414123535156, 788.7846069335938, 157.46856689453125, 4.598899841308594, 340.28228759765625, 294.0115661621094, -382.23095703125, 406.87255859375, -74.16720581054688, 394.2149658203125, 581.8509521484375, -596.995849609375, -404.95263671875, 221.67092895507812, 835.1115112304688, 758.69921875, 1301.0604248046875, 323.0543212890625, 101.6829833984375, -253.22486877441406, -129.64242553710938, -388.64300537109375, 338.9392395019531, 379.9949951171875, 1034.8726806640625, 538.3443603515625, 494.19183349609375, -92.57839965820312, 220.43251037597656, 1260.78515625, 801.7122192382812, 347.49066162109375, 264.792236328125, 346.18377685546875, 300.2668151855469, 1121.909423828125, 1297.8157958984375, 737.5177001953125, -179.92771911621094, 734.1046142578125, 9.574440002441406, 172.67800903320312, -120.79666900634766, 880.666748046875, 266.7293395996094, 125.56299591064453, 1109.3284912109375, 212.99986267089844, 313.74554443359375, 600.1804809570312, 16.803489685058594, 355.150390625, 578.5100708007812, 231.59300231933594, 227.33355712890625, 1013.2994384765625, 567.4415893554688, 1269.1817626953125, -592.2822265625, 599.6729125976562, 501.36297607421875, 607.4963989257812, 22.501296997070312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000478.npy"} +{"epoch": 0.7019089574155654, "step": 479, "batch_size": 64, "mean": 345.0152282714844, "std": 532.1683959960938, "min": -1171.6148681640625, "p10": -211.18779602050776, "median": 268.1337585449219, "p90": 921.6525512695314, "max": 2113.618896484375, "pos_frac": 0.703125, "sample": [-30.769615173339844, -76.4485855102539, 896.4295654296875, 705.54345703125, 1045.728271484375, 1240.879638671875, -428.19561767578125, -164.33126831054688, 145.8681182861328, 52.5062255859375, 663.99853515625, -104.19662475585938, -101.10018157958984, 277.58502197265625, 828.289306640625, 223.15582275390625, 204.2040557861328, 111.08331298828125, 741.3589477539062, 397.2867736816406, 145.70828247070312, 453.498779296875, -65.10731506347656, 395.84246826171875, 2113.618896484375, 1084.1939697265625, 523.8416137695312, 655.2284545898438, 661.5289916992188, 582.3703002929688, 932.46240234375, 192.9814453125, 253.37173461914062, 834.49658203125, -604.087890625, 1605.19677734375, -5.903289794921875, 493.59185791015625, 232.24496459960938, 92.36515808105469, 330.3798522949219, 888.6796875, 578.6263427734375, 499.3538818359375, -108.10929870605469, -1171.6148681640625, 467.56732177734375, -42.7197265625, -285.6438293457031, -21.875856399536133, 185.11814880371094, -268.8149719238281, -231.2691650390625, -374.8321838378906, 441.644775390625, -0.9769515991210938, 504.00634765625, -84.3250732421875, 99.14910888671875, 1374.5716552734375, 430.52459716796875, 840.0697021484375, 258.6824951171875, 566.4629516601562], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000479.npy"} +{"epoch": 0.7033773861967695, "step": 480, "batch_size": 64, "mean": 283.5472412109375, "std": 506.1803283691406, "min": -762.9913940429688, "p10": -281.883056640625, "median": 265.6325454711914, "p90": 959.5365356445313, "max": 1379.293212890625, "pos_frac": 0.71875, "sample": [-226.7077178955078, 232.24156188964844, -15.893646240234375, 42.255313873291016, 279.28179931640625, 292.7213439941406, 216.9220428466797, 964.1644897460938, 389.6216735839844, 861.1160278320312, 452.83453369140625, 1120.07177734375, 425.9168701171875, -290.85577392578125, -260.94671630859375, -753.6846923828125, 466.4446105957031, 769.4459838867188, 48.119415283203125, 580.1757202148438, 178.73348999023438, -217.92124938964844, 9.869949340820312, 428.57513427734375, 649.8622436523438, 170.44432067871094, 1379.293212890625, 251.98329162597656, 668.3602294921875, -180.20899963378906, 812.229736328125, 949.6369018554688, 941.5511474609375, 25.991527557373047, -702.4622802734375, 282.2420654296875, 440.6495666503906, -200.37901306152344, 469.23699951171875, -762.9913940429688, -741.308349609375, -736.2139892578125, 486.2137756347656, 1206.2318115234375, 1183.5634765625, -220.30825805664062, 307.2843933105469, 636.6919555664062, -24.76681137084961, 394.1712951660156, -14.249229431152344, -338.30584716796875, 172.9447021484375, 215.90957641601562, 142.4208984375, 179.4373779296875, 196.712158203125, -58.37680435180664, 588.0670776367188, 1093.8082275390625, 640.7268676757812, -95.91556549072266, 963.7792358398438, 780.5634155273438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000480.npy"} +{"epoch": 0.7048458149779736, "step": 481, "batch_size": 64, "mean": 420.6690368652344, "std": 510.2265319824219, "min": -710.74072265625, "p10": -192.13433227539062, "median": 363.638916015625, "p90": 1040.7287963867188, "max": 1767.873291015625, "pos_frac": 0.78125, "sample": [1767.873291015625, -710.74072265625, 540.6575927734375, 266.22784423828125, -72.56105041503906, 23.478591918945312, -175.80596923828125, 493.3200378417969, 375.35009765625, 1278.786865234375, 87.57793426513672, -317.7643737792969, 104.05152893066406, -228.3704833984375, 330.96697998046875, 747.8714599609375, -149.95425415039062, 627.4612426757812, 1188.7515869140625, -29.503311157226562, 271.0611572265625, 610.6785888671875, 270.5948791503906, 656.6456909179688, 373.81976318359375, 223.95849609375, 166.46969604492188, 997.639404296875, 763.3922119140625, -387.3164978027344, 156.9669189453125, 617.3848876953125, 483.84063720703125, -258.936767578125, 637.4584350585938, 697.6797485351562, -314.4615478515625, -40.37508010864258, 779.402587890625, 359.96844482421875, -199.1322021484375, 196.0823211669922, 941.4835815429688, 918.4401245117188, 115.3383560180664, 147.92584228515625, 1734.9510498046875, 131.98544311523438, 1522.29931640625, 1211.609619140625, 367.30938720703125, -5.9125213623046875, 199.18795776367188, 691.8855590820312, 481.36041259765625, 411.574951171875, 242.21231079101562, 972.37841796875, 800.51416015625, 821.2264404296875, 1059.1956787109375, -142.5502166748047, 256.92486572265625, 832.9808959960938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000481.npy"} +{"epoch": 0.7063142437591777, "step": 482, "batch_size": 64, "mean": 213.079345703125, "std": 577.0432739257812, "min": -1741.8922119140625, "p10": -463.22019348144516, "median": 201.6765899658203, "p90": 878.4585693359377, "max": 1523.173828125, "pos_frac": 0.703125, "sample": [459.6520690917969, -43.94255065917969, 293.8012390136719, -736.464111328125, 658.5907592773438, 727.30517578125, -987.8453369140625, 689.2496337890625, 906.5269165039062, 425.3149108886719, 317.83087158203125, 503.1575012207031, -19.659469604492188, 115.4517822265625, 896.2583618164062, 945.7354736328125, 195.35995483398438, 16.384571075439453, 377.2859802246094, 341.9317626953125, 108.90223693847656, 504.5090026855469, 321.0006103515625, 777.1773071289062, -1029.1839599609375, 412.0101623535156, 207.99322509765625, 836.9257202148438, 313.018310546875, 461.7156677246094, 508.9961242675781, 622.4141845703125, 620.2406616210938, 135.37393188476562, -153.27987670898438, 126.04129028320312, 1013.1884155273438, -327.1758728027344, 1281.7008056640625, -521.52490234375, 478.4762878417969, 70.19709014892578, -217.09393310546875, 48.795143127441406, 123.25001525878906, 125.17538452148438, -1741.8922119140625, 410.0106201171875, 1523.173828125, -19.33307647705078, -26.35601806640625, -145.28480529785156, -4.0023193359375, 361.56658935546875, -1229.8974609375, 185.18475341796875, 165.63009643554688, 1281.3387451171875, -177.17889404296875, 154.72251892089844, -149.96224975585938, -83.92057037353516, 768.0928955078125, -565.5841064453125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000482.npy"} +{"epoch": 0.7077826725403817, "step": 483, "batch_size": 64, "mean": 454.625732421875, "std": 555.1013793945312, "min": -750.1688232421875, "p10": -208.3105941772461, "median": 375.68821716308594, "p90": 1144.2241577148438, "max": 2547.64404296875, "pos_frac": 0.8125, "sample": [-126.73554992675781, 84.08889770507812, 1346.9102783203125, 795.0961303710938, -332.9935302734375, 748.5593872070312, 344.287841796875, 383.253173828125, 736.513427734375, 646.858154296875, 458.920654296875, 690.6212158203125, 499.14764404296875, 270.6422119140625, 223.23507690429688, 249.58689880371094, 1097.3077392578125, 778.1570434570312, -411.18646240234375, 490.7672119140625, 368.1232604980469, -42.65825653076172, 783.7269287109375, 992.427734375, 112.03268432617188, -56.844032287597656, 1532.606201171875, 613.6596069335938, 2.8146209716796875, 1121.8638916015625, -370.1749572753906, 1153.80712890625, 108.26351165771484, 834.044677734375, 66.91587829589844, 1167.66650390625, 262.1495666503906, 633.9932861328125, 299.3558349609375, 909.8784790039062, 761.4744873046875, 596.83837890625, 589.8988037109375, -410.40618896484375, 224.83261108398438, 213.92132568359375, 404.622802734375, 295.41668701171875, 1045.2855224609375, -208.30287170410156, 240.5020751953125, -750.1688232421875, 171.7384033203125, 2547.64404296875, -245.9492645263672, 138.42576599121094, 1276.6494140625, 275.3548583984375, 440.29803466796875, -159.59600830078125, 853.021728515625, 1287.6968994140625, 248.47357177734375, -208.31390380859375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000483.npy"} +{"epoch": 0.7092511013215859, "step": 484, "batch_size": 64, "mean": 402.230712890625, "std": 490.09381103515625, "min": -685.7344970703125, "p10": -80.28419189453125, "median": 304.38096618652344, "p90": 993.5505981445312, "max": 1808.989013671875, "pos_frac": 0.8125, "sample": [408.25103759765625, -80.72673034667969, 1601.718017578125, 159.32554626464844, -79.25160217285156, -302.61297607421875, 471.98419189453125, 9.497734069824219, 1664.69140625, 282.42572021484375, 136.498291015625, 526.21826171875, 390.39288330078125, 481.8450927734375, 765.4849853515625, 647.9207153320312, 326.3362121582031, 147.96539306640625, -298.6038818359375, -685.7344970703125, 330.1884460449219, -23.360301971435547, 852.642333984375, 731.1175537109375, 1808.989013671875, 22.950469970703125, 429.90570068359375, 685.8720092773438, 9.340023040771484, -56.61517333984375, -87.83950805664062, 64.61846923828125, 772.2841796875, 991.9283447265625, 72.4013442993164, 241.50982666015625, 261.858154296875, 50.336219787597656, 133.3124542236328, -173.81602478027344, 87.3598403930664, 264.0169982910156, 902.556640625, 373.9080505371094, 1218.9451904296875, 164.4102783203125, 867.2727661132812, 807.738037109375, 225.68260192871094, -261.9061584472656, -56.78157043457031, 785.5151977539062, 761.3397827148438, 746.6394653320312, 79.91545104980469, 1140.4453125, 1029.9539794921875, 432.90008544921875, 582.5875244140625, 136.26596069335938, 778.5029907226562, 49.61759948730469, -59.61566925048828, 994.245849609375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000484.npy"} +{"epoch": 0.71071953010279, "step": 485, "batch_size": 64, "mean": 383.07708740234375, "std": 546.0009155273438, "min": -993.3787841796875, "p10": -172.91892395019525, "median": 277.4463348388672, "p90": 1188.2625, "max": 1897.493408203125, "pos_frac": 0.765625, "sample": [602.3055419921875, 637.5018920898438, -51.19682312011719, 298.2784423828125, 419.0969543457031, 11.924049377441406, 745.1780395507812, 122.58474731445312, 387.93359375, 379.0228576660156, 197.71728515625, -66.10413360595703, 1205.5531005859375, 1001.884765625, 224.80987548828125, 1201.67138671875, -603.3565063476562, -2.843271255493164, 1190.25732421875, 927.186279296875, 904.54541015625, 1236.6519775390625, 295.2372741699219, 548.125244140625, 1897.493408203125, 1299.709716796875, -45.50218963623047, 95.3761978149414, 829.1250610351562, -107.00716400146484, -92.9406509399414, -24.12506103515625, -384.8282470703125, 264.8516540527344, 290.041015625, 725.8394775390625, 116.58885192871094, 193.85025024414062, 149.3486328125, 616.2376098632812, -120.26727294921875, 174.0010986328125, 1756.7589111328125, 503.90179443359375, -195.48391723632812, 511.70294189453125, 412.3243408203125, 414.80596923828125, 260.71697998046875, -590.8193359375, 1183.60791015625, 951.7008056640625, 20.57632827758789, -341.2409973144531, 104.43379211425781, -234.26341247558594, 952.0394287109375, 214.3923797607422, 263.3140563964844, 252.03009033203125, 81.27729034423828, -993.3787841796875, 814.8707275390625, 481.9094543457031], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000485.npy"} +{"epoch": 0.7121879588839941, "step": 486, "batch_size": 64, "mean": 428.12445068359375, "std": 624.8690185546875, "min": -1089.6646728515625, "p10": -336.70275878906244, "median": 406.96607971191406, "p90": 1192.9112670898442, "max": 2762.568115234375, "pos_frac": 0.796875, "sample": [851.4541015625, 474.56658935546875, 139.9140167236328, 565.2420654296875, 390.62188720703125, -640.1824951171875, 121.13552856445312, 233.26052856445312, 901.4420166015625, -258.0509338378906, 592.6194458007812, 1299.30908203125, 611.261962890625, -683.4953002929688, -353.6455078125, 1230.105224609375, 1060.114990234375, 761.2005615234375, -212.08311462402344, 1106.1253662109375, 1284.7310791015625, -297.169677734375, 944.9346923828125, 828.6617431640625, 77.98670959472656, -139.07992553710938, 453.08856201171875, 391.3569641113281, 44.143646240234375, 1319.4644775390625, 1005.0427856445312, 371.58575439453125, -436.73541259765625, -692.7679443359375, 253.0801239013672, 355.9020080566406, 89.09967041015625, 1286.206298828125, 997.5955200195312, 615.490234375, 356.87469482421875, 686.6204833984375, 512.9844360351562, -110.904052734375, 690.7542724609375, 800.6536865234375, 855.9947509765625, 516.1105346679688, 912.2984008789062, 1288.514404296875, 182.81405639648438, 309.4687194824219, -115.83407592773438, 422.5751953125, 743.4581298828125, 136.00331115722656, 45.39057159423828, 596.2900390625, -1089.6646728515625, 55.86177062988281, 373.9035949707031, 16.049896240234375, -492.354248046875, 2762.568115234375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000486.npy"} +{"epoch": 0.7136563876651982, "step": 487, "batch_size": 64, "mean": 327.7616271972656, "std": 495.33074951171875, "min": -1416.4017333984375, "p10": -200.07597351074216, "median": 314.67881774902344, "p90": 925.4037231445317, "max": 1530.515625, "pos_frac": 0.828125, "sample": [111.53089904785156, 1306.070556640625, 237.30201721191406, -212.19778442382812, 824.042724609375, 786.7079467773438, 780.1339721679688, 454.4482116699219, 1265.3980712890625, 19.88477325439453, 202.24298095703125, -388.5733642578125, 968.2739868164062, -326.2903747558594, 42.40660858154297, 1530.515625, 350.42987060546875, -353.1400451660156, 825.3731079101562, 610.7109375, 470.68450927734375, 613.9622802734375, -1416.4017333984375, 172.16146850585938, -171.791748046875, 605.9822998046875, 28.429710388183594, 1033.2816162109375, 1215.1199951171875, 62.787010192871094, 329.91644287109375, 328.2104797363281, 522.1210327148438, 691.16259765625, 147.50650024414062, -720.8391723632812, 224.47933959960938, -640.5927734375, 524.2156372070312, 320.7453918457031, 308.61224365234375, 105.2762451171875, 614.6566162109375, -139.7574005126953, 334.60504150390625, 117.37821197509766, 183.30320739746094, 405.61468505859375, 470.78619384765625, 255.90493774414062, 146.3179168701172, 291.552001953125, 308.3265380859375, 497.704833984375, 37.76661682128906, -115.6710205078125, 232.50399780273438, 993.8233642578125, 775.3701171875, 699.3594970703125, 71.0025634765625, 569.8785400390625, -5.034454345703125, 441.05242919921875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000487.npy"} +{"epoch": 0.7151248164464024, "step": 488, "batch_size": 64, "mean": 329.82708740234375, "std": 434.5216979980469, "min": -508.07733154296875, "p10": -160.87838287353514, "median": 321.96437072753906, "p90": 935.5901123046875, "max": 1458.463623046875, "pos_frac": 0.75, "sample": [-6.247261047363281, 3.4188575744628906, 1283.22998046875, 252.89756774902344, 882.5210571289062, 88.22080993652344, 440.052001953125, -90.18427276611328, -104.95462036132812, 337.9657897949219, 384.5440979003906, 317.58526611328125, 381.6601867675781, 1041.347412109375, 294.46710205078125, 664.2094116210938, -88.72042846679688, 33.928260803222656, -508.07733154296875, -118.4482421875, 587.4091796875, -165.40884399414062, 418.5872802734375, 312.8031921386719, 1.9601287841796875, -93.6134033203125, 41.60122299194336, 518.3146362304688, 1331.7542724609375, 125.82893371582031, 511.0269775390625, 573.2440185546875, -342.1642761230469, 940.193603515625, 664.4741821289062, 12.072368621826172, 1204.4583740234375, 563.138671875, 328.88946533203125, 228.11221313476562, 559.6096801757812, 924.8486328125, 283.52935791015625, 401.8564453125, 150.8617706298828, 456.18359375, 615.865966796875, 1136.434814453125, -216.53652954101562, -175.70175170898438, -161.16160583496094, 548.7174682617188, 326.3434753417969, -89.50292205810547, 430.2188720703125, 568.5392456054688, -48.08424377441406, 463.0305480957031, 32.35099411010742, -160.217529296875, -454.5941162109375, 497.2519836425781, 308.52850341796875, 1458.463623046875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000488.npy"} +{"epoch": 0.7165932452276065, "step": 489, "batch_size": 64, "mean": 327.616943359375, "std": 470.987548828125, "min": -546.0257568359375, "p10": -129.2009712219238, "median": 205.58480834960938, "p90": 960.2531555175781, "max": 1830.19921875, "pos_frac": 0.78125, "sample": [524.4066162109375, 60.73371887207031, 187.74864196777344, 49.11030578613281, 25.833303451538086, 152.57492065429688, 211.95643615722656, -97.1006851196289, 140.81027221679688, 182.38238525390625, -227.51881408691406, 105.60458374023438, 421.7346496582031, 668.1721801757812, 389.0893859863281, 101.65263366699219, -142.95823669433594, 283.18621826171875, 115.26097869873047, 836.25537109375, -66.96818542480469, 30.669557571411133, 329.4522705078125, 255.03955078125, -158.39712524414062, 199.2131805419922, 117.37510681152344, 34.049930572509766, 225.597900390625, 688.0147705078125, -36.62445068359375, 54.40623474121094, -76.72665405273438, -200.9510498046875, 1098.849365234375, 808.0216674804688, 86.25343322753906, 106.72929382324219, 525.5005493164062, -546.0257568359375, 689.2098999023438, -437.5268249511719, 1063.010009765625, -465.9230651855469, 960.5750732421875, 959.5020141601562, 510.92718505859375, 1830.19921875, 248.12388610839844, 630.231689453125, 1656.4632568359375, 707.169189453125, 422.8065185546875, 1284.432373046875, -60.145835876464844, -12.892156600952148, 770.8387451171875, 439.56768798828125, 600.8175048828125, 219.88055419921875, -18.357208251953125, 1179.7420654296875, 274.2933654785156, 52.12457275390625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000489.npy"} +{"epoch": 0.7180616740088106, "step": 490, "batch_size": 64, "mean": 347.96881103515625, "std": 608.6690673828125, "min": -1121.5855712890625, "p10": -393.9668701171875, "median": 221.48523712158203, "p90": 1130.7582519531252, "max": 1728.003173828125, "pos_frac": 0.78125, "sample": [100.43548583984375, -1121.5855712890625, 1160.083251953125, -105.58487701416016, 195.33322143554688, -30.217796325683594, 137.33416748046875, 628.9326782226562, 828.7779541015625, -590.5208740234375, 217.83853149414062, -97.795654296875, 1174.92578125, 7.778230667114258, 471.3775634765625, -400.1950378417969, -205.97772216796875, 1002.5096435546875, 509.03790283203125, -739.8873291015625, 35.01337814331055, 184.58279418945312, -868.872802734375, 91.61146545410156, -906.6038818359375, 1205.715576171875, 193.70733642578125, 470.3279113769531, 404.4589538574219, 795.1002197265625, 323.690185546875, 1728.003173828125, 118.59771728515625, 755.564697265625, 6.241844177246094, -131.951904296875, 1518.2193603515625, 763.6942138671875, 1535.5400390625, 91.86640930175781, 153.7437744140625, 303.1690979003906, 1062.333251953125, 796.4960327148438, 225.13194274902344, 914.1641235351562, -569.7518920898438, 660.9780883789062, 57.22039794921875, 620.7371826171875, 340.37213134765625, -379.4344787597656, 1701.6356201171875, -39.485107421875, 829.20166015625, 876.6101684570312, 115.58343505859375, 193.11331176757812, 556.8853759765625, 832.7999877929688, 72.71041870117188, 827.0894775390625, 559.8164672851562, 101.7777099609375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000490.npy"} +{"epoch": 0.7195301027900147, "step": 491, "batch_size": 64, "mean": 234.7180633544922, "std": 442.3779296875, "min": -750.37109375, "p10": -271.9290802001953, "median": 225.48308563232422, "p90": 824.5839477539064, "max": 1415.4998779296875, "pos_frac": 0.6875, "sample": [316.9576416015625, -264.1730041503906, 803.5851440429688, 343.522216796875, 1321.678955078125, 106.59915924072266, 293.0280456542969, -179.01869201660156, 360.7524108886719, 86.49725341796875, 302.2324523925781, 221.49720764160156, 153.70083618164062, -197.1477813720703, 215.12203979492188, 158.9419708251953, 714.4307861328125, 325.00457763671875, -166.4016571044922, 451.6655578613281, 243.63119506835938, 12.225534439086914, 833.5834350585938, -426.5929870605469, 249.8945770263672, 267.22308349609375, 583.7971801757812, -294.08319091796875, 1035.8846435546875, 75.1424331665039, 1010.337646484375, 394.1732482910156, -750.37109375, 128.10006713867188, -224.1582794189453, 229.46896362304688, -35.75623321533203, 1144.3408203125, 331.67352294921875, 439.25494384765625, 515.6525268554688, -612.302978515625, 1415.4998779296875, 92.93411254882812, -160.1626739501953, 210.46023559570312, -195.037353515625, 298.34716796875, -63.40984344482422, 556.9307250976562, -0.4716911315917969, -322.609130859375, 512.111328125, -3.5343475341796875, -72.2361831665039, 249.4071502685547, 336.8924560546875, 719.6575927734375, 152.11412048339844, -49.83033752441406, 1103.05224609375, -275.25311279296875, 437.301513671875, -439.8021545410156], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000491.npy"} +{"epoch": 0.7209985315712188, "step": 492, "batch_size": 64, "mean": 239.11178588867188, "std": 510.04345703125, "min": -942.6174926757812, "p10": -397.7516052246093, "median": 149.40956115722656, "p90": 944.142626953125, "max": 1448.076416015625, "pos_frac": 0.6875, "sample": [161.23873901367188, 228.33091735839844, 692.377197265625, -47.84204864501953, 209.0029296875, -8.403459548950195, 98.13870239257812, 606.3389892578125, -481.7471618652344, -119.05949401855469, 67.3430404663086, 643.1085205078125, 657.9949951171875, -209.17648315429688, -942.6174926757812, 951.2457275390625, -78.95841217041016, 866.638427734375, -134.773193359375, 1448.076416015625, -336.84783935546875, 1276.905029296875, 202.65280151367188, -354.7384338378906, 361.4093933105469, 311.6669616699219, 245.05740356445312, -426.25360107421875, 136.37733459472656, 279.64984130859375, 19.08477783203125, 14.827047348022461, 212.06137084960938, -614.7678833007812, -98.11859130859375, 1067.3983154296875, 788.5287475585938, 411.8775634765625, -585.349853515625, 152.91064453125, 926.334716796875, -639.4793701171875, 1096.8538818359375, -13.040733337402344, -170.25961303710938, 691.6763916015625, 145.90847778320312, 78.86282348632812, 906.6278686523438, 79.4923095703125, 390.6340637207031, -37.76130676269531, 139.24627685546875, -416.1858215332031, 1032.5496826171875, 927.5687255859375, 428.1162109375, 47.46974182128906, 106.71168518066406, 518.3162841796875, -269.0920104980469, 1244.8121337890625, 80.00647735595703, 336.19696044921875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000492.npy"} +{"epoch": 0.7224669603524229, "step": 493, "batch_size": 64, "mean": 261.7166748046875, "std": 458.7265930175781, "min": -1595.2447509765625, "p10": -161.34822692871094, "median": 201.57398986816406, "p90": 917.4900207519531, "max": 1246.8924560546875, "pos_frac": 0.703125, "sample": [1035.94482421875, 143.8173370361328, 534.52099609375, 285.4205322265625, -114.50137329101562, 1075.6458740234375, 741.2794189453125, 122.85733032226562, 203.36856079101562, -155.35308837890625, 430.6607666015625, 127.59553527832031, 1108.169677734375, 363.29443359375, 418.7314453125, 195.09909057617188, -279.6897277832031, 306.0738830566406, 116.60752868652344, 44.15953826904297, 146.1421356201172, 457.1355895996094, 467.2276916503906, 478.24627685546875, -129.50067138671875, 321.66943359375, 596.2213134765625, -70.14173126220703, -245.546630859375, -150.40182495117188, -41.64788818359375, -1595.2447509765625, 351.31341552734375, -24.173416137695312, 792.143310546875, -5.260351181030273, 1012.3742065429688, 1218.2703857421875, 128.08245849609375, -129.3304901123047, -256.2652893066406, 251.65625, 922.1030883789062, 551.50634765625, 247.40933227539062, 906.7261962890625, -394.4629211425781, 430.9916076660156, 1246.8924560546875, 790.5123901367188, 143.08096313476562, 475.9529724121094, -99.41828918457031, 167.15328979492188, 199.7794189453125, -192.96713256835938, -86.45178985595703, 418.0138854980469, 411.0792541503906, 173.3195343017578, 140.73727416992188, -67.6551513671875, -163.91757202148438, 252.81080627441406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000493.npy"} +{"epoch": 0.723935389133627, "step": 494, "batch_size": 64, "mean": 409.18218994140625, "std": 436.4589538574219, "min": -644.5928955078125, "p10": -63.3410926818847, "median": 367.3341064453125, "p90": 966.6486877441407, "max": 1659.8389892578125, "pos_frac": 0.859375, "sample": [-2.6387081146240234, 228.99952697753906, 400.87274169921875, 385.6932678222656, 1659.8389892578125, 1267.628662109375, -644.5928955078125, 782.9588623046875, 641.888427734375, 29.690793991088867, 426.31817626953125, 0.13224411010742188, 330.8664245605469, 148.6856231689453, -162.16510009765625, 1104.8135986328125, 613.5490112304688, -187.98846435546875, 472.0984191894531, 806.9700317382812, 138.0433349609375, -489.87451171875, 255.8077392578125, 701.5819702148438, 821.5328979492188, 13.546882629394531, 792.5166015625, 720.233154296875, 332.36749267578125, 322.5904846191406, -376.00396728515625, -89.35639953613281, 348.9749450683594, -199.15826416015625, 1363.5103759765625, 474.4523010253906, 450.80291748046875, 89.9419174194336, 578.1634521484375, 1197.5072021484375, 1002.2293090820312, 630.9312744140625, -0.6420745849609375, 132.35467529296875, 235.42572021484375, 588.5006103515625, 628.2974853515625, 979.646484375, 396.2859191894531, 634.2684936523438, 186.67628479003906, 174.28834533691406, 579.3890991210938, 165.29220581054688, 130.02200317382812, 348.10430908203125, 809.59375, 659.7770385742188, 146.1546630859375, 478.67840576171875, 936.3204956054688, 142.0935516357422, 339.1537780761719, 114.01887512207031], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000494.npy"} +{"epoch": 0.7254038179148311, "step": 495, "batch_size": 64, "mean": 360.75115966796875, "std": 545.813232421875, "min": -925.635986328125, "p10": -191.05390014648435, "median": 280.1465301513672, "p90": 1022.9705383300784, "max": 2201.84228515625, "pos_frac": 0.765625, "sample": [422.1913146972656, -84.20555114746094, -29.8701171875, -85.42242431640625, 78.91070556640625, 332.84820556640625, -480.127197265625, -200.1546630859375, 335.70068359375, 46.86967468261719, 182.29371643066406, 645.099609375, 675.5540161132812, -554.9459228515625, 404.5050048828125, 453.9913024902344, -274.8114318847656, 728.1778564453125, 101.623291015625, 619.483642578125, 212.5203857421875, -71.20797729492188, 274.6568908691406, 34.410396575927734, 474.7632751464844, 1047.3863525390625, 966.0003051757812, 923.5321044921875, 16.175193786621094, 561.2464599609375, 244.61219787597656, 406.767333984375, 124.03136444091797, 180.08111572265625, 1157.7353515625, 876.3082885742188, 679.13427734375, -87.80232238769531, 2201.84228515625, 1443.7679443359375, 307.4710388183594, 12.617385864257812, 122.00931549072266, 518.6322021484375, 243.48106384277344, -406.86676025390625, 56.59159851074219, 415.26837158203125, 15.198266983032227, -222.7384796142578, -169.81878662109375, -16.019210815429688, 285.63616943359375, 507.7003173828125, 1294.4638671875, 732.1464233398438, 331.8838806152344, 779.832275390625, -925.635986328125, 1235.8973388671875, 962.7672729492188, 258.26654052734375, 1809.8365478515625, -44.218223571777344], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000495.npy"} +{"epoch": 0.7268722466960352, "step": 496, "batch_size": 64, "mean": 203.48312377929688, "std": 390.7271728515625, "min": -729.6801147460938, "p10": -285.1664184570312, "median": 204.81930541992188, "p90": 724.4087768554689, "max": 1110.9793701171875, "pos_frac": 0.734375, "sample": [244.09542846679688, -304.98388671875, 911.1588134765625, 764.8175048828125, -631.3226928710938, 593.4114990234375, 89.68842315673828, 278.63824462890625, 701.458740234375, 9.737396240234375, 1110.9793701171875, 223.66836547851562, 215.42471313476562, 286.90106201171875, -318.9908447265625, 119.43231201171875, -238.9256591796875, 406.7276611328125, -729.6801147460938, 874.0639038085938, 451.2012939453125, 443.6184387207031, 734.2445068359375, 238.86428833007812, 815.8359985351562, 538.2459716796875, 6.2290191650390625, -114.05848693847656, 87.26972961425781, -465.9921569824219, 127.50615692138672, -25.88050651550293, 194.21389770507812, 386.2901611328125, 80.07463836669922, -103.41532897949219, 485.57275390625, 231.29171752929688, -427.89385986328125, 221.4679718017578, -219.90216064453125, -227.26959228515625, 624.8132934570312, 63.373756408691406, 251.9259796142578, 578.7394409179688, 55.65309143066406, 466.8240051269531, 564.2987670898438, 549.1251220703125, 123.94851684570312, -235.1433563232422, 447.81500244140625, 43.02843475341797, 189.85122680664062, -516.4163818359375, 85.41517639160156, 928.2256469726562, 53.67339324951172, -13.914573669433594, 524.5045776367188, 285.3325500488281, -58.89276885986328, -53.07611083984375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000496.npy"} +{"epoch": 0.7283406754772394, "step": 497, "batch_size": 64, "mean": 196.8697509765625, "std": 429.22125244140625, "min": -940.414794921875, "p10": -244.07201538085937, "median": 195.5359878540039, "p90": 730.6155578613283, "max": 1250.520263671875, "pos_frac": 0.6875, "sample": [-227.52906799316406, 790.0797729492188, 742.2101440429688, 557.4379272460938, 12.685501098632812, -218.82577514648438, 642.1396484375, -269.38397216796875, -45.71958923339844, -125.76747131347656, 198.2537078857422, 798.2037353515625, 1069.710693359375, 405.2635498046875, 61.300132751464844, 226.84979248046875, 413.89373779296875, 163.9547576904297, 547.7444458007812, 451.0478820800781, 73.19325256347656, -55.23772430419922, 545.2221069335938, 665.150390625, -177.02635192871094, 76.02824401855469, 528.7955322265625, -197.27467346191406, -125.01002502441406, -462.3359375, 487.58837890625, 81.40867614746094, 21.060749053955078, 342.9730224609375, 23.00320816040039, 1214.655517578125, 40.942626953125, 1250.520263671875, 47.87464904785156, 94.03335571289062, 221.50961303710938, 240.86619567871094, 206.1501007080078, 703.5615234375, -817.8341064453125, 274.98272705078125, 570.9971923828125, -374.6433410644531, 237.629150390625, 192.81826782226562, 208.65591430664062, 258.5867919921875, 902.7997436523438, -166.0011444091797, -65.53972625732422, 200.97732543945312, -251.16184997558594, -88.6185302734375, -51.81977844238281, -940.414794921875, 647.5853271484375, -509.96209716796875, 329.6004943847656, -0.17625045776367188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000497.npy"} +{"epoch": 0.7298091042584435, "step": 498, "batch_size": 64, "mean": 298.13177490234375, "std": 505.1886291503906, "min": -1315.988525390625, "p10": -317.79149780273434, "median": 365.22467041015625, "p90": 830.260546875, "max": 1645.049072265625, "pos_frac": 0.734375, "sample": [-659.71142578125, 157.84915161132812, 450.5132751464844, 106.77047729492188, -118.15878295898438, 472.75628662109375, 687.2367553710938, 655.114013671875, 422.8142395019531, 719.9969482421875, 389.91302490234375, 529.2766723632812, 276.43939208984375, 813.3078002929688, -1315.988525390625, 391.20556640625, 845.7184448242188, 1007.3903198242188, -45.29705047607422, 664.719482421875, 47.73241424560547, 557.233154296875, -95.33761596679688, 831.6773071289062, -333.7431335449219, 340.2304992675781, 382.9036865234375, 117.53913116455078, -371.56549072265625, -60.89824676513672, 314.4471130371094, 353.0735778808594, -916.7197265625, 1172.633544921875, 504.4227600097656, -18.590087890625, -195.57177734375, 473.12811279296875, 658.6484375, 545.2464599609375, 1241.7353515625, -17.65843963623047, 52.90179443359375, 186.4223175048828, 592.8876953125, 557.8894653320312, -443.7840270996094, 95.92596435546875, 810.2021484375, 802.4735717773438, 220.7888946533203, 95.1475830078125, 377.3757629394531, -156.17596435546875, -358.61322021484375, 13.210235595703125, 557.5294189453125, 191.9219970703125, 826.9547729492188, 518.3492431640625, -280.5710144042969, 921.2293701171875, 1645.049072265625, -129.11373901367188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000498.npy"} +{"epoch": 0.7312775330396476, "step": 499, "batch_size": 64, "mean": 399.692626953125, "std": 514.9130859375, "min": -957.9646606445312, "p10": -178.47729339599607, "median": 425.3664855957031, "p90": 973.4971801757813, "max": 1811.34619140625, "pos_frac": 0.765625, "sample": [770.4114379882812, 176.60597229003906, 219.22647094726562, -957.9646606445312, -128.03173828125, 405.4491271972656, 511.0007629394531, 580.8084716796875, -50.36524200439453, -92.24844360351562, 633.28857421875, 536.3829956054688, 68.37618255615234, 1020.7081298828125, 708.9999389648438, 230.97616577148438, 506.5645751953125, -444.20111083984375, 1758.287841796875, 284.24896240234375, 197.848388671875, 1175.9742431640625, 686.897705078125, 139.35580444335938, -458.4609375, 381.5287170410156, 220.72958374023438, 717.1341552734375, -264.7657470703125, 975.7877197265625, 178.6917724609375, -47.115478515625, 608.127685546875, -76.7930679321289, 270.6733703613281, 33.33765411376953, 744.694091796875, 776.7964477539062, 968.152587890625, -143.4387664794922, -34.07704162597656, 506.058349609375, 672.2860107421875, 676.7841796875, 653.9800415039062, 153.48638916015625, 628.4674072265625, 1811.34619140625, 1041.2393798828125, 120.98017120361328, 506.2771911621094, 876.4586181640625, 445.2838439941406, 721.7015380859375, -103.07366943359375, 665.7230224609375, 1466.4642333984375, 323.19329833984375, -517.760498046875, -227.03712463378906, -193.49380493164062, 656.8177490234375, 7.818075180053711, 897.7227172851562], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000499.npy"} +{"epoch": 0.7327459618208517, "step": 500, "batch_size": 64, "mean": 371.1221008300781, "std": 558.1958618164062, "min": -963.6397705078125, "p10": -369.03787231445307, "median": 275.7947540283203, "p90": 1043.076989746094, "max": 1872.243896484375, "pos_frac": 0.796875, "sample": [133.60787963867188, -299.9855041503906, -614.2198486328125, 1872.243896484375, -398.6317443847656, 465.3734436035156, 1091.4229736328125, 561.7291259765625, 778.3876342773438, 31.09484100341797, 879.1119384765625, 850.5191650390625, 1009.89013671875, -453.4527893066406, -50.18669128417969, 514.784912109375, 76.9141845703125, 0.95404052734375, 295.6700439453125, 1158.78076171875, 878.5053100585938, 45.64662170410156, 797.202880859375, 1416.429931640625, 51.49614715576172, 856.006591796875, -419.978759765625, 961.8672485351562, 1057.2999267578125, -659.7672729492188, 786.4727172851562, 982.325439453125, -66.46815490722656, 149.18138122558594, 150.05352783203125, 816.5087280273438, -742.5147705078125, 285.2266845703125, 961.7619018554688, -963.6397705078125, -128.5702667236328, 160.1285858154297, 170.8563232421875, 686.215087890625, 119.5878677368164, -81.21875762939453, 572.6683959960938, 113.41321563720703, 195.266357421875, 266.3628234863281, 1275.9495849609375, 727.2124633789062, 661.2108154296875, 238.6070556640625, 516.6900634765625, 487.9679260253906, 684.971435546875, 260.38372802734375, 47.436973571777344, 471.1893615722656, 14.212446212768555, 1125.060302734375, -214.12496948242188, 132.71412658691406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000500.npy"} +{"epoch": 0.7342143906020558, "step": 501, "batch_size": 64, "mean": 341.43072509765625, "std": 579.2158203125, "min": -1071.7806396484375, "p10": -373.03632507324215, "median": 301.99839782714844, "p90": 1156.7523681640625, "max": 1828.9302978515625, "pos_frac": 0.75, "sample": [-13.289283752441406, 366.95123291015625, 403.3787841796875, -410.8636779785156, -0.5796852111816406, 1209.955322265625, 169.71385192871094, 923.5923461914062, 282.1767578125, 46.987152099609375, 191.57205200195312, -172.58790588378906, 293.00079345703125, 176.99037170410156, 1828.9302978515625, 693.1629638671875, 1165.7491455078125, 398.6470947265625, 310.9960021972656, 834.297119140625, 1096.8056640625, -84.53556823730469, 52.17936706542969, 240.8294219970703, 982.5464477539062, -429.5917053222656, -522.947998046875, 189.27313232421875, 1047.34716796875, 417.7138977050781, -1050.72998046875, 250.55874633789062, 527.5833740234375, 462.5682067871094, 207.21412658691406, 1261.5250244140625, -383.131103515625, 451.99981689453125, 434.3465270996094, 734.8268432617188, 1135.7598876953125, 21.937278747558594, -156.26368713378906, -327.0420227050781, 669.7650146484375, -223.04998779296875, 19.371051788330078, 709.263427734375, 74.22036743164062, -1071.7806396484375, 375.7744140625, 557.7227172851562, -349.4818420410156, -128.76266479492188, 17.390018463134766, -448.904052734375, 1178.5234375, 832.9340209960938, 1203.9755859375, 357.37725830078125, 1647.9862060546875, 563.5123291015625, 522.1663818359375, 84.00830078125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000501.npy"} +{"epoch": 0.73568281938326, "step": 502, "batch_size": 64, "mean": 343.71856689453125, "std": 441.7429504394531, "min": -571.0709228515625, "p10": -135.40584869384762, "median": 288.39906311035156, "p90": 959.9757629394531, "max": 1948.6300048828125, "pos_frac": 0.78125, "sample": [400.9244384765625, -259.31964111328125, 790.5623168945312, 591.2572631835938, 21.52069091796875, 618.71142578125, -152.37744140625, -185.0584716796875, 1263.9456787109375, 354.7071838378906, 373.67889404296875, 263.4707946777344, 479.612060546875, 1088.0938720703125, 798.308837890625, 28.04736328125, -47.29180908203125, 119.12052917480469, -571.0709228515625, 67.70339965820312, 402.7326354980469, 1097.768798828125, 81.91265106201172, 262.4787902832031, 45.46498107910156, 247.4625701904297, -197.3464813232422, 288.8946228027344, 532.2778930664062, 375.10357666015625, -449.04534912109375, 949.1742553710938, 97.56255340576172, 525.1285400390625, 519.2312622070312, 966.7345581054688, -181.43922424316406, 1948.6300048828125, 35.41917419433594, -95.80546569824219, 675.197021484375, 113.15824890136719, 254.85601806640625, 395.15472412109375, 376.6327209472656, 490.84942626953125, 1116.6959228515625, -32.14818572998047, 739.542724609375, 386.68865966796875, 787.5625, -45.93360900878906, -2.4258975982666016, 135.88919067382812, 964.60498046875, -34.781036376953125, 279.615478515625, 152.19595336914062, 756.4638671875, 287.90350341796875, -86.73429107666016, 355.42828369140625, 129.57289123535156, 305.11236572265625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000502.npy"} +{"epoch": 0.737151248164464, "step": 503, "batch_size": 64, "mean": 324.9077453613281, "std": 462.22705078125, "min": -554.1910400390625, "p10": -164.1680465698242, "median": 245.5921401977539, "p90": 816.8807373046875, "max": 1898.6490478515625, "pos_frac": 0.78125, "sample": [51.91270065307617, -125.14379119873047, 894.8240356445312, -153.4721221923828, -214.40711975097656, -185.19918823242188, 560.0006103515625, 252.7184600830078, 114.4332504272461, -145.5191650390625, -383.2127685546875, 302.62396240234375, -249.59841918945312, 253.8945770263672, 1061.536376953125, 799.630615234375, 289.06036376953125, 811.78369140625, 45.84375, 571.5621337890625, -134.8041534423828, 79.43226623535156, 66.75859069824219, 68.08753204345703, 309.6943359375, 1.3523101806640625, -187.32672119140625, 558.646240234375, 192.71420288085938, 47.9345703125, 498.53466796875, -84.6256103515625, 143.53382873535156, 619.1456909179688, 2.5555801391601562, -168.75201416015625, 1898.6490478515625, 1856.1495361328125, 598.4278564453125, 430.82220458984375, 208.44235229492188, 601.6473999023438, 699.6503295898438, 86.47435760498047, -71.611083984375, 529.5067749023438, 15.704874038696289, 819.065185546875, 178.94668579101562, 412.6710510253906, 139.04339599609375, 574.883056640625, 446.94659423828125, 268.3017578125, 990.6740112304688, 614.5508422851562, -34.026161193847656, 797.8817138671875, -554.1910400390625, 1087.04443359375, 238.4658203125, 507.78436279296875, 773.649169921875, 112.38662719726562], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000503.npy"} +{"epoch": 0.7386196769456681, "step": 504, "batch_size": 64, "mean": 404.5033874511719, "std": 563.2022094726562, "min": -895.4720458984375, "p10": -307.26011352539064, "median": 389.08087158203125, "p90": 1127.3176391601564, "max": 1636.721923828125, "pos_frac": 0.765625, "sample": [-28.642486572265625, 210.92498779296875, 1243.11474609375, 344.5772705078125, 115.84242248535156, 388.3717346191406, 717.4652099609375, -215.31256103515625, 1621.642822265625, 1198.2681884765625, 1626.56640625, -699.6185913085938, 493.9498596191406, 665.0471801757812, -108.05259704589844, -81.05183410644531, 661.9677734375, 247.11865234375, 14.315055847167969, -160.97647094726562, -25.27527618408203, 26.037254333496094, 478.23004150390625, -533.1083374023438, 287.2790832519531, 220.92543029785156, -414.2169494628906, 609.95556640625, 249.84103393554688, 433.819091796875, -484.7300109863281, 876.8933715820312, -480.6971130371094, 1433.3997802734375, 962.61962890625, 568.739501953125, 891.7666015625, 690.6714477539062, 112.58688354492188, 440.2026062011719, 56.56121826171875, 1143.19189453125, 1052.235107421875, 513.751708984375, 916.9983520507812, 80.12869262695312, -312.6181640625, 785.8984375, 90.50352478027344, 867.4478759765625, 162.88412475585938, 703.5782470703125, 1090.2777099609375, 693.5675659179688, 354.4736328125, 384.42596435546875, -294.75799560546875, 724.401123046875, 1636.721923828125, 389.7900085449219, -895.4720458984375, 597.54833984375, 611.4740600585938, -65.2528076171875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000504.npy"} +{"epoch": 0.7400881057268722, "step": 505, "batch_size": 64, "mean": 211.07835388183594, "std": 432.2928466796875, "min": -670.0232543945312, "p10": -314.675291442871, "median": 160.6900863647461, "p90": 821.5813049316406, "max": 1414.61328125, "pos_frac": 0.65625, "sample": [-4.806129455566406, 609.5145874023438, -459.35296630859375, 418.55499267578125, -62.13288879394531, 897.4971923828125, 159.14125061035156, -8.390274047851562, 33.813228607177734, 40.00520324707031, 698.3208618164062, 775.964599609375, 1414.61328125, -18.24383544921875, 501.47174072265625, 533.2637329101562, -110.63768005371094, 219.14898681640625, 57.485107421875, -210.5006866455078, 256.49945068359375, 680.2704467773438, 89.590087890625, 112.64114379882812, -504.14996337890625, -607.6889038085938, 172.81739807128906, 298.7886047363281, 822.3419799804688, 313.9171142578125, -253.5600128173828, 361.6658935546875, 388.7518310546875, -178.27523803710938, 238.37289428710938, 808.16943359375, 162.23892211914062, 615.7489624023438, -189.65603637695312, 599.0390014648438, -670.0232543945312, -407.09442138671875, 115.26847839355469, 337.975830078125, 2.219818115234375, 125.22765350341797, 890.9461059570312, 316.2161865234375, -192.64503479003906, 819.806396484375, -36.27547073364258, -116.95738983154297, -340.8675537109375, 932.2154541015625, 216.24485778808594, -151.7148895263672, 873.6669311523438, -444.5526123046875, -97.75494384765625, -11.409042358398438, 878.0892333984375, 238.09750366210938, 545.08056640625, 15.000476837158203], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000505.npy"} +{"epoch": 0.7415565345080763, "step": 506, "batch_size": 64, "mean": 355.7054443359375, "std": 487.26971435546875, "min": -761.8783569335938, "p10": -161.6764190673828, "median": 352.13250732421875, "p90": 1033.2281066894534, "max": 1730.385498046875, "pos_frac": 0.734375, "sample": [-305.81011962890625, 47.634857177734375, 727.3905029296875, 14.861618041992188, 269.4544982910156, 288.5404052734375, -307.9760437011719, 470.3288879394531, 885.6471557617188, 41.532894134521484, 547.9559936523438, 280.28350830078125, 163.1203155517578, 409.80035400390625, -464.29180908203125, 1138.2606201171875, 491.40545654296875, -27.635665893554688, 354.9637451171875, 578.6002807617188, 326.1961669921875, 1160.9019775390625, 399.4928894042969, -24.60572052001953, 1730.385498046875, -152.3990478515625, -42.51836395263672, 506.67327880859375, 584.6840209960938, 349.30126953125, 388.4564208984375, 1287.380859375, 400.5008239746094, 115.20307922363281, 521.9349975585938, -758.2877197265625, 347.9226989746094, 192.52557373046875, 1384.9537353515625, 875.505859375, 474.54644775390625, 702.8461303710938, -120.99201965332031, 427.46624755859375, 276.0603332519531, -761.8783569335938, 206.849609375, -165.65243530273438, 1069.72705078125, 866.7972412109375, 1063.7457275390625, -5.264799118041992, 377.069091796875, 606.7625122070312, 620.7664184570312, 962.0203247070312, -57.19282531738281, 857.969970703125, 281.93035888671875, -46.10124969482422, -360.33447265625, -17.407257080078125, 384.04962158203125, -76.90991973876953], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000506.npy"} +{"epoch": 0.7430249632892805, "step": 507, "batch_size": 64, "mean": 325.09228515625, "std": 540.8163452148438, "min": -976.935546875, "p10": -217.06893920898435, "median": 275.7851257324219, "p90": 847.2616821289063, "max": 2019.59326171875, "pos_frac": 0.75, "sample": [138.6613311767578, 222.9872283935547, 9.436080932617188, -976.935546875, 1243.327392578125, 83.0860595703125, 28.158248901367188, 410.1020812988281, 337.1119689941406, 215.8907470703125, -28.807449340820312, 377.5713806152344, -67.10758972167969, 406.6021728515625, 649.365234375, 569.71923828125, -405.59454345703125, 265.31939697265625, 224.6172637939453, 1986.1339111328125, 471.06756591796875, 778.515380859375, 122.99436950683594, -127.20076751708984, 432.116943359375, 321.59796142578125, 665.8778076171875, 395.10845947265625, -228.75381469726562, 414.690673828125, 286.2508544921875, -91.64224243164062, -137.4288330078125, -32.67247009277344, 20.795019149780273, 578.44580078125, 638.126708984375, 1603.396240234375, 613.0751953125, 67.36980438232422, 252.93821716308594, 7.0455474853515625, -30.710540771484375, 332.2975158691406, 2019.59326171875, 1099.9814453125, 133.87924194335938, 362.36566162109375, -309.59521484375, 839.3331909179688, 754.1825561523438, -386.1832580566406, -34.518348693847656, 622.0803833007812, 1440.924560546875, 582.4628295898438, 850.6596069335938, 432.7872009277344, 382.39044189453125, 45.10832977294922, 38.202964782714844, -189.80422973632812, -295.1195373535156, -625.7711181640625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000507.npy"} +{"epoch": 0.7444933920704846, "step": 508, "batch_size": 64, "mean": 260.345703125, "std": 568.1893920898438, "min": -1425.4764404296875, "p10": -264.9733856201172, "median": 256.73179626464844, "p90": 835.7513977050784, "max": 1937.85302734375, "pos_frac": 0.6875, "sample": [218.32473754882812, 250.11273193359375, -692.47607421875, 775.3782348632812, 483.3070068359375, 27.3270263671875, 48.55428695678711, 1914.098388671875, 399.2682189941406, 615.1895141601562, 263.3508605957031, 861.6256103515625, 364.9241638183594, 1937.85302734375, 614.6133422851562, 382.100830078125, 139.89242553710938, -555.2967529296875, 1299.2174072265625, 740.4803466796875, -530.0621948242188, -153.69224548339844, 363.9316101074219, -714.4327392578125, 996.48046875, -237.24642944335938, 45.03021240234375, -250.28750610351562, -107.49360656738281, 291.78326416015625, -62.51869201660156, 415.5067443847656, -39.68681335449219, -36.46485900878906, 1397.8485107421875, -1425.4764404296875, 100.50313568115234, 120.9000473022461, 226.11239624023438, 722.3802490234375, 345.3423767089844, 312.91510009765625, 166.25048828125, 346.2198486328125, 738.6547241210938, 228.52423095703125, 566.8995971679688, 326.8260498046875, 707.525634765625, 579.9446411132812, 422.7800598144531, -201.94747924804688, -186.9213409423828, 937.9485473632812, 614.6119384765625, -239.109130859375, -205.86492919921875, -70.09356689453125, -461.04376220703125, 589.6970825195312, -271.267333984375, 343.76068115234375, 95.6061782836914, -236.09481811523438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000508.npy"} +{"epoch": 0.7459618208516887, "step": 509, "batch_size": 64, "mean": 283.69403076171875, "std": 465.6879577636719, "min": -690.7265014648438, "p10": -329.922769165039, "median": 241.27337646484375, "p90": 1005.4685241699219, "max": 1794.656982421875, "pos_frac": 0.78125, "sample": [54.63475799560547, -108.22988891601562, -476.2279968261719, 1004.218017578125, -3.5362548828125, 397.8951110839844, -165.9488525390625, 179.454345703125, 469.1561279296875, 272.2831726074219, 662.9573364257812, 133.27734375, 356.99041748046875, 151.3026123046875, 129.5608673095703, 255.7585906982422, 1062.959228515625, -411.16131591796875, -519.2646484375, 470.9078674316406, 1169.796142578125, 409.0648498535156, 170.9801483154297, 262.5869445800781, 363.03607177734375, 645.4271240234375, 105.500732421875, 78.6837387084961, 243.80670166015625, 450.8186950683594, 567.2633666992188, -481.84088134765625, 68.43925476074219, -100.45869445800781, 358.5475158691406, 1006.0044555664062, 543.817626953125, 82.02125549316406, 238.74005126953125, -310.88214111328125, -14.137359619140625, 315.35986328125, 282.099853515625, 154.31158447265625, 63.09857177734375, -690.7265014648438, 1078.9755859375, 1161.6395263671875, -101.63642883300781, 1794.656982421875, -378.0073547363281, 223.52371215820312, -338.0830383300781, 77.27975463867188, 195.15567016601562, 317.030517578125, 151.37887573242188, 35.941192626953125, 312.9329528808594, 399.3311767578125, 866.8173217773438, 1105.68798828125, 448.1846923828125, 907.2653198242188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000509.npy"} +{"epoch": 0.7474302496328928, "step": 510, "batch_size": 64, "mean": 371.8828125, "std": 394.6123352050781, "min": -496.0180969238281, "p10": -82.71425704956054, "median": 352.4671630859375, "p90": 846.7932983398439, "max": 1309.479248046875, "pos_frac": 0.78125, "sample": [187.24734497070312, 687.2352294921875, 1110.708740234375, 400.2794494628906, -108.94918823242188, 705.40673828125, 647.54052734375, 177.4941864013672, 151.3879852294922, 702.1444702148438, 251.93460083007812, 433.2948303222656, -249.5777130126953, 405.98291015625, 877.2405395507812, 658.7576293945312, 603.5958251953125, -496.0180969238281, -76.82256317138672, 872.8455810546875, 284.23236083984375, 440.11846923828125, 446.00726318359375, 130.76480102539062, 329.7402038574219, 462.65277099609375, 718.7435913085938, 1061.6658935546875, 856.5610961914062, -436.86322021484375, 812.5023193359375, 1236.604736328125, -78.06863403320312, -44.23090362548828, 552.5827026367188, 99.71732330322266, 339.4074401855469, 1309.479248046875, -322.55780029296875, 352.44976806640625, 329.40338134765625, 109.4664077758789, 780.2955322265625, 352.48455810546875, 467.4200439453125, 160.03182983398438, 467.6091613769531, 274.4152526855469, 126.84428405761719, 420.07098388671875, 762.5138549804688, -161.90101623535156, 310.73828125, 824.0017700195312, -53.48304748535156, 513.3865356445312, -84.70523834228516, 770.9962158203125, -39.97606658935547, -61.83081817626953, -14.248830795288086, 691.4757080078125, 326.6343078613281, 35.6193962097168], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000510.npy"} +{"epoch": 0.748898678414097, "step": 511, "batch_size": 64, "mean": 308.7491455078125, "std": 537.9898071289062, "min": -1391.147705078125, "p10": -98.82834854125974, "median": 202.5256118774414, "p90": 918.3888488769531, "max": 1885.853271484375, "pos_frac": 0.75, "sample": [-18.88927459716797, 1228.321533203125, 186.83438110351562, 202.94163513183594, 400.0444030761719, 134.27769470214844, 916.6748657226562, 1277.4716796875, 195.6881866455078, 136.46983337402344, 156.5794677734375, 884.4356079101562, 745.0606689453125, 701.5244140625, 115.74061584472656, 1066.5673828125, -1391.147705078125, 498.59967041015625, -263.248779296875, 633.0715942382812, 1266.091064453125, 830.369873046875, 157.45265197753906, 508.52386474609375, 223.26608276367188, 118.81769561767578, -875.442626953125, 294.24981689453125, -110.77682495117188, 564.345947265625, 519.7711791992188, 54.44648742675781, -37.61631774902344, 199.19203186035156, 347.4790954589844, 195.05824279785156, 919.1234130859375, -59.604949951171875, -11.4041748046875, 172.83489990234375, 781.3002319335938, 202.10958862304688, 53.26228713989258, -405.5525817871094, -70.94857025146484, 661.3092651367188, 559.40576171875, 72.76795959472656, -44.51380157470703, -28.121322631835938, -224.42764282226562, 576.0101318359375, 582.1487426757812, -50.598365783691406, 264.9695739746094, 361.57177734375, 345.6807556152344, -4.906974792480469, 1885.853271484375, 110.11009216308594, 494.0830993652344, -1040.322021484375, 204.32275390625, 1391.23583984375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000511.npy"} +{"epoch": 0.750367107195301, "step": 512, "batch_size": 64, "mean": 404.79510498046875, "std": 416.75244140625, "min": -539.9042358398438, "p10": -100.31288299560543, "median": 373.4578552246094, "p90": 968.7279357910156, "max": 1600.024658203125, "pos_frac": 0.84375, "sample": [1069.830322265625, 810.2257080078125, 810.6349487304688, -379.28057861328125, -181.0049285888672, 173.99679565429688, 1600.024658203125, 153.3072509765625, 58.85955810546875, -65.0552749633789, 179.3152313232422, 637.247314453125, 7.13062858581543, 399.2287292480469, -197.50869750976562, 19.22347640991211, 666.5678100585938, 536.7823486328125, 329.879150390625, -3.8227100372314453, -117.5231704711914, 548.6904907226562, 89.73117065429688, 535.10546875, 1187.697998046875, 611.9837646484375, 239.3142547607422, 450.61798095703125, 197.6341552734375, 707.1429443359375, 375.6187744140625, 359.0777587890625, 515.9938354492188, 480.30328369140625, 113.31404113769531, 149.4119110107422, 969.442138671875, 371.29693603515625, -115.42328643798828, 819.3502197265625, 117.64920043945312, 938.7662963867188, 423.9632568359375, 1151.681396484375, 78.69699096679688, -539.9042358398438, 575.232666015625, -51.287044525146484, 186.37889099121094, 435.407470703125, 1014.6072998046875, 1.1662063598632812, 368.6587219238281, 699.4962158203125, 337.7855529785156, 490.9758605957031, 669.20166015625, 312.3133850097656, 318.5679931640625, 789.42529296875, -237.9761505126953, 1044.5958251953125, 967.0614624023438, 700.0604248046875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000512.npy"} +{"epoch": 0.7518355359765051, "step": 513, "batch_size": 64, "mean": 281.6656494140625, "std": 466.2721252441406, "min": -683.2039794921875, "p10": -255.01807861328123, "median": 227.83553314208984, "p90": 944.6504211425785, "max": 1438.843994140625, "pos_frac": 0.75, "sample": [714.4085693359375, 305.7613525390625, 342.2672119140625, 178.91558837890625, 229.99627685546875, 229.62168884277344, 328.63818359375, 180.66165161132812, -266.4322509765625, 1003.111328125, 536.1099853515625, -110.28558349609375, 642.1888427734375, 42.690452575683594, 29.877954483032227, 763.975830078125, -0.5354022979736328, -62.94757080078125, 193.88893127441406, 361.6439208984375, 762.864990234375, 1015.2930297851562, 1275.0667724609375, 401.4173583984375, 565.4987182617188, 28.06613540649414, 74.68107604980469, 236.72711181640625, 308.6177978515625, -120.4130859375, 235.59622192382812, 715.2053833007812, 664.1907348632812, 502.4322509765625, 405.2952880859375, -108.41156005859375, 9.445266723632812, -683.2039794921875, 497.2196044921875, 1438.843994140625, -371.3780517578125, 173.96853637695312, 210.83770751953125, -612.3310546875, 502.7102355957031, -228.385009765625, -481.388916015625, -29.134469985961914, 1070.585205078125, 665.5269775390625, -678.63330078125, 26.252254486083984, 860.4549560546875, 1336.9224853515625, 181.19921875, 206.813720703125, -122.82398223876953, 511.90411376953125, -79.2664566040039, 56.50105667114258, -457.9547119140625, 980.7341918945312, 209.44744873046875, 226.04937744140625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000513.npy"} +{"epoch": 0.7533039647577092, "step": 514, "batch_size": 64, "mean": 259.47412109375, "std": 465.7502136230469, "min": -836.28564453125, "p10": -227.51209564208983, "median": 175.8777084350586, "p90": 880.8511718750004, "max": 1566.6082763671875, "pos_frac": 0.671875, "sample": [-191.72604370117188, -185.73822021484375, 161.8720245361328, -145.77651977539062, 1130.681884765625, 158.41610717773438, 568.4515380859375, 627.2101440429688, 1469.4326171875, 931.7266235351562, 164.36253356933594, 588.4308471679688, -300.2784423828125, -93.81219482421875, 317.85845947265625, -24.603195190429688, -233.38291931152344, 166.49560546875, 1105.7872314453125, 1152.7652587890625, -246.21429443359375, -94.74494171142578, 395.7348327636719, -153.9004364013672, 45.305320739746094, 264.8203430175781, 330.2005615234375, 270.95831298828125, 301.75030517578125, -61.159889221191406, 760.1522827148438, 284.96209716796875, -128.015625, -95.9525146484375, 169.49533081054688, 182.2600860595703, 1173.894775390625, -325.0326232910156, 95.04393768310547, -213.81350708007812, 327.45379638671875, -467.5657958984375, -211.1019287109375, 364.4443359375, 156.33688354492188, 37.65256881713867, -836.28564453125, -100.11146545410156, -274.1024169921875, 656.6363525390625, 281.8251037597656, 442.4537353515625, 370.36041259765625, 684.1011962890625, 612.2131958007812, 95.51919555664062, 321.99835205078125, 489.5823974609375, 762.1417846679688, 262.23443603515625, 688.772705078125, 55.7674446105957, -4.507354736328125, 1566.6082763671875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000514.npy"} +{"epoch": 0.7547723935389133, "step": 515, "batch_size": 64, "mean": 248.91294860839844, "std": 452.6782531738281, "min": -1450.1624755859375, "p10": -150.33513946533202, "median": 215.52842712402344, "p90": 665.852313232422, "max": 1598.659912109375, "pos_frac": 0.734375, "sample": [-88.88810729980469, -9.361160278320312, 566.8417358398438, 1007.7880859375, 619.6614990234375, -1450.1624755859375, 420.2450866699219, 424.4866943359375, 263.3594970703125, -553.6135864257812, -114.34278869628906, -9.712844848632812, -139.251953125, 329.7093505859375, 247.1355438232422, 384.21142578125, 115.09928894042969, 560.8319091796875, -239.21449279785156, 181.76339721679688, 423.1357116699219, -150.04481506347656, 89.43873596191406, 447.41461181640625, 183.9213104248047, 133.6957244873047, 603.9981689453125, -92.75534057617188, 406.63177490234375, 101.86360168457031, 772.2830200195312, 36.23004150390625, -127.5545883178711, 613.7681884765625, 435.7091064453125, 292.51202392578125, 526.0159301757812, 34.61870574951172, 1320.2962646484375, -256.8268127441406, 181.46578979492188, 1107.4620361328125, -413.56109619140625, -5.294639587402344, -344.95416259765625, 162.2945556640625, 159.51422119140625, 685.6483764648438, 19.964801788330078, -2.743743896484375, 32.259239196777344, 1142.5283203125, 95.31156158447266, 91.2038345336914, 343.4533996582031, 611.455078125, 453.2941589355469, -150.45956420898438, 329.27178955078125, 486.66998291015625, 377.44073486328125, 266.4524841308594, 1598.659912109375, 392.15496826171875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000515.npy"} +{"epoch": 0.7562408223201175, "step": 516, "batch_size": 64, "mean": 347.9979248046875, "std": 412.7556457519531, "min": -583.3972778320312, "p10": -171.9089370727539, "median": 357.8537902832031, "p90": 931.2067687988285, "max": 1226.00732421875, "pos_frac": 0.8125, "sample": [125.2481460571289, 188.71408081054688, 592.443603515625, -214.7972869873047, 572.6605834960938, 763.631103515625, 411.9344177246094, -162.8481903076172, 284.71759033203125, 630.8548583984375, 1226.00732421875, 391.09832763671875, 452.9256591796875, 508.0712890625, 180.6235809326172, -104.51705932617188, 623.41748046875, 106.86705017089844, 29.767578125, 1165.418212890625, -583.3972778320312, 30.28741455078125, -146.792236328125, -365.6751708984375, 817.096923828125, 355.060791015625, -282.0265197753906, 608.7042236328125, 980.1533203125, 239.50384521484375, 374.1093444824219, 820.1795654296875, 1125.525146484375, 246.250732421875, 563.4602661132812, 600.745361328125, 258.0496826171875, 2.1200218200683594, 346.2024841308594, 395.74114990234375, 22.10859489440918, -38.532745361328125, 740.7445678710938, 487.7822570800781, 158.60662841796875, 1126.414306640625, 213.9356231689453, 143.15008544921875, 35.408348083496094, 328.436279296875, 491.8659973144531, 531.4297485351562, 135.46746826171875, 552.298583984375, 978.7898559570312, -175.7921142578125, -260.15582275390625, -556.3935546875, 601.4345092773438, 600.2674560546875, 654.615966796875, 1011.9027099609375, 360.64678955078125, -30.100940704345703], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000516.npy"} +{"epoch": 0.7577092511013216, "step": 517, "batch_size": 64, "mean": 401.75970458984375, "std": 503.4451904296875, "min": -649.9931640625, "p10": -110.1693206787109, "median": 354.7850646972656, "p90": 912.8998657226563, "max": 2373.50927734375, "pos_frac": 0.8125, "sample": [350.7253723144531, 648.7769165039062, 119.64068603515625, 463.0289306640625, 769.6173706054688, 170.24075317382812, 179.0360870361328, 913.6279296875, 498.18182373046875, -260.1782531738281, 586.5037231445312, 1625.105712890625, 1282.1591796875, 284.48065185546875, 479.9415283203125, -33.797630310058594, 262.84283447265625, 560.2191772460938, 209.76646423339844, 665.6555786132812, 1402.435302734375, 729.5191650390625, 33.414710998535156, 87.64682006835938, -649.9931640625, -368.1488952636719, 136.6229248046875, 307.6946716308594, -244.65402221679688, 2373.50927734375, 420.22430419921875, -19.358963012695312, -287.5536804199219, 940.4874267578125, 106.7957763671875, 641.255859375, -499.958740234375, 554.9839477539062, -81.71389770507812, 333.376953125, 879.8975219726562, 48.854774475097656, 217.50868225097656, 381.0744323730469, 700.2095336914062, 545.7604370117188, 771.6363525390625, 362.44891357421875, 155.3179931640625, 1179.0589599609375, 143.47207641601562, 911.2010498046875, 403.986083984375, 816.0142822265625, 73.262451171875, 739.8343505859375, -3.0865631103515625, 205.18557739257812, 215.9007110595703, 383.1136474609375, -122.364501953125, 729.6363525390625, 358.8447570800781, -76.3051528930664], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000517.npy"} +{"epoch": 0.7591776798825257, "step": 518, "batch_size": 64, "mean": 331.05511474609375, "std": 428.399658203125, "min": -857.3502807617188, "p10": -204.24793548583983, "median": 302.2022247314453, "p90": 879.7692016601562, "max": 1233.34326171875, "pos_frac": 0.8125, "sample": [49.066158294677734, 1125.999755859375, 417.60150146484375, 260.18511962890625, 195.0378875732422, 345.24700927734375, 795.617431640625, 562.9849853515625, 84.11241149902344, 349.57635498046875, 772.2598266601562, 667.3975830078125, 291.58331298828125, -319.32489013671875, 476.0501403808594, 244.03831481933594, 374.3035888671875, 181.2230224609375, 84.2462158203125, 625.4998779296875, 878.0645751953125, -4.916542053222656, -296.4069519042969, 100.80081176757812, 55.909542083740234, 312.8211364746094, 737.343505859375, 172.31317138671875, 531.5037231445312, 713.3728637695312, 859.3277587890625, 179.23997497558594, 1152.2900390625, 1233.34326171875, 883.4473876953125, 880.499755859375, 54.96593475341797, -179.83444213867188, 520.052734375, 548.6672973632812, 176.02915954589844, -37.25628662109375, 357.8850402832031, -177.20352172851562, 322.88720703125, -367.91473388671875, -283.35711669921875, -857.3502807617188, 472.8287048339844, 518.292724609375, -46.11225891113281, 180.86106872558594, -214.7108612060547, 178.29586791992188, -508.09405517578125, 1080.701416015625, 1130.947509765625, 142.68984985351562, 4.807765960693359, 817.2675170898438, 369.537353515625, 162.10218811035156, 565.217041015625, 283.66619873046875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000518.npy"} +{"epoch": 0.7606461086637298, "step": 519, "batch_size": 64, "mean": 236.4358367919922, "std": 463.400146484375, "min": -1158.765625, "p10": -278.7657867431641, "median": 257.47682189941406, "p90": 787.6989501953128, "max": 1277.388671875, "pos_frac": 0.703125, "sample": [-11.041290283203125, 478.7635498046875, 384.7919006347656, -373.0135803222656, 882.6935424804688, -262.024658203125, 43.38121032714844, 675.9599609375, 1089.272216796875, 470.3309326171875, 173.7346954345703, 578.4735717773438, -280.1097717285156, 1083.516357421875, 301.8297119140625, -967.6093139648438, 1277.388671875, 599.3773193359375, 98.17449188232422, -158.5657501220703, 336.50225830078125, 107.20176696777344, 58.61085510253906, 349.5652160644531, 667.7267456054688, 285.7622375488281, 256.15704345703125, 560.1880493164062, 404.71734619140625, -49.288307189941406, -80.9415283203125, 713.4906616210938, -193.73678588867188, -131.2381591796875, 461.3554992675781, 79.0645523071289, 491.07623291015625, -93.41498565673828, -275.62982177734375, -40.32923889160156, 328.089111328125, -223.04336547851562, 933.9519653320312, 475.32159423828125, -1158.765625, 697.1351928710938, 1191.6617431640625, 10.170204162597656, 34.34210205078125, -34.48908996582031, 139.4202423095703, 819.5025024414062, -339.9166564941406, 627.8526611328125, -314.0916748046875, 199.41726684570312, 697.9857788085938, 29.49874496459961, 300.2774658203125, 519.4210815429688, 283.37982177734375, 33.70195388793945, 258.7966003417969, -369.88946533203125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000519.npy"} +{"epoch": 0.762114537444934, "step": 520, "batch_size": 64, "mean": 294.81298828125, "std": 475.60479736328125, "min": -975.8927001953125, "p10": -177.07352447509766, "median": 213.12313842773438, "p90": 1013.0749145507817, "max": 1895.03466796875, "pos_frac": 0.71875, "sample": [-161.06369018554688, -313.480224609375, 1085.275634765625, 157.6974334716797, 472.3846740722656, 111.32485961914062, 317.7705993652344, -60.1651611328125, -137.29754638671875, 112.63348388671875, -25.855819702148438, -49.180389404296875, 745.5924072265625, 60.37773132324219, -47.02057647705078, 603.1272583007812, 1224.712646484375, 1895.03466796875, -500.8671569824219, 352.824462890625, 1177.271728515625, 1309.32568359375, -305.8116149902344, 375.16748046875, -58.744903564453125, 559.1234130859375, 141.88589477539062, 557.0106201171875, 352.66021728515625, 245.08946228027344, 281.88494873046875, 209.57810974121094, 1196.2078857421875, 162.69215393066406, 66.28298950195312, 424.9767761230469, 577.3983154296875, 388.7866516113281, 359.3453674316406, -975.8927001953125, 518.7339477539062, 171.26455688476562, 231.42543029785156, 430.08270263671875, -39.31756591796875, 899.3709716796875, 154.0240478515625, 838.2743530273438, 462.5334777832031, 107.5953369140625, 216.6681671142578, -209.90277099609375, -200.25778198242188, 86.313232421875, 205.60366821289062, 628.6123046875, -176.580322265625, 1061.80517578125, -177.28489685058594, 550.8076171875, 130.9559326171875, -122.75517272949219, -5.879547119140625, 217.87298583984375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000520.npy"} +{"epoch": 0.7635829662261381, "step": 521, "batch_size": 64, "mean": 322.8023376464844, "std": 426.2841491699219, "min": -600.845703125, "p10": -110.65766143798827, "median": 220.65264892578125, "p90": 921.3087890625002, "max": 1550.869384765625, "pos_frac": 0.796875, "sample": [486.6654052734375, 767.9552001953125, 177.27830505371094, 29.467041015625, -56.866065979003906, -600.845703125, -89.47433471679688, -42.51298522949219, -119.2001953125, 567.9417724609375, 44.158782958984375, -207.03402709960938, 12.53860855102539, 183.88848876953125, 370.2566833496094, 1019.2109985351562, 130.32647705078125, 15.64627456665039, -168.5276641845703, 801.1854248046875, 236.5253448486328, 27.280227661132812, -219.97764587402344, 252.46766662597656, 649.3767700195312, 519.6751708984375, 588.4403076171875, 215.09927368164062, 64.4496841430664, 61.68218994140625, 293.371826171875, 601.0963745117188, -18.46057891845703, 1020.498046875, 226.20602416992188, 465.3880310058594, 514.663818359375, 153.54367065429688, 702.5732421875, -159.541015625, 119.9675064086914, 443.174072265625, 292.59918212890625, 61.50041580200195, 84.63031005859375, 979.0867309570312, -90.72508239746094, 865.8408813476562, 19.678741455078125, -546.5089721679688, 749.7086181640625, 506.6370849609375, 699.546875, -70.81001281738281, 702.302490234375, 975.7598876953125, 64.05949401855469, 1264.311279296875, 63.10308074951172, 1550.869384765625, 551.3663330078125, 98.47801208496094, 945.0807495117188, 813.2761840820312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000521.npy"} +{"epoch": 0.7650513950073421, "step": 522, "batch_size": 64, "mean": 314.26910400390625, "std": 494.19573974609375, "min": -880.8688354492188, "p10": -218.06816558837886, "median": 276.1820831298828, "p90": 1048.0582153320315, "max": 1405.782958984375, "pos_frac": 0.765625, "sample": [318.666015625, 958.4071655273438, 980.1553955078125, -100.4919662475586, 1077.159423828125, -21.082706451416016, 1405.782958984375, 167.27816772460938, 189.91856384277344, -11.898059844970703, -182.91757202148438, 350.9789123535156, 803.2391357421875, 66.70726013183594, 430.2226257324219, -43.898284912109375, 340.55816650390625, -81.11164855957031, 458.8519287109375, 225.72760009765625, 471.5927734375, 564.4392700195312, 105.18718719482422, 345.66552734375, -573.4242553710938, 951.3167724609375, 339.99945068359375, 159.66590881347656, 412.59088134765625, 663.1046752929688, 244.73394775390625, 286.43603515625, 159.92738342285156, 12.04022216796875, 415.46527099609375, -880.8688354492188, 477.6489562988281, 187.42538452148438, 286.2072448730469, -386.8529052734375, -233.13270568847656, 441.431884765625, 1108.562744140625, 144.57626342773438, -502.5711669921875, 624.0393676757812, 620.819580078125, 266.15692138671875, -818.9155883789062, 1150.372802734375, 1337.3778076171875, -174.14315795898438, 1222.1063232421875, 296.6907958984375, 73.51089477539062, -392.77978515625, 221.70101928710938, -127.47899627685547, 622.2108154296875, 266.1418151855469, 955.9122314453125, 123.425048828125, 1226.1864013671875, 86.46669006347656], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000522.npy"} +{"epoch": 0.7665198237885462, "step": 523, "batch_size": 64, "mean": 332.15032958984375, "std": 522.4508056640625, "min": -973.9259643554688, "p10": -230.34017791748045, "median": 310.58213806152344, "p90": 1001.9951843261719, "max": 1526.4454345703125, "pos_frac": 0.734375, "sample": [-121.26786804199219, 968.6611328125, 88.54536437988281, 461.54058837890625, 347.8204345703125, -177.3166046142578, 441.96014404296875, 232.10885620117188, 867.0133666992188, 307.89849853515625, -748.9173583984375, 722.2492065429688, -0.8970966339111328, 47.342376708984375, -408.3519287109375, 309.6316223144531, 256.0107421875, 935.1412353515625, -516.5972900390625, 185.52932739257812, 658.4037475585938, 1014.0775146484375, 34.763492584228516, 89.67591857910156, 458.1322021484375, 153.66453552246094, 311.53265380859375, 67.28659057617188, -7.564079284667969, 1320.5181884765625, 571.0188598632812, 152.4671173095703, 1.2678546905517578, -973.9259643554688, 1003.3870239257812, 998.74755859375, -409.6906433105469, -135.8429718017578, 529.5403442382812, -208.17030334472656, 629.4697265625, -38.79339599609375, 1397.0418701171875, 321.4415588378906, 821.47802734375, -67.25692749023438, 348.6795959472656, 314.32049560546875, 1407.3790283203125, 442.9551086425781, 420.6862487792969, 135.37832641601562, 612.6611938476562, 114.00634002685547, 856.2938842773438, 1153.2393798828125, 693.0768432617188, -121.02482604980469, 648.830810546875, 1526.4454345703125, 650.708740234375, -492.781982421875, -239.841552734375, -104.1663818359375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000523.npy"} +{"epoch": 0.7679882525697503, "step": 524, "batch_size": 64, "mean": 315.73992919921875, "std": 476.3023376464844, "min": -1315.542236328125, "p10": -137.97330474853516, "median": 379.7837677001953, "p90": 826.505914306641, "max": 1485.582763671875, "pos_frac": 0.78125, "sample": [55.03172302246094, 918.8755493164062, 367.98736572265625, 506.9463806152344, 254.70718383789062, 204.2205810546875, 416.9879150390625, 49.38428497314453, 506.486083984375, 461.7804870605469, 410.7530212402344, 10.304901123046875, 521.3597412109375, 443.96484375, 750.2054443359375, 359.7174987792969, -28.900604248046875, 693.254638671875, 240.6167755126953, 546.4357299804688, -1315.542236328125, 509.41851806640625, 222.30047607421875, 1305.901611328125, 3.769317626953125, 50.470703125, 1040.123779296875, 344.2985534667969, 454.71990966796875, -64.8204345703125, -104.34121704101562, 562.0050048828125, 490.1358642578125, -266.2260437011719, 671.251220703125, 308.9529113769531, 1019.434326171875, 1485.582763671875, 674.8179321289062, 309.72412109375, -72.81572723388672, -962.2648315429688, 17.690444946289062, 0.02472686767578125, 688.0313110351562, -131.43008422851562, 395.8841552734375, -8.144081115722656, 408.9227294921875, 532.2745361328125, 859.2061157226562, 1114.3240966796875, 662.595458984375, -663.68017578125, 17.217567443847656, -252.96214294433594, 681.3878173828125, 616.677734375, -140.7775421142578, 386.0752258300781, 631.3475341796875, 373.4923095703125, -305.035400390625, -32.78436279296875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000524.npy"} +{"epoch": 0.7694566813509545, "step": 525, "batch_size": 64, "mean": 430.468017578125, "std": 509.672607421875, "min": -590.3506469726562, "p10": -109.64266662597655, "median": 322.8650360107422, "p90": 1242.8630493164064, "max": 1722.0179443359375, "pos_frac": 0.8125, "sample": [310.63433837890625, 986.9904174804688, 382.65185546875, 56.327354431152344, 139.59002685546875, 335.0957336425781, 1449.9530029296875, 733.8052368164062, 416.5540466308594, -186.56539916992188, 1261.5029296875, -262.61181640625, 839.8788452148438, 249.7537078857422, 345.3916015625, -242.01402282714844, 179.4501495361328, 277.1393127441406, 840.928466796875, 643.4601440429688, 483.33392333984375, 297.7240905761719, -92.18438720703125, 122.54788208007812, -98.97161865234375, 362.9632568359375, 473.2867431640625, 447.6557312011719, 488.1188659667969, 1145.0859375, 1199.3699951171875, 68.70723724365234, 1722.0179443359375, -5.654457092285156, 1609.8111572265625, 259.08953857421875, 160.24627685546875, 228.50967407226562, 412.861572265625, -114.21597290039062, 1463.281494140625, -590.3506469726562, 79.50628662109375, -7.816225051879883, 124.91775512695312, 176.01889038085938, 440.8617858886719, 45.25826644897461, 558.253662109375, 167.060791015625, 840.8785400390625, 1395.02392578125, -116.5313720703125, -28.23748779296875, 804.8720092773438, 266.8262023925781, 401.2569885253906, 107.42940521240234, -330.7349853515625, 711.2384033203125, 292.2521667480469, 1536.495849609375, 383.4035949707031, 900.5687866210938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000525.npy"} +{"epoch": 0.7709251101321586, "step": 526, "batch_size": 64, "mean": 424.0378112792969, "std": 530.8768310546875, "min": -793.0073852539062, "p10": -243.3478271484375, "median": 485.77862548828125, "p90": 1177.0531372070313, "max": 1379.443115234375, "pos_frac": 0.75, "sample": [1253.107177734375, 731.5792236328125, -12.814739227294922, 329.744140625, 328.6103515625, 379.66650390625, -253.80445861816406, -71.44971466064453, 559.194580078125, -622.6975708007812, 521.9190063476562, 511.88970947265625, 789.9603881835938, 308.1655578613281, 1273.3394775390625, 1097.7442626953125, -145.50164794921875, 503.39013671875, 311.9089660644531, 537.2891235351562, 449.1640319824219, 502.2549743652344, 192.48390197753906, 804.0532836914062, 637.0167846679688, 602.4063720703125, 1361.856689453125, 148.36817932128906, 411.50384521484375, -192.91587829589844, -218.9490203857422, 577.5025634765625, -381.9858703613281, 1250.548095703125, -282.451416015625, 181.03292846679688, 1379.443115234375, 1081.4991455078125, 260.9889221191406, -718.654296875, 864.2855834960938, 671.119140625, 484.44195556640625, 1279.0496826171875, -411.42974853515625, 804.63330078125, 576.24951171875, 1004.5923461914062, -92.5539779663086, 504.48126220703125, -63.67291259765625, 247.10926818847656, 319.6739196777344, 918.1370239257812, 1028.52392578125, 35.87716293334961, 1137.1689453125, 722.4864501953125, 487.11529541015625, -8.592147827148438, -216.8046875, 68.98233795166016, -793.0073852539062, 1194.1463623046875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000526.npy"} +{"epoch": 0.7723935389133627, "step": 527, "batch_size": 64, "mean": 277.2255859375, "std": 404.092529296875, "min": -700.3386840820312, "p10": -194.55968627929687, "median": 243.20643615722656, "p90": 761.7391296386719, "max": 1073.6138916015625, "pos_frac": 0.75, "sample": [-474.73138427734375, 692.5440673828125, 393.26934814453125, 377.4252014160156, 547.136962890625, 162.74923706054688, 885.8392944335938, -64.75321960449219, 28.015594482421875, 825.4688110351562, -175.43995666503906, -196.70736694335938, 619.6834106445312, 234.8354034423828, -700.3386840820312, -491.244140625, 629.023193359375, -169.39141845703125, 675.1117553710938, 553.4797973632812, -227.80393981933594, 982.5439453125, 43.235477447509766, -122.55355834960938, 119.43707275390625, 719.154541015625, -66.80137634277344, 98.44165802001953, 553.2901611328125, 715.5288696289062, 625.22607421875, 897.2514038085938, 354.67071533203125, 512.5396728515625, 1073.6138916015625, 1070.3702392578125, 768.77783203125, -24.9843807220459, 67.02513122558594, 107.23129272460938, 594.5950317382812, 126.89212799072266, 412.18450927734375, 279.91552734375, -128.755615234375, 74.20523071289062, 384.8822021484375, 376.5841369628906, 156.29473876953125, -554.5099487304688, 251.5774688720703, 135.97552490234375, 570.62841796875, 205.26708984375, -4.910003662109375, 52.97929382324219, 745.3154907226562, 617.86474609375, 566.7083740234375, 385.41070556640625, 226.4896240234375, -189.54843139648438, 48.62944030761719, -210.40867614746094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000527.npy"} +{"epoch": 0.7738619676945668, "step": 528, "batch_size": 64, "mean": 326.864013671875, "std": 561.7095336914062, "min": -560.3392944335938, "p10": -421.4179016113281, "median": 230.68694305419922, "p90": 1116.2629516601567, "max": 1826.345458984375, "pos_frac": 0.71875, "sample": [-175.49758911132812, 1151.650390625, 336.748046875, 212.03256225585938, 0.94207763671875, -393.813232421875, -37.40668487548828, 671.904052734375, 551.1432495117188, 1295.1239013671875, 365.19195556640625, -69.53507995605469, 358.4071044921875, -550.5029907226562, 111.56339263916016, 244.42877197265625, 347.219482421875, -97.30477905273438, 802.26220703125, 1826.345458984375, -80.3022232055664, 897.923095703125, 179.83819580078125, 798.6118774414062, 521.1929931640625, 942.8750610351562, -460.61676025390625, 1429.155517578125, -315.7385559082031, 1001.488525390625, -556.0383911132812, 182.9862060546875, 373.0401306152344, 225.16854858398438, 373.49786376953125, 200.8988037109375, 705.0335693359375, 165.4165802001953, 491.1114807128906, 74.27213287353516, -350.1383972167969, 889.5946044921875, -208.44876098632812, -558.8218994140625, 1162.6124267578125, 284.82489013671875, 444.71954345703125, 1721.96240234375, 236.20533752441406, 95.91058349609375, -525.334228515625, -560.3392944335938, 684.4180297851562, 1033.6922607421875, 164.65243530273438, 793.8920288085938, 580.43359375, 169.42311096191406, -433.24847412109375, -24.523040771484375, 93.57876586914062, -182.7777862548828, 1166.455078125, 139.83639526367188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000528.npy"} +{"epoch": 0.775330396475771, "step": 529, "batch_size": 64, "mean": 233.68368530273438, "std": 551.6707763671875, "min": -1830.523681640625, "p10": -422.24931335449213, "median": 274.6943359375, "p90": 829.2235961914063, "max": 1711.236083984375, "pos_frac": 0.671875, "sample": [283.1475830078125, -132.82386779785156, 1711.236083984375, 45.62689208984375, 92.18499755859375, 706.6343994140625, 453.3466491699219, 499.41876220703125, 730.6475830078125, 1283.11474609375, -253.06338500976562, 266.2410888671875, 227.40628051757812, -452.092529296875, 490.8097229003906, -117.72113800048828, 144.57644653320312, -8.244911193847656, 811.4150390625, -72.1755142211914, -352.6151428222656, 310.423095703125, 1054.305419921875, 923.9743041992188, 688.6487426757812, -545.0919799804688, -110.58344268798828, 323.6479797363281, -496.1177978515625, 598.6795043945312, 247.78506469726562, 587.5413208007812, -156.34860229492188, 387.87066650390625, -12.49432373046875, 45.15751647949219, 783.4306640625, 140.68528747558594, 560.9625244140625, 628.8709716796875, -1830.523681640625, 249.85223388671875, 836.8558349609375, -496.4537353515625, 513.5997924804688, -907.9816284179688, 52.404319763183594, 361.37750244140625, 845.5618286132812, 129.23532104492188, -562.1658935546875, 483.359130859375, -96.35883331298828, 375.0696105957031, -205.7987823486328, 520.9176025390625, 384.7615661621094, 594.0155029296875, 566.8854370117188, 1163.359619140625, -260.4643859863281, -219.5568084716797, -274.4219665527344, 413.8099670410156], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000529.npy"} +{"epoch": 0.7767988252569751, "step": 530, "batch_size": 64, "mean": 370.5749816894531, "std": 636.5712280273438, "min": -2012.577880859375, "p10": -206.62015991210933, "median": 271.1577453613281, "p90": 1214.8056274414064, "max": 2154.60107421875, "pos_frac": 0.71875, "sample": [291.8124084472656, -223.31161499023438, -40.28956604003906, 196.418212890625, 276.3332214355469, -285.3896179199219, 1788.2579345703125, 499.3417053222656, 265.9822692871094, -450.8558044433594, 162.15199279785156, -110.42302703857422, -0.3963623046875, -142.95462036132812, 753.2413940429688, 837.4546508789062, -48.50288391113281, 55.85247802734375, -433.4947509765625, 1182.2288818359375, 1713.335693359375, 383.9926452636719, 912.6065063476562, 856.4972534179688, -118.21327209472656, 970.6245727539062, -84.9508056640625, 239.0855712890625, -369.528076171875, 564.8892822265625, -12.100156784057617, 1440.884033203125, 1059.1224365234375, 379.14019775390625, 412.5682067871094, 9.534191131591797, 715.279052734375, 282.8900451660156, -167.67343139648438, -430.8202819824219, 220.4265899658203, 1149.5103759765625, 620.2373046875, 2154.60107421875, 129.119873046875, 281.1929626464844, 502.3630676269531, 251.4271240234375, 1273.8055419921875, 120.9372329711914, 339.2112121582031, 70.20130157470703, 782.9103393554688, -2012.577880859375, 1228.76708984375, -47.93235778808594, -13.803863525390625, 1304.8197021484375, 513.5053100585938, 251.70806884765625, 95.56385803222656, 334.63336181640625, 599.6470947265625, 235.90365600585938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000530.npy"} +{"epoch": 0.7782672540381792, "step": 531, "batch_size": 64, "mean": 303.2028503417969, "std": 511.4059143066406, "min": -1094.2044677734375, "p10": -289.24245910644527, "median": 266.5740966796875, "p90": 903.0138977050782, "max": 1689.23779296875, "pos_frac": 0.765625, "sample": [505.0442199707031, 148.0269317626953, 621.8890380859375, 957.1397705078125, 485.60125732421875, -470.12823486328125, 320.76483154296875, 219.56729125976562, 1689.23779296875, -242.43472290039062, -309.30291748046875, 406.7438659667969, 1.5996150970458984, 28.111351013183594, 323.3847351074219, 220.03102111816406, 466.0699157714844, 260.02203369140625, -1094.2044677734375, 303.3979187011719, 905.7302856445312, 558.4454956054688, 568.6810302734375, 87.73431396484375, 857.1852416992188, 171.30859375, 524.9467163085938, 215.4267120361328, 405.1539306640625, -41.374961853027344, 1415.2611083984375, -460.75579833984375, 566.3029174804688, 476.8695373535156, 95.93775939941406, 222.96902465820312, -73.97920989990234, -433.6882629394531, -27.348388671875, 113.87157440185547, 1652.235595703125, 515.8106689453125, 106.5200424194336, -198.2018280029297, 1355.7939453125, -476.3865661621094, 480.90283203125, 1247.7742919921875, 318.59686279296875, 816.2748413085938, 896.6756591796875, -112.9976577758789, 581.5314331054688, -95.10787200927734, 228.65524291992188, 106.24833679199219, 453.95367431640625, 273.12615966796875, 277.6779479980469, 48.55427551269531, 164.35137939453125, 462.74237060546875, -508.9240417480469, -180.0643310546875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000531.npy"} +{"epoch": 0.7797356828193832, "step": 532, "batch_size": 64, "mean": 356.5891418457031, "std": 591.6976928710938, "min": -1076.135498046875, "p10": -471.3128997802734, "median": 315.51820373535156, "p90": 1107.6733520507814, "max": 1867.0189208984375, "pos_frac": 0.75, "sample": [1250.4652099609375, 458.9830017089844, 1003.7517700195312, -245.44229125976562, 1083.3482666015625, 288.7679443359375, 1050.7376708984375, 772.763671875, -1076.135498046875, 280.9642333984375, -473.88922119140625, 1128.6878662109375, 564.4822387695312, 882.6734619140625, -71.96302795410156, 529.484130859375, 583.681396484375, -813.240234375, 432.436767578125, 355.9481201171875, 351.51776123046875, 1457.262451171875, 1867.0189208984375, 843.14501953125, 615.4056396484375, 702.55419921875, -76.96104431152344, 909.951904296875, 1211.6934814453125, 247.81097412109375, -83.09705352783203, 576.080078125, -230.6300048828125, 1118.098388671875, 688.8460693359375, 228.35720825195312, 306.4664611816406, 889.80029296875, -107.41410064697266, 261.9236755371094, 71.29924774169922, 656.6678466796875, 147.64117431640625, 620.1405029296875, 234.29322814941406, -160.7508544921875, 29.03852081298828, 745.5941162109375, 115.9525375366211, -80.32428741455078, -465.3014831542969, -672.6356811523438, 279.54083251953125, 1525.1441650390625, 324.5699462890625, 66.39434814453125, 179.5234832763672, -725.5531005859375, 139.73651123046875, 248.84262084960938, -504.50042724609375, 581.97802734375, -666.8604125976562, 366.939453125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000532.npy"} +{"epoch": 0.7812041116005873, "step": 533, "batch_size": 64, "mean": 254.0311737060547, "std": 517.8369140625, "min": -1213.9302978515625, "p10": -314.43031616210936, "median": 243.37405395507812, "p90": 878.4519470214844, "max": 1891.4332275390625, "pos_frac": 0.703125, "sample": [728.65283203125, -386.7774353027344, 885.3665771484375, -380.6640319824219, 278.00555419921875, 223.974853515625, 850.0078125, 537.5825805664062, 490.90435791015625, 325.75189208984375, 550.2196044921875, 715.0867309570312, 647.5216674804688, 618.4009399414062, -219.4420166015625, 279.23870849609375, 862.3178100585938, 117.41263580322266, 350.1844787597656, -776.120849609375, 1319.1180419921875, 53.64471435546875, 23.42987060546875, 538.5341796875, 54.7387580871582, 465.84552001953125, -19.73325538635254, 119.38243103027344, 181.78033447265625, 1891.4332275390625, 262.77325439453125, 500.1268615722656, -23.89410400390625, -141.82449340820312, 70.66665649414062, -1213.9302978515625, -145.79214477539062, 320.9370422363281, -352.978515625, 538.593994140625, 169.8717803955078, 135.80885314941406, 1131.1868896484375, 1133.10693359375, -286.3156433105469, -24.662307739257812, -326.4794616699219, 1.1901702880859375, 319.95904541015625, 363.5245056152344, 269.130615234375, -44.24916076660156, 984.3922119140625, 148.0975341796875, -281.086669921875, -195.99078369140625, -826.903564453125, 132.51263427734375, 266.9130554199219, 906.5963134765625, -91.20861053466797, -88.00222778320312, 795.9691162109375, 524.1568603515625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000533.npy"} +{"epoch": 0.7826725403817915, "step": 534, "batch_size": 64, "mean": 332.4776916503906, "std": 467.11407470703125, "min": -814.7329711914062, "p10": -226.62526245117186, "median": 294.9883575439453, "p90": 848.5834228515625, "max": 1755.5589599609375, "pos_frac": 0.78125, "sample": [415.1151123046875, 844.331298828125, -46.9814453125, 1755.5589599609375, 5.156009674072266, 679.175537109375, -484.86407470703125, 412.7626647949219, 234.35548400878906, -437.6715393066406, 67.86802673339844, 1017.613037109375, -218.84451293945312, 191.25808715820312, 835.8223266601562, 825.1493530273438, 636.002197265625, -229.95986938476562, 369.8406677246094, 761.0963745117188, 294.21710205078125, -98.76815795898438, 612.5543212890625, 470.39385986328125, 319.2383117675781, 420.543701171875, 883.175048828125, -25.62405014038086, 282.87603759765625, 248.87582397460938, 212.42892456054688, 829.494384765625, 205.78839111328125, 295.7596130371094, 549.369384765625, -448.1727600097656, -15.914703369140625, 1071.1495361328125, 753.7556762695312, 1013.2803955078125, 192.40798950195312, 245.54283142089844, -51.4300537109375, 158.1356201171875, 744.5585327148438, 177.58119201660156, 270.1656494140625, 800.0924072265625, 1393.2806396484375, 850.40576171875, 668.1477661132812, 295.97076416015625, 319.9841613769531, -0.6602287292480469, 413.0957336425781, 162.0509033203125, 303.4374084472656, 153.39389038085938, 320.9542541503906, 55.33477783203125, 20.740074157714844, -814.7329711914062, -283.88262939453125, -623.2079467773438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000534.npy"} +{"epoch": 0.7841409691629956, "step": 535, "batch_size": 64, "mean": 382.72406005859375, "std": 480.8632507324219, "min": -834.0400390625, "p10": -145.54684295654295, "median": 379.12245178222656, "p90": 1005.7485534667969, "max": 1973.2962646484375, "pos_frac": 0.8125, "sample": [-288.35272216796875, 1973.2962646484375, -109.66558074951172, 375.776611328125, -154.02699279785156, 332.40521240234375, 43.78073501586914, 972.5440673828125, 164.3612060546875, 596.4215698242188, 225.9715118408203, -41.865196228027344, 163.2681427001953, 1047.0218505859375, 436.3035583496094, 473.169189453125, 67.69904327392578, 1004.4044799804688, 217.41372680664062, 423.20330810546875, 949.8448486328125, 481.45452880859375, 416.460693359375, 412.4748229980469, -14.554489135742188, 1405.3399658203125, 1085.650390625, 379.6849060058594, 1.5516071319580078, 385.60296630859375, 1269.2645263671875, 742.9375, 949.4681396484375, -412.3978576660156, -239.61154174804688, -834.0400390625, 552.4786376953125, 656.4035034179688, 284.8548278808594, 378.55999755859375, 383.0284423828125, 1082.5308837890625, 612.499755859375, 211.31613159179688, 595.3204345703125, 180.3459014892578, 44.79717254638672, 636.647216796875, -95.26893615722656, -395.0111083984375, 583.5523071289062, 151.40292358398438, 504.19903564453125, 11.858652114868164, -125.75982666015625, -164.71417236328125, 905.4483032226562, 133.29049682617188, 1006.3245849609375, 223.50775146484375, 40.47308349609375, 503.01409912109375, 96.53228759765625, 594.4462890625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000535.npy"} +{"epoch": 0.7856093979441997, "step": 536, "batch_size": 64, "mean": 448.9898681640625, "std": 627.0385131835938, "min": -963.8006591796875, "p10": -108.69580764770507, "median": 292.98634338378906, "p90": 1093.4592285156252, "max": 3586.003173828125, "pos_frac": 0.84375, "sample": [184.7272491455078, 791.9127807617188, 298.48492431640625, 920.2914428710938, 1256.081787109375, 59.74192810058594, 154.43585205078125, 36.95111083984375, 184.21697998046875, -49.31031799316406, 657.18310546875, 291.58306884765625, 923.7809448242188, 959.082763671875, -155.2626495361328, 469.288818359375, 1286.92822265625, 133.0634307861328, 697.249755859375, 656.3587036132812, 1985.85546875, 662.1275024414062, -116.96520233154297, 571.9500732421875, 21.980857849121094, 1041.8402099609375, 29.3065185546875, 561.2578125, 614.1389770507812, -67.460693359375, 3586.003173828125, 677.5018310546875, 305.9559326171875, 294.3616027832031, 523.3124389648438, 559.8472290039062, 63.36918640136719, 1115.5816650390625, -170.31771850585938, 49.75069808959961, 109.24122619628906, -963.8006591796875, 1530.9736328125, 892.707275390625, 378.14208984375, -304.0133056640625, 136.52001953125, 148.0330810546875, 187.4300079345703, 207.4150390625, 273.19873046875, -111.41352081298828, 265.5492858886719, 291.611083984375, 278.7275085449219, 764.1241455078125, 174.8075714111328, -102.35447692871094, -371.17535400390625, 51.59466552734375, 678.08154296875, 634.0869750976562, 379.42999267578125, 1140.246337890625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000536.npy"} +{"epoch": 0.7870778267254038, "step": 537, "batch_size": 64, "mean": 328.5495910644531, "std": 499.09906005859375, "min": -868.1473388671875, "p10": -257.56859588623036, "median": 270.5533447265625, "p90": 974.6017639160157, "max": 1409.57861328125, "pos_frac": 0.84375, "sample": [-864.0680541992188, -579.825927734375, 13.289108276367188, 232.71661376953125, -418.27667236328125, 256.66326904296875, 65.44865417480469, 964.2919311523438, 7.2540435791015625, 6.172359466552734, 456.7340393066406, 9.728591918945312, 498.2384948730469, 197.1494903564453, -292.72564697265625, 881.6729736328125, 979.020263671875, -39.263275146484375, 284.44342041015625, -139.67266845703125, 65.78106689453125, 910.7872924804688, 80.66764068603516, -615.3411865234375, 830.1280517578125, -175.5354766845703, 1409.57861328125, 1038.0599365234375, 502.3363037109375, 549.7659912109375, 981.4964599609375, 394.4906005859375, 930.0822143554688, 377.6773681640625, 639.0222778320312, 76.9506607055664, 242.84085083007812, 560.6443481445312, 297.4258117675781, -494.833740234375, 170.83761596679688, 445.5935363769531, 53.76661682128906, -868.1473388671875, 28.476940155029297, 802.8153076171875, 22.513912200927734, 191.97283935546875, 306.17626953125, 720.37353515625, 92.02088928222656, 1370.2470703125, 133.48257446289062, 903.87353515625, 633.5514526367188, 145.24417114257812, 1207.9283447265625, 126.01513671875, 769.5049438476562, 332.2072448730469, 399.9467468261719, 650.24169921875, 229.2344970703125, 1038.2796630859375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000537.npy"} +{"epoch": 0.788546255506608, "step": 538, "batch_size": 64, "mean": 351.7939758300781, "std": 455.4217224121094, "min": -436.4878845214844, "p10": -195.4806411743164, "median": 343.7490539550781, "p90": 947.5911804199221, "max": 1929.94970703125, "pos_frac": 0.75, "sample": [33.61799621582031, 332.06060791015625, 527.53125, 1172.811279296875, 470.66925048828125, -189.61627197265625, 887.5418701171875, 310.7629699707031, 513.6668701171875, 434.7200012207031, -102.56896209716797, 1240.5032958984375, 605.398193359375, 588.6002807617188, 196.96141052246094, 705.1561279296875, -274.76953125, 30.36333465576172, 488.64898681640625, 1075.7122802734375, 301.0994873046875, 419.3042297363281, 178.05734252929688, 608.1353759765625, 491.45928955078125, 84.79859924316406, -92.92919921875, 1244.671875, -52.03782653808594, -335.0898742675781, -213.7023162841797, 828.1365356445312, 214.57032775878906, -94.75346374511719, 1929.94970703125, 618.627685546875, 284.0962829589844, 430.213623046875, 355.4375, 770.8585205078125, 101.38134765625, -50.13031768798828, 10.11202621459961, 601.8814086914062, 135.17324829101562, 562.0796508789062, -436.4878845214844, 470.4945983886719, -285.9745178222656, 96.41363525390625, 524.2501220703125, 523.3674926757812, 402.1875, 426.8106994628906, -197.9939422607422, -8.132011413574219, 262.66845703125, -3.065845489501953, 1274.229736328125, 973.3265991210938, -212.14564514160156, -170.46502685546875, 485.7357177734375, 10.421926498413086], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000538.npy"} +{"epoch": 0.7900146842878121, "step": 539, "batch_size": 64, "mean": 314.3374328613281, "std": 408.0987854003906, "min": -674.3265380859375, "p10": -146.17467727661128, "median": 308.7460632324219, "p90": 773.5888610839844, "max": 1412.402099609375, "pos_frac": 0.734375, "sample": [373.8508605957031, 342.62908935546875, 58.441734313964844, -23.688814163208008, 313.23541259765625, 424.7613525390625, -188.1923065185547, -17.22594451904297, -86.91403198242188, 950.4619140625, -233.318359375, -35.1180419921875, 1260.185302734375, 126.95948791503906, 757.365966796875, 553.9605712890625, 626.0999755859375, 435.97369384765625, -12.41961669921875, 766.0001220703125, 1412.402099609375, 112.79205322265625, 157.5188751220703, 602.25537109375, 121.71125030517578, 95.55764770507812, 104.37480163574219, 523.77734375, 48.87953186035156, 711.592529296875, -218.73043823242188, 542.7172241210938, -674.3265380859375, 557.353759765625, 664.96875, 94.922607421875, 7.449745178222656, 563.383056640625, 224.31202697753906, -221.52279663085938, -84.14637756347656, 1095.771484375, -105.58151245117188, 724.5692138671875, 62.4565544128418, -162.648193359375, 774.86962890625, -215.34458923339844, 118.8060531616211, -107.7364730834961, 590.2669677734375, 108.63040924072266, 770.6004028320312, 317.0954284667969, 394.03997802734375, 362.62738037109375, 801.8379516601562, 476.64178466796875, 304.2567138671875, 404.7183837890625, -44.41902160644531, 1151.75830078125, 594.36669921875, -40.27998352050781], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000539.npy"} +{"epoch": 0.7914831130690162, "step": 540, "batch_size": 64, "mean": 433.374267578125, "std": 555.3549194335938, "min": -541.0742797851562, "p10": -188.10508575439448, "median": 358.92344665527344, "p90": 1321.1284790039067, "max": 2163.112548828125, "pos_frac": 0.8125, "sample": [454.4844055175781, 505.49591064453125, 607.7909545898438, -212.94593811035156, 1652.1148681640625, -245.05078125, 352.4893493652344, 11.14715576171875, -481.5404357910156, -433.70208740234375, 8.216756820678711, 601.0823974609375, -10.931533813476562, 408.01544189453125, 2163.112548828125, 89.48632049560547, 267.2200012207031, 79.25180053710938, 555.4734497070312, 435.508544921875, -1.749420166015625, 125.86626434326172, 286.6758728027344, 700.072021484375, 932.238037109375, 1112.7340087890625, 459.6714782714844, 1217.996826171875, 1150.9881591796875, 366.2407531738281, 660.6449584960938, 26.283763885498047, 729.890380859375, 8.491554260253906, 181.19256591796875, 424.1860656738281, 52.215789794921875, -43.82585144042969, 1365.3277587890625, 295.4569396972656, 1003.433837890625, 1573.9705810546875, 1409.0828857421875, 1371.8685302734375, -541.0742797851562, 703.7413330078125, 343.8732604980469, 224.0226593017578, 444.8057861328125, 365.3575439453125, -86.31785583496094, 293.0387268066406, 610.0317993164062, 622.9068603515625, 294.8399658203125, 275.9106140136719, 598.6705932617188, 235.35084533691406, 1389.32861328125, -130.14309692382812, 72.91080474853516, 633.1099853515625, -386.87298583984375, -443.2115173339844], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000540.npy"} +{"epoch": 0.7929515418502202, "step": 541, "batch_size": 64, "mean": 246.78013610839844, "std": 519.0712280273438, "min": -1036.370849609375, "p10": -388.62051086425765, "median": 210.79891967773438, "p90": 856.8711791992189, "max": 1645.8704833984375, "pos_frac": 0.734375, "sample": [-460.6377258300781, 207.69143676757812, 571.2757568359375, 94.90371704101562, 126.61360931396484, 162.40866088867188, 417.1782531738281, 65.65962219238281, 178.44305419921875, -159.7232666015625, 28.574615478515625, 1114.180908203125, -815.7427978515625, -8.767196655273438, 877.158935546875, 56.106117248535156, 131.058837890625, 1189.8885498046875, -90.3245620727539, 787.957763671875, 1401.4168701171875, 732.0833740234375, 4.954010009765625, -465.75054931640625, 260.0589599609375, -175.2884521484375, 294.00115966796875, 294.39678955078125, -455.105712890625, 710.25, 330.4685363769531, 77.12593841552734, 809.5330810546875, 184.77101135253906, 213.90640258789062, 143.307861328125, 224.80462646484375, 164.80088806152344, 701.5675659179688, 437.51318359375, -128.44090270996094, 178.6282501220703, 768.4996948242188, 971.5520629882812, 553.027099609375, 308.06195068359375, -904.2205810546875, -596.181640625, 542.2877807617188, 563.8428955078125, -73.89542388916016, 396.6138000488281, 259.4559020996094, 1645.8704833984375, 1282.22900390625, -204.74859619140625, -1036.370849609375, 486.4277648925781, -233.48837280273438, -159.04904174804688, -182.72930908203125, 235.39773559570312, 516.3013916015625, 242.13868713378906], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000541.npy"} +{"epoch": 0.7944199706314243, "step": 542, "batch_size": 64, "mean": 439.6311340332031, "std": 589.29736328125, "min": -1120.7763671875, "p10": -267.1587646484375, "median": 399.7540588378906, "p90": 1165.7597167968752, "max": 1977.364990234375, "pos_frac": 0.828125, "sample": [1117.258056640625, 253.2967529296875, 255.11306762695312, 274.40985107421875, 106.29102325439453, 1059.464599609375, -261.32275390625, 1933.2982177734375, 1110.6063232421875, 210.4881591796875, 39.49961853027344, 495.8966064453125, 703.245361328125, -116.84286499023438, 44.29486846923828, 445.06903076171875, 1637.1898193359375, 462.3643798828125, -138.974853515625, 1977.364990234375, 245.65866088867188, 82.48847961425781, 417.68817138671875, 406.71380615234375, -379.2688293457031, 273.61474609375, 59.02923583984375, -308.9901123046875, -327.95098876953125, 862.099365234375, -269.659912109375, 913.8211669921875, 762.9082641601562, 257.9236145019531, 1235.419677734375, -15.48904800415039, 546.8172607421875, 540.5413818359375, -523.2774047851562, 793.4539184570312, 436.7657775878906, 275.4364929199219, 510.9416198730469, 123.0114974975586, 1716.607177734375, 164.82638549804688, 203.5303955078125, -1120.7763671875, 713.8689575195312, 164.2306365966797, 667.7599487304688, 304.7432861328125, 203.81517028808594, 392.7943115234375, 1415.0416259765625, 1186.546142578125, 492.6966857910156, -711.9656982421875, 1033.902099609375, 326.5522766113281, 431.14703369140625, 623.3233642578125, 607.1602783203125, 792.8825073242188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000542.npy"} +{"epoch": 0.7958883994126285, "step": 543, "batch_size": 64, "mean": 293.76068115234375, "std": 688.071533203125, "min": -921.1615600585938, "p10": -336.1901092529297, "median": 151.06585693359375, "p90": 1114.211230468751, "max": 3084.617431640625, "pos_frac": 0.640625, "sample": [28.754043579101562, -71.21990966796875, 741.4505004882812, -83.44293212890625, 16.447927474975586, 2033.3944091796875, 3.738872528076172, 47.34751892089844, -921.1615600585938, 651.0532836914062, -21.888526916503906, -829.5126342773438, 54.44527816772461, 239.3994140625, 1757.2088623046875, 253.81776428222656, 37.9083251953125, 3084.617431640625, -498.5519104003906, -25.201026916503906, 694.7925415039062, -430.01959228515625, -289.20587158203125, 159.7928924560547, -305.790283203125, -53.187110900878906, -81.0865478515625, 568.5267333984375, -490.88592529296875, 90.99021911621094, 520.9468383789062, 331.662841796875, -347.27215576171875, 1294.71142578125, -253.14956665039062, -88.10591125488281, 1968.0546875, 603.8314208984375, 860.9122314453125, 111.13390350341797, 1222.7679443359375, 142.3388214111328, -268.7047119140625, 203.8567352294922, 752.9365844726562, -77.30329132080078, 297.9111328125, 1626.9739990234375, -385.17315673828125, 689.8182983398438, 206.04507446289062, -119.24462127685547, 347.758544921875, -310.3320007324219, 454.2377014160156, 559.458984375, 251.58895874023438, -191.14712524414062, -76.95314025878906, 335.69195556640625, 190.42184448242188, 426.9286193847656, 701.8873901367188, 453.658935546875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000543.npy"} +{"epoch": 0.7973568281938326, "step": 544, "batch_size": 64, "mean": 357.8401184082031, "std": 533.2598876953125, "min": -891.0479736328125, "p10": -195.1996109008789, "median": 314.7194061279297, "p90": 1013.0060363769536, "max": 1751.23046875, "pos_frac": 0.75, "sample": [1065.108642578125, 430.0726318359375, 434.35272216796875, 457.8031311035156, 1751.23046875, -705.655029296875, 389.8844299316406, -12.957176208496094, 891.4332885742188, 73.72946166992188, 1185.6370849609375, -4.28373908996582, 803.0430297851562, 245.24659729003906, -858.7125244140625, 388.3126525878906, 695.9224243164062, 208.2369384765625, -176.04156494140625, 446.4623107910156, 329.5182189941406, 456.0306701660156, -340.0594177246094, 250.33926391601562, -19.986848831176758, 251.79820251464844, 773.62939453125, 95.1387939453125, -370.6651611328125, -7.456457138061523, 1188.16162109375, 181.84902954101562, -47.14061737060547, 620.779052734375, -891.0479736328125, 778.5345458984375, 530.7044677734375, 476.0267333984375, 582.2904052734375, -203.4102020263672, 299.92059326171875, -50.745426177978516, 249.48684692382812, 1561.865478515625, 674.35791015625, 26.674640655517578, 39.15196228027344, 1499.99072265625, 802.4212646484375, 486.21405029296875, 40.86312484741211, 189.07638549804688, 594.200927734375, 1464.8197021484375, 655.4243774414062, 181.4149627685547, 298.9461669921875, -91.66978454589844, 438.5664978027344, 58.96989440917969, -153.27366638183594, 849.608154296875, -354.33526611328125, 795.9581909179688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000544.npy"} +{"epoch": 0.7988252569750367, "step": 545, "batch_size": 64, "mean": 424.308837890625, "std": 554.3837280273438, "min": -706.2052001953125, "p10": -85.45823364257812, "median": 294.38189697265625, "p90": 1317.5322631835938, "max": 1722.056396484375, "pos_frac": 0.828125, "sample": [-41.669097900390625, 8.194976806640625, -706.2052001953125, 38.640045166015625, 289.832275390625, 766.5442504882812, 298.9315185546875, 79.57469177246094, 592.2094116210938, -463.1846618652344, 551.2222900390625, 639.3079833984375, -86.65381622314453, 726.3072509765625, 1080.0296630859375, -72.52916717529297, 1378.946044921875, 1353.912109375, 1293.2254638671875, 431.5160217285156, 873.5458984375, 53.87917709350586, 282.6385498046875, 258.60516357421875, -66.0486831665039, 188.89358520507812, 10.6005859375, 400.34918212890625, 868.912109375, -82.66854095458984, 1490.396728515625, 408.3358154296875, 104.69515228271484, -143.1068572998047, 1661.432861328125, 873.8256225585938, 89.25166320800781, 1285.6407470703125, -673.1444702148438, 390.49560546875, 73.1739273071289, 404.1992492675781, 85.52301025390625, 643.7227783203125, 395.7548522949219, 831.5296020507812, 896.53564453125, 1334.5640869140625, 303.6560363769531, 255.63055419921875, 178.5121612548828, 105.77203369140625, 194.63626098632812, 470.5074157714844, 105.48013305664062, 1722.056396484375, 1026.8951416015625, -315.8997497558594, 287.94464111328125, 157.21453857421875, 606.8016357421875, -596.687255859375, 225.61074829101562, 1327.949462890625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000545.npy"} +{"epoch": 0.8002936857562408, "step": 546, "batch_size": 64, "mean": 282.87054443359375, "std": 454.8184509277344, "min": -1275.2987060546875, "p10": -267.89700012207027, "median": 282.4901123046875, "p90": 880.3415405273438, "max": 1104.3133544921875, "pos_frac": 0.734375, "sample": [-278.8830261230469, 131.26190185546875, 429.00799560546875, -7.6822509765625, 313.74444580078125, 494.30810546875, 243.80780029296875, 526.7850341796875, 155.16656494140625, 283.7657470703125, -181.37310791015625, -220.18826293945312, -352.4896240234375, -52.82855224609375, 734.7862548828125, 96.58702087402344, 29.771888732910156, 546.5313720703125, 928.8030395507812, 241.062255859375, -59.20659637451172, 745.5240478515625, -329.0594177246094, 1063.298583984375, 400.22955322265625, 687.912109375, 864.96435546875, 448.35845947265625, 891.8270263671875, 384.6333923339844, 1104.3133544921875, -360.0340576171875, -17.225852966308594, 278.4260559082031, 294.83416748046875, -603.0658569335938, 647.339599609375, 357.439453125, 837.4613647460938, 1061.027099609375, -1275.2987060546875, 737.2437744140625, 375.16888427734375, 281.2144775390625, 454.9236145019531, 886.9317626953125, 33.13031005859375, 714.46337890625, -126.1355972290039, 220.4678955078125, 110.79584503173828, 74.94750213623047, 611.2847900390625, 241.36434936523438, -211.5841522216797, 551.283447265625, 178.89169311523438, 864.8859252929688, -242.262939453125, 89.84380340576172, -138.62696838378906, 414.0894470214844, -394.26934814453125, 890.0211181640625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000546.npy"} +{"epoch": 0.801762114537445, "step": 547, "batch_size": 64, "mean": 363.1061096191406, "std": 508.4866027832031, "min": -1084.61865234375, "p10": -113.40253829956055, "median": 344.9618225097656, "p90": 982.8627868652344, "max": 2077.60546875, "pos_frac": 0.75, "sample": [436.13134765625, 117.56848907470703, -1084.61865234375, 958.8250732421875, -48.43830490112305, -6.142059326171875, 298.2281799316406, 747.3365478515625, 105.294677734375, 475.6514587402344, 174.89727783203125, 1234.9814453125, -56.415706634521484, 1579.510009765625, 129.20069885253906, -64.88831329345703, 710.7974853515625, 503.3760681152344, -183.8014373779297, 986.4521484375, -58.90593719482422, 20.57270050048828, 403.05523681640625, 183.5126953125, 498.7814025878906, -64.01983642578125, 527.2080688476562, 102.20585632324219, 245.50331115722656, 354.0565490722656, 974.1043701171875, 33.45112609863281, 538.75927734375, 708.072509765625, 601.4216918945312, 316.98284912109375, -77.87559509277344, 974.4876098632812, 351.551513671875, 61.85540771484375, 595.732177734375, 2077.60546875, 479.5046081542969, -542.7085571289062, -171.41677856445312, -108.6430435180664, -431.621337890625, 989.65087890625, 106.86027526855469, 423.98419189453125, 1332.01416015625, 394.2586669921875, 418.1152648925781, 463.57568359375, 167.3040008544922, 294.5328369140625, 889.8717041015625, -25.7364501953125, 394.26177978515625, -115.44232177734375, 756.2227783203125, 1012.6954956054688, 338.37213134765625, -208.92974853515625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000547.npy"} +{"epoch": 0.8032305433186491, "step": 548, "batch_size": 64, "mean": 346.9548034667969, "std": 532.9920654296875, "min": -876.357177734375, "p10": -149.98497467041014, "median": 282.1988525390625, "p90": 798.2471069335938, "max": 2295.4228515625, "pos_frac": 0.765625, "sample": [46.33857727050781, 780.6045532226562, 308.79730224609375, 223.64097595214844, -8.490730285644531, 282.686767578125, -876.357177734375, 1233.5333251953125, 400.8035888671875, 165.40933227539062, 481.188720703125, 789.407470703125, 1095.446044921875, 479.92669677734375, 578.5072021484375, -261.2315673828125, -145.8905792236328, 305.92364501953125, 701.1495361328125, 125.77313995361328, 831.4285278320312, 350.6397399902344, 243.12606811523438, -133.35186767578125, 492.505859375, -15.816993713378906, 114.66419982910156, 281.7109375, 307.9463195800781, 256.70526123046875, -151.73971557617188, 2280.779541015625, 657.185791015625, 212.55047607421875, 190.33131408691406, -400.6179504394531, -85.3880615234375, 715.4817504882812, -161.83279418945312, 2295.4228515625, 21.441646575927734, -75.89419555664062, -204.5495147705078, 359.62646484375, -21.523530960083008, 142.00485229492188, 454.85418701171875, 126.6942138671875, 359.5384826660156, -174.28530883789062, 587.3021850585938, 427.0010681152344, -57.6326904296875, 1819.822265625, 473.36895751953125, 518.7999267578125, 105.53865051269531, 0.034454345703125, 233.1227264404297, 328.8323059082031, 82.7038803100586, 519.9193115234375, 387.45416259765625, 802.0355224609375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000548.npy"} +{"epoch": 0.8046989720998532, "step": 549, "batch_size": 64, "mean": 256.8887634277344, "std": 471.1693420410156, "min": -780.1717529296875, "p10": -224.0887908935547, "median": 194.65565490722656, "p90": 914.3135070800782, "max": 1465.4217529296875, "pos_frac": 0.71875, "sample": [341.5253601074219, 953.3421630859375, 90.06611633300781, -66.4834976196289, -97.27772521972656, 60.263423919677734, -209.1781768798828, 427.24041748046875, 572.9832153320312, 667.7770385742188, -176.14991760253906, 1071.618896484375, -4.760826110839844, 184.83880615234375, -148.92593383789062, -343.2003173828125, 790.7520141601562, -394.8857421875, -209.87185668945312, -495.6303405761719, 509.46478271484375, 924.2467041015625, -305.5465087890625, 257.0618896484375, 357.44915771484375, 313.7746276855469, 192.07318115234375, 18.99219512939453, 687.676513671875, 891.1360473632812, 152.97125244140625, 265.4919738769531, 304.0162353515625, -142.10369873046875, 382.4866027832031, -775.4430541992188, 197.23812866210938, 692.919189453125, 108.8019027709961, 1.0755195617675781, 35.117950439453125, -64.57518768310547, 322.7554016113281, 652.0272827148438, 1259.528076171875, 176.3124237060547, -111.59019470214844, 299.27532958984375, -230.1817626953125, 1465.4217529296875, 1120.0625, -780.1717529296875, 63.13404846191406, 480.5728759765625, 431.13653564453125, -201.11318969726562, 302.2668151855469, 82.91915893554688, 635.080322265625, 1417.44921875, 312.5860595703125, 500.814208984375, 151.7135772705078, 72.51302337646484], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000549.npy"} +{"epoch": 0.8061674008810573, "step": 550, "batch_size": 64, "mean": 315.1492919921875, "std": 470.0491638183594, "min": -898.668212890625, "p10": -256.9260391235351, "median": 268.59307861328125, "p90": 873.3139465332034, "max": 1821.2257080078125, "pos_frac": 0.796875, "sample": [259.7001037597656, 246.73062133789062, 1062.8551025390625, 390.11212158203125, 33.983306884765625, 804.8665161132812, 1032.108642578125, 192.9344024658203, -154.39939880371094, 329.30322265625, 605.23583984375, 483.55670166015625, 520.926513671875, 191.28964233398438, 306.65093994140625, 1463.237548828125, 72.65648651123047, -429.3727111816406, 109.27616882324219, 240.60430908203125, -898.668212890625, 7.06561279296875, 413.3280029296875, 766.8433227539062, -81.30525207519531, 774.1756591796875, 902.6485595703125, 99.9145278930664, 257.35845947265625, 229.3610076904297, 47.77142333984375, -650.8178100585938, 352.31103515625, -129.61941528320312, 405.1453552246094, 765.4654541015625, -71.49717712402344, 434.2579345703125, 1821.2257080078125, 366.9470520019531, 269.49212646484375, 690.8742065429688, 86.26435852050781, 125.4525146484375, 1115.664306640625, 403.2474365234375, 1028.8646240234375, 111.86024475097656, 189.66983032226562, 677.9223022460938, 540.0059814453125, 637.7642822265625, -300.86602783203125, 485.30389404296875, -139.09097290039062, 284.7959289550781, 267.69403076171875, 495.7428894042969, 244.43783569335938, -413.17529296875, 512.17529296875, -316.49871826171875, -373.01641845703125, -29.196304321289062], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000550.npy"} +{"epoch": 0.8076358296622613, "step": 551, "batch_size": 64, "mean": 486.6854553222656, "std": 477.7404479980469, "min": -453.0977478027344, "p10": -84.47924957275389, "median": 450.67181396484375, "p90": 1118.811474609375, "max": 1901.91943359375, "pos_frac": 0.84375, "sample": [-9.86507797241211, -17.740921020507812, 527.8724365234375, 441.19342041015625, 603.92578125, 623.165771484375, -445.7456359863281, 33.21770095825195, 1111.69287109375, -265.89093017578125, 159.7049560546875, 324.43109130859375, -68.48750305175781, 557.9596557617188, 658.2008666992188, 1045.0836181640625, 469.5014953613281, 350.4720458984375, 533.6140747070312, 1349.64208984375, 1021.091552734375, 435.45404052734375, 608.6912231445312, 587.628662109375, 329.27789306640625, 255.5081024169922, 1075.840576171875, 356.52752685546875, 491.0901184082031, 321.3299865722656, 353.6808776855469, -116.45626831054688, 1143.282958984375, 137.778564453125, 521.655517578125, 646.3533935546875, 460.15020751953125, 747.4716186523438, 473.0003662109375, 24.132856369018555, -91.33285522460938, 1039.5836181640625, 1191.4222412109375, 316.2657775878906, 38.80903244018555, 1436.2005615234375, -453.0977478027344, -236.37841796875, 1456.9228515625, 582.093017578125, -172.247802734375, 104.48745727539062, 1121.8623046875, 1080.975830078125, 425.00579833984375, 214.32994079589844, 862.11572265625, 224.15057373046875, 385.7838439941406, 815.4998168945312, 235.81202697753906, 1901.91943359375, 482.12750244140625, 330.1201171875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000551.npy"} +{"epoch": 0.8091042584434655, "step": 552, "batch_size": 64, "mean": 545.5555419921875, "std": 616.5260620117188, "min": -648.8934326171875, "p10": -107.39609374999995, "median": 517.1090698242188, "p90": 1318.8649780273438, "max": 2395.84375, "pos_frac": 0.84375, "sample": [596.1788940429688, 63.813514709472656, 700.201904296875, 2196.890380859375, 563.6212158203125, 1129.187255859375, 608.5369262695312, 997.03369140625, -56.66497802734375, 483.33734130859375, 779.2227783203125, 520.0633544921875, 110.64698791503906, 705.4944458007812, 299.9390869140625, 774.9144897460938, 174.90078735351562, 636.7964477539062, 1320.319091796875, -199.38165283203125, 61.732452392578125, 538.1610107421875, 487.14959716796875, 311.626220703125, -648.8934326171875, 18.51849365234375, 43.633384704589844, 1391.8536376953125, 224.22434997558594, -265.13623046875, 1315.4720458984375, -436.39642333984375, 587.5353393554688, -129.13800048828125, 572.1415405273438, 198.49583435058594, 417.505615234375, 2129.576171875, 1289.1209716796875, 1515.4559326171875, 574.779296875, -19.073570251464844, 514.15478515625, 759.7525634765625, 615.5679931640625, 2395.84375, 219.89556884765625, 291.33551025390625, 1107.8734130859375, 2087.70361328125, 540.088623046875, 177.7663116455078, 481.95428466796875, -317.3556823730469, -37.01629638671875, 521.3245849609375, -146.12632751464844, 204.7299346923828, 467.031982421875, 474.0081481933594, 561.431640625, 819.4537353515625, 590.5970458984375, 2.1454391479492188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000552.npy"} +{"epoch": 0.8105726872246696, "step": 553, "batch_size": 64, "mean": 297.54693603515625, "std": 486.0574645996094, "min": -567.9095458984375, "p10": -237.61245574951172, "median": 215.19841766357422, "p90": 945.2489135742188, "max": 1996.31103515625, "pos_frac": 0.703125, "sample": [1996.31103515625, 958.1488037109375, -213.43289184570312, 461.8742980957031, 426.9847412109375, -131.36688232421875, 67.0975341796875, -230.43731689453125, 153.8051300048828, 341.546630859375, 193.75363159179688, 225.0481414794922, 160.6326446533203, 117.70297241210938, -11.102363586425781, 279.14263916015625, -394.02264404296875, -12.388671875, 397.5245361328125, -96.22434997558594, 106.71145629882812, 140.84068298339844, 361.8901672363281, -567.9095458984375, 450.78424072265625, -75.86256408691406, 1179.1572265625, 61.94586181640625, -18.78844451904297, 334.7528381347656, -240.68751525878906, 363.2977600097656, -160.39892578125, -118.37599182128906, 723.016357421875, 354.03131103515625, 411.82293701171875, 298.3240966796875, 1036.640869140625, -43.700340270996094, 284.5037841796875, -247.24363708496094, 645.1162719726562, 479.9950256347656, 96.1989974975586, 267.33489990234375, -258.520263671875, 139.54095458984375, 578.775390625, 0.9753341674804688, 1173.7486572265625, -14.422492980957031, 915.149169921875, 352.37811279296875, 849.1806640625, 1046.478271484375, 205.34869384765625, 848.571044921875, -496.479248046875, 1616.33056640625, -249.59112548828125, 866.6996459960938, 19.79279327392578, 635.0514526367188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000553.npy"} +{"epoch": 0.8120411160058737, "step": 554, "batch_size": 64, "mean": 415.15557861328125, "std": 632.1915283203125, "min": -927.1163330078125, "p10": -411.6636291503906, "median": 384.115478515625, "p90": 1147.6602661132815, "max": 1882.2962646484375, "pos_frac": 0.796875, "sample": [194.29234313964844, -812.726318359375, 732.6716918945312, 376.0833435058594, 231.43417358398438, 925.5089111328125, 609.635498046875, 495.0674133300781, -426.8191223144531, 429.9921875, 1684.1622314453125, 316.2177734375, 249.35919189453125, 392.1476135253906, -196.6287384033203, 403.314697265625, 637.6690673828125, 837.3616943359375, 447.0459289550781, 70.9228286743164, -458.5420227050781, -718.0972900390625, -376.3008117675781, 1.9042739868164062, 1882.2962646484375, -45.61487579345703, 327.9771423339844, 531.402099609375, 409.28106689453125, 903.4422607421875, 527.487060546875, 1173.620849609375, 1514.0015869140625, 244.76849365234375, 1043.1312255859375, 308.8766784667969, 1051.84521484375, 2.4778404235839844, 1793.1541748046875, -170.35891723632812, 43.558929443359375, 354.60064697265625, 1087.0855712890625, 541.02685546875, 974.4923095703125, 149.39720153808594, 297.92486572265625, 4.82916259765625, 778.4891967773438, 694.8841552734375, -927.1163330078125, 727.117919921875, 253.50979614257812, -712.8137817382812, 822.031494140625, -323.1810302734375, -535.47021484375, 119.22327423095703, 415.32684326171875, 960.5150756835938, 1656.1348876953125, 1636.5692138671875, 135.1722412109375, -126.816650390625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000554.npy"} +{"epoch": 0.8135095447870778, "step": 555, "batch_size": 64, "mean": 461.125244140625, "std": 666.2518310546875, "min": -888.6052856445312, "p10": -158.08636169433595, "median": 338.1500701904297, "p90": 1436.0959594726562, "max": 2253.056884765625, "pos_frac": 0.8125, "sample": [462.4658203125, 466.10968017578125, 1210.059326171875, 366.8337097167969, 514.718994140625, 1198.13623046875, 298.79669189453125, 261.3576354980469, -159.25811767578125, 192.15231323242188, 37.76576232910156, 755.1185913085938, 829.1343994140625, 308.037353515625, 125.36700439453125, 18.588869094848633, 253.44338989257812, -229.31533813476562, 2253.056884765625, 1178.1505126953125, 108.49322509765625, 1866.2081298828125, 125.06464385986328, 504.9144287109375, 748.1115112304688, 156.07748413085938, 457.579833984375, 55.43211364746094, 395.0508728027344, -22.268692016601562, 462.14886474609375, 969.8536376953125, 576.681396484375, 408.8526306152344, 9.874603271484375, 232.9165496826172, 790.1566772460938, -612.850341796875, 224.7156219482422, 50.11901092529297, -877.498779296875, 936.5470581054688, 1439.5494384765625, 1941.7879638671875, -288.0128479003906, 596.02001953125, 1791.5528564453125, 112.48796081542969, 437.0240783691406, 115.7989501953125, 1428.037841796875, -640.9286499023438, 1551.8350830078125, -888.6052856445312, -103.21839141845703, -95.51790618896484, 1982.42333984375, 413.549072265625, -155.35226440429688, 152.35699462890625, 1010.5064086914062, 584.0772705078125, -89.72337341308594, 309.4664306640625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000555.npy"} +{"epoch": 0.8149779735682819, "step": 556, "batch_size": 64, "mean": 281.4146728515625, "std": 414.839111328125, "min": -328.340087890625, "p10": -198.0220458984375, "median": 242.79735565185547, "p90": 890.1289672851565, "max": 1437.3236083984375, "pos_frac": 0.734375, "sample": [1116.6268310546875, 82.17737579345703, 101.96975708007812, -33.9014892578125, 335.7179260253906, 189.94131469726562, 594.2684326171875, 423.9209899902344, -191.62689208984375, 344.0713195800781, 13.857444763183594, 742.3384399414062, 302.6671447753906, -204.25772094726562, 332.929443359375, 455.72967529296875, 1000.804443359375, 38.77532958984375, 693.49853515625, -94.84821319580078, -198.58140563964844, -68.2165298461914, -323.0030822753906, 115.02466583251953, -328.340087890625, -196.4527587890625, 315.9718933105469, -196.7168731689453, 834.5392456054688, -216.408935546875, 226.38018798828125, 268.01715087890625, 275.0857238769531, 155.5888671875, 913.4853515625, -45.021934509277344, 289.30670166015625, 198.5802459716797, 118.82474517822266, -148.862060546875, -145.98553466796875, 537.1241455078125, 259.22991943359375, 178.73370361328125, -218.31423950195312, 513.989013671875, 835.6307373046875, 282.7666015625, 1137.21484375, 167.57254028320312, 248.4676513671875, -113.55420684814453, 691.2369995117188, 183.91375732421875, 1415.870361328125, 86.58277130126953, 941.2694091796875, 1437.3236083984375, 545.5931396484375, 270.30853271484375, 334.0454406738281, -311.9479675292969, 237.12705993652344, 262.47943115234375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000556.npy"} +{"epoch": 0.8164464023494861, "step": 557, "batch_size": 64, "mean": 404.26434326171875, "std": 602.2828369140625, "min": -1295.26513671875, "p10": -240.714485168457, "median": 341.79347229003906, "p90": 1250.4900268554688, "max": 2068.3427734375, "pos_frac": 0.78125, "sample": [-1295.26513671875, 520.998779296875, -792.268310546875, 614.8970947265625, 280.8980407714844, 243.3328857421875, 113.19349670410156, 555.9049682617188, 599.3748168945312, 51.942779541015625, 801.3671264648438, 1196.716064453125, 860.076904296875, 33.4693603515625, 202.27430725097656, 279.6407470703125, 566.7561645507812, 436.240234375, -79.68707275390625, 83.58539581298828, 150.29298400878906, 1242.5390625, 1257.294921875, 679.6297607421875, 57.06610107421875, 503.1398620605469, -79.59808349609375, -248.63771057128906, -189.4039306640625, 711.0128784179688, 258.052978515625, 560.5443115234375, 1074.2979736328125, 431.555419921875, 304.6069641113281, 231.60923767089844, 498.0473937988281, -479.41644287109375, 1095.06494140625, 493.07537841796875, 715.0288696289062, -431.9735412597656, 535.1863403320312, 155.03155517578125, 84.47566223144531, -458.50360107421875, 614.740966796875, 1415.924560546875, -222.22695922851562, 2068.3427734375, -58.7690315246582, 1943.8035888671875, 1254.6995849609375, 282.3763732910156, 997.8385620117188, 11.009811401367188, -141.39385986328125, 1253.8975830078125, -316.52532958984375, 1317.1414794921875, -28.470970153808594, 398.6725158691406, 378.97998046875, 279.4066162109375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000557.npy"} +{"epoch": 0.8179148311306902, "step": 558, "batch_size": 64, "mean": 544.393798828125, "std": 577.3419799804688, "min": -1345.92724609375, "p10": -72.98057899475097, "median": 479.67909240722656, "p90": 1289.613757324219, "max": 1726.01171875, "pos_frac": 0.828125, "sample": [390.103271484375, 1195.3104248046875, 292.9185485839844, 1326.720703125, 234.20254516601562, 403.04150390625, 478.5018310546875, 73.75071716308594, 480.9568176269531, -413.7155456542969, -212.43179321289062, 951.5072631835938, 1068.18408203125, 851.1815795898438, 480.8563537597656, 1054.5616455078125, 347.06597900390625, 249.07315063476562, 1512.4779052734375, 1520.2332763671875, -111.414306640625, 305.609130859375, 585.3907470703125, 926.3523559570312, -17.78205108642578, 391.2561340332031, 1148.861572265625, 242.4508819580078, 1468.7198486328125, 496.28729248046875, 441.8170471191406, 573.635009765625, 854.4275512695312, 1343.802490234375, 1488.67236328125, 983.30224609375, -1345.92724609375, -63.21993637084961, 609.0677490234375, 389.11822509765625, 383.7719421386719, -5.856412887573242, 90.81532287597656, 423.4444580078125, 421.61346435546875, -77.16371154785156, -501.17034912109375, 1726.01171875, 853.4397583007812, -7.801908493041992, 814.1734619140625, 948.2916870117188, 485.87274169921875, 612.3450927734375, 1091.227783203125, -678.3665771484375, 978.3572387695312, 235.90985107421875, 452.8426513671875, 95.78189086914062, 194.09939575195312, 1203.0308837890625, 1164.6251220703125, 940.977783203125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000558.npy"} +{"epoch": 0.8193832599118943, "step": 559, "batch_size": 64, "mean": 450.9721374511719, "std": 539.0526733398438, "min": -802.55859375, "p10": -98.992130279541, "median": 373.0220031738281, "p90": 1138.5429077148438, "max": 1810.403076171875, "pos_frac": 0.84375, "sample": [729.7200927734375, 986.8963012695312, 617.658203125, 449.8660888671875, 64.58899688720703, 56.65254211425781, 176.83876037597656, 121.55299377441406, 1155.4212646484375, 1782.14794921875, 1026.3631591796875, 373.9931640625, 961.1717529296875, -78.16665649414062, 160.16842651367188, 764.8157958984375, 1094.838134765625, 1177.7332763671875, -802.55859375, -303.4178466796875, 1122.30712890625, 311.5993957519531, 167.48983764648438, 494.37786865234375, 372.05084228515625, 852.9721069335938, -147.03671264648438, -103.56378173828125, 705.8955688476562, 401.83026123046875, 56.75275421142578, 44.436161041259766, 144.13681030273438, 11.606483459472656, 84.01231384277344, 97.50849914550781, 124.45547485351562, -88.32494354248047, 237.9043731689453, 214.25039672851562, 1503.975341796875, 637.1580200195312, 665.934814453125, 138.82925415039062, -673.1852416992188, 789.0265502929688, 397.8640441894531, 594.2388916015625, 1047.764404296875, -87.0880355834961, 583.0453491210938, 319.3356018066406, 395.771484375, 1145.5010986328125, 1221.66162109375, 1810.403076171875, -268.61529541015625, 1028.62744140625, 1075.3819580078125, 310.17755126953125, 268.0029296875, 134.26853942871094, -422.5567626953125, 625.7490234375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000559.npy"} +{"epoch": 0.8208516886930984, "step": 560, "batch_size": 64, "mean": 295.437744140625, "std": 483.1470031738281, "min": -951.4070434570312, "p10": -224.77743225097652, "median": 291.20603942871094, "p90": 1027.4322204589846, "max": 1544.2213134765625, "pos_frac": 0.765625, "sample": [871.06689453125, -355.6727294921875, -110.7831802368164, 1051.5123291015625, -184.08401489257812, 539.7640991210938, 73.3990707397461, -55.13703155517578, 138.48590087890625, 425.8612365722656, 388.82275390625, 115.52346801757812, 226.1343994140625, 539.0213012695312, 1544.2213134765625, -63.87688446044922, 665.3746948242188, 1366.99267578125, 303.99200439453125, 971.2453002929688, 743.563232421875, 18.04150390625, 212.00381469726562, 1067.8858642578125, 604.1708374023438, -504.10235595703125, 81.61053466796875, 433.8494873046875, 380.86785888671875, -189.84243774414062, 681.2869873046875, -108.25520324707031, 29.497417449951172, 414.7618713378906, 482.267822265625, -539.78515625, 164.2943115234375, 1081.347900390625, 401.0238952636719, 463.50421142578125, 342.4215087890625, -239.74957275390625, 252.29144287109375, 6.527618408203125, 584.831787109375, 1119.578857421875, 0.9920787811279297, -6.261436462402344, -178.2420196533203, 1154.845703125, 433.362060546875, 236.33163452148438, 406.9039611816406, 278.4200744628906, 306.12359619140625, 216.26211547851562, -341.64337158203125, 400.4774169921875, 399.40887451171875, 644.6646118164062, -803.1771240234375, 159.62078857421875, -951.4070434570312, 115.57818603515625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000560.npy"} +{"epoch": 0.8223201174743024, "step": 561, "batch_size": 64, "mean": 297.7703857421875, "std": 574.4232788085938, "min": -1999.3037109375, "p10": -197.93146667480465, "median": 248.55956268310547, "p90": 999.8277587890627, "max": 1804.9237060546875, "pos_frac": 0.734375, "sample": [577.2076416015625, 370.92236328125, -127.65531158447266, 114.69283294677734, -980.8579711914062, 1026.9456787109375, 777.0393676757812, -13.305801391601562, 636.7870483398438, 1099.638427734375, 103.96841430664062, 493.2629699707031, 1425.8663330078125, -2.0802764892578125, 118.72696685791016, -45.279998779296875, 135.87469482421875, -128.63809204101562, 300.91961669921875, 329.18310546875, 1197.496826171875, 142.91030883789062, 581.5709838867188, 363.9895935058594, 31.14434814453125, 182.5181121826172, 924.5631103515625, 423.19580078125, 24.073232650756836, 1804.9237060546875, 669.35107421875, 1337.5177001953125, 3.0636444091796875, 287.6747131347656, 936.5526123046875, -363.72845458984375, -316.2828063964844, 505.255859375, 22.44580841064453, 582.9066772460938, 197.98826599121094, -507.5636901855469, 29.66307830810547, 655.3757934570312, 639.0850830078125, -117.56432342529297, 158.06317138671875, 885.2780151367188, 840.12451171875, 307.4893493652344, -1999.3037109375, 1280.165771484375, 209.4444122314453, -53.57738494873047, 347.1429443359375, -214.29119873046875, 433.039794921875, -45.35844421386719, 519.9239501953125, 291.2196044921875, -246.6578369140625, 63.660797119140625, -10.645538330078125, -159.75875854492188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000561.npy"} +{"epoch": 0.8237885462555066, "step": 562, "batch_size": 64, "mean": 381.39727783203125, "std": 556.6820678710938, "min": -1131.7908935546875, "p10": -257.86999206542964, "median": 395.73463439941406, "p90": 959.5156799316406, "max": 2154.901611328125, "pos_frac": 0.765625, "sample": [571.3551025390625, 2154.901611328125, 1117.38037109375, 397.7630920410156, 131.07620239257812, 398.1102600097656, 567.9783935546875, 275.14288330078125, 853.1695556640625, 573.2824096679688, 859.7664794921875, 957.2142333984375, 158.43624877929688, 464.7583312988281, 922.0963134765625, -363.8731689453125, 292.93890380859375, 681.170654296875, 276.52972412109375, 468.5947570800781, 439.313232421875, 512.1681518554688, 329.3095703125, -287.1584167480469, 5.399620056152344, 760.13134765625, 913.9998779296875, -113.125732421875, 1024.4591064453125, 166.32354736328125, -143.275390625, -189.53033447265625, -14.155351638793945, 690.9891357421875, -371.5103759765625, 1990.69580078125, -121.55523681640625, 449.216796875, 612.57666015625, 757.3200073242188, 698.3382568359375, 93.23200988769531, 630.089599609375, 223.6871337890625, -1131.7908935546875, 324.3990173339844, -169.74391174316406, 751.1568603515625, 501.0263671875, 1012.0945434570312, -87.67559814453125, -78.97459411621094, 124.4700698852539, 36.31809616088867, 718.5455322265625, 393.7061767578125, -537.8530883789062, 69.27452850341797, 170.4224090576172, -464.2320556640625, 114.046142578125, -475.1002197265625, 960.5020141601562, 1364.1044921875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000562.npy"} +{"epoch": 0.8252569750367107, "step": 563, "batch_size": 64, "mean": 304.4427490234375, "std": 656.0781860351562, "min": -2168.16845703125, "p10": -356.41716918945315, "median": 378.3619079589844, "p90": 916.8670043945312, "max": 1666.437744140625, "pos_frac": 0.78125, "sample": [233.97299194335938, -2168.16845703125, -1139.86572265625, 449.6855773925781, 918.6502685546875, 155.011962890625, 912.7060546875, 116.58633422851562, 289.4731750488281, 606.77490234375, 737.2320556640625, 241.5521240234375, -437.8429260253906, 783.982421875, 685.0880737304688, 728.7557983398438, 140.2307586669922, 1666.437744140625, 749.1113891601562, -90.74620056152344, 726.323974609375, 88.30681610107422, -152.00576782226562, 176.84825134277344, 782.6381225585938, -28.315654754638672, 604.1845703125, 332.7032470703125, 1106.39697265625, 826.9212646484375, 102.634765625, 703.80517578125, -1477.2647705078125, 588.920166015625, 60.16947937011719, 298.47271728515625, 35.29129409790039, 405.8178405761719, 561.608154296875, 441.13983154296875, 889.3632202148438, 592.8068237304688, 1039.8197021484375, 101.27619171142578, 1073.58203125, -31.1983642578125, -1231.5623779296875, 1646.9652099609375, -409.4212646484375, 1364.2525634765625, -82.07321166992188, 659.9418334960938, 21.45325469970703, -352.3902893066406, 669.8514404296875, -358.1429748535156, 490.33001708984375, -329.241943359375, 110.90443420410156, 470.24176025390625, 421.40692138671875, 350.9059753417969, 180.47952270507812, 431.5616149902344], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000563.npy"} +{"epoch": 0.8267254038179148, "step": 564, "batch_size": 64, "mean": 348.56048583984375, "std": 496.36199951171875, "min": -841.6109008789062, "p10": -251.2184173583984, "median": 303.56182861328125, "p90": 966.8643676757813, "max": 1687.1138916015625, "pos_frac": 0.796875, "sample": [324.216064453125, 433.7627868652344, 753.2327880859375, 256.9796142578125, 621.2081298828125, 494.2044677734375, 599.786376953125, -433.0364074707031, 136.50155639648438, -532.2923583984375, 478.9149475097656, 629.3821411132812, -287.420166015625, 525.1224365234375, 815.7854614257812, 40.27119445800781, -269.1960144042969, 965.69140625, 936.6586303710938, 313.95745849609375, 384.8518371582031, 242.91163635253906, 315.96728515625, -204.190185546875, -361.73333740234375, 394.23101806640625, -346.1192932128906, 201.07681274414062, 39.023101806640625, -841.6109008789062, 300.3990478515625, 1687.1138916015625, 534.6539306640625, -173.34332275390625, 74.83365631103516, 339.63739013671875, 82.41511535644531, 143.37408447265625, 262.864990234375, -41.7652702331543, 560.5870971679688, 1567.3719482421875, -71.8687744140625, 306.724609375, 646.8157348632812, 693.0374755859375, 1363.27392578125, 893.5779418945312, 1240.932861328125, 248.51861572265625, 147.2138671875, 707.4010009765625, 129.09475708007812, 96.69308471679688, 112.91462707519531, 224.69432067871094, 200.25852966308594, 994.6351318359375, 318.1208190917969, -89.69581604003906, 1327.9033203125, -209.27069091796875, 967.3670654296875, 93.24698638916016], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000564.npy"} +{"epoch": 0.8281938325991189, "step": 565, "batch_size": 64, "mean": 389.9073486328125, "std": 644.5042724609375, "min": -1169.8341064453125, "p10": -337.99211120605463, "median": 268.1229553222656, "p90": 1073.6580688476563, "max": 2214.895263671875, "pos_frac": 0.703125, "sample": [-124.72377014160156, 1009.3975219726562, 1055.2705078125, -1169.8341064453125, 200.96173095703125, 890.9017944335938, 2000.695068359375, 40.25968933105469, -200.99232482910156, -377.012939453125, 575.025634765625, 966.4324340820312, -279.23846435546875, -610.180908203125, 198.71966552734375, -539.3330688476562, -355.4056701660156, -125.28387451171875, -118.43478393554688, 494.1659851074219, 1015.743408203125, 186.32919311523438, 298.5343017578125, 1517.0247802734375, -297.3604736328125, -35.27587127685547, 245.39431762695312, 174.9345703125, -525.9095458984375, 886.5360717773438, 375.05426025390625, 247.80274963378906, 605.8446655273438, 904.4712524414062, 704.3864135742188, -3.1486663818359375, 829.2460327148438, 501.33612060546875, -261.75897216796875, -14.226676940917969, -99.78411865234375, 802.619140625, 573.8966674804688, 1432.700439453125, 719.5557250976562, 2214.895263671875, 579.964599609375, 1883.741943359375, 1081.5384521484375, -449.2452087402344, 37.1763916015625, 20.456802368164062, 842.881591796875, 506.73974609375, 772.8250732421875, 293.5779113769531, 924.4866333007812, 280.181640625, 90.20184326171875, 58.019371032714844, -108.02587890625, 210.67697143554688, 256.06427001953125, 1142.5762939453125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000565.npy"} +{"epoch": 0.8296622613803231, "step": 566, "batch_size": 64, "mean": 450.98480224609375, "std": 701.419189453125, "min": -1155.53125, "p10": -371.58937072753906, "median": 522.7462158203125, "p90": 1281.2236206054693, "max": 1979.236328125, "pos_frac": 0.671875, "sample": [-176.31683349609375, -685.7689208984375, 783.942138671875, 1637.55810546875, 274.7832336425781, -129.09426879882812, 891.23388671875, 300.0004577636719, -932.5626220703125, 476.9669494628906, 578.2347412109375, -170.6747283935547, -49.82318115234375, 741.9340209960938, 647.1963500976562, 382.123779296875, 862.8907470703125, -72.65412902832031, 19.261600494384766, -1155.53125, 920.0545043945312, 653.2696533203125, 479.5245361328125, -148.3267822265625, 971.9293212890625, -333.0566101074219, 679.8614501953125, 1168.3770751953125, -80.84910583496094, 1055.0545654296875, 767.7258911132812, 1152.3572998046875, 762.1207275390625, 126.96058654785156, 920.3504638671875, -284.6988220214844, -163.26480102539062, 1879.649169921875, 725.3290405273438, -309.5914306640625, -376.67974853515625, 699.3638916015625, 507.66094970703125, 1596.989013671875, 1117.7030029296875, 1684.5487060546875, 1979.236328125, 816.12109375, -725.9613037109375, 537.8314819335938, -715.8665161132812, 1329.58642578125, 773.6619873046875, 1636.38623046875, -26.1861629486084, -407.14984130859375, 1065.2674560546875, 391.99871826171875, 1090.87109375, -359.7118225097656, -68.95323944091797, 15.5927734375, 635.5911865234375, 498.6477355957031], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000566.npy"} +{"epoch": 0.8311306901615272, "step": 567, "batch_size": 64, "mean": 332.55987548828125, "std": 472.9836120605469, "min": -1070.1356201171875, "p10": -126.58333358764646, "median": 265.6629943847656, "p90": 980.9107421875, "max": 1852.319091796875, "pos_frac": 0.8125, "sample": [-84.58533477783203, 85.42769622802734, 1128.2884521484375, 718.71875, 322.8662109375, 439.913818359375, 103.52680206298828, -101.98148345947266, 981.5903930664062, 220.6282958984375, 900.4697265625, -22.38268280029297, 1004.1456909179688, 254.0855712890625, 4.094146728515625, 375.74420166015625, 1216.6748046875, 615.8302612304688, 493.265625, 149.4692840576172, 465.289306640625, 143.30441284179688, -311.3111267089844, 418.61370849609375, 234.66183471679688, 713.2742309570312, 631.4512939453125, 1137.985107421875, -38.35960388183594, 631.940673828125, 164.3805389404297, 376.38873291015625, 266.3570556640625, -1070.1356201171875, 250.19189453125, -37.45589065551758, 49.55716323852539, 814.923583984375, 1852.319091796875, 1017.8956298828125, 128.47506713867188, -625.3738403320312, 339.79815673828125, 164.87991333007812, 192.8712921142578, 979.3248901367188, 421.226806640625, 224.13092041015625, 224.74249267578125, 295.87060546875, -205.44300842285156, 924.923828125, 66.10869598388672, 548.6813354492188, -236.1380157470703, 264.96893310546875, 49.493995666503906, 288.4873046875, -137.12698364257812, 196.62640380859375, 342.41351318359375, 494.6235046386719, -635.1785278320312, 458.38214111328125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000567.npy"} +{"epoch": 0.8325991189427313, "step": 568, "batch_size": 64, "mean": 454.1708068847656, "std": 486.7474670410156, "min": -705.51220703125, "p10": -156.03224792480458, "median": 426.3395538330078, "p90": 1011.76162109375, "max": 1764.5419921875, "pos_frac": 0.859375, "sample": [618.9326782226562, -705.51220703125, 1057.820556640625, 631.9578247070312, 492.441650390625, 425.3689270019531, 738.1373291015625, 658.524658203125, 418.6606140136719, -600.7788696289062, -205.66493225097656, -202.48373413085938, 622.6827392578125, 1572.5313720703125, 246.08551025390625, 536.31591796875, 236.20169067382812, 218.01470947265625, 313.92059326171875, 105.54808044433594, 594.075927734375, 454.1232604980469, 1060.4251708984375, 853.1836547851562, 427.2643737792969, 113.10086059570312, 311.5547790527344, 1764.5419921875, 141.84805297851562, 427.9654846191406, 560.253173828125, 795.302734375, 986.091064453125, 608.03759765625, 977.3031616210938, 374.2601623535156, 369.0010986328125, 517.0645751953125, 212.1017608642578, 733.7718505859375, 1087.7880859375, -47.645442962646484, 876.3077392578125, 879.1561279296875, 0.7247161865234375, 235.95928955078125, 54.56698226928711, -311.0604553222656, 416.59130859375, 1018.6815795898438, -25.311330795288086, 1442.3662109375, 425.41473388671875, -570.2191162109375, 284.9693298339844, -391.46527099609375, 995.6150512695312, 989.0651245117188, 263.4907531738281, 643.1609497070312, 163.05426025390625, 794.9197998046875, 63.40276336669922, 317.42230224609375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000568.npy"} +{"epoch": 0.8340675477239354, "step": 569, "batch_size": 64, "mean": 385.0685119628906, "std": 638.4990844726562, "min": -1569.23486328125, "p10": -236.04531097412107, "median": 286.82311248779297, "p90": 1244.5034790039062, "max": 1902.2720947265625, "pos_frac": 0.765625, "sample": [-507.20758056640625, 179.84268188476562, 1719.91455078125, 209.62811279296875, 683.5645751953125, -198.4684295654297, 648.1826782226562, 101.14724731445312, -424.27252197265625, -43.27565383911133, 842.8660888671875, 1241.808837890625, 524.68408203125, 349.8024597167969, 367.0375061035156, -425.5522766113281, 242.60499572753906, 784.6668090820312, -252.14968872070312, 70.31685638427734, -1033.7457275390625, 1724.4364013671875, 27.61043930053711, -74.65142822265625, -137.73410034179688, 632.2496337890625, 1108.2315673828125, 80.50309753417969, 46.995147705078125, 251.66363525390625, 1351.122802734375, 923.4222412109375, 591.6151733398438, -57.831146240234375, 1395.7806396484375, 318.3801574707031, 182.6510009765625, 345.6506042480469, 734.7705688476562, -1569.23486328125, 383.1476135253906, 149.10533142089844, -181.48675537109375, 1103.8804931640625, 823.8936767578125, 137.086181640625, 910.0033569335938, 217.85137939453125, -184.30282592773438, 415.6771240234375, -189.15724182128906, 1669.3330078125, 6.764396667480469, 564.8347778320312, 931.8306274414062, -328.76953125, 133.58590698242188, 564.4385986328125, 255.2660675048828, 134.26385498046875, 394.73333740234375, 627.4476318359375, 1245.6583251953125, 1902.2720947265625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000569.npy"} +{"epoch": 0.8355359765051396, "step": 570, "batch_size": 64, "mean": 338.134765625, "std": 500.54248046875, "min": -717.1749267578125, "p10": -263.4466400146484, "median": 254.93336486816406, "p90": 975.5864562988281, "max": 1691.8343505859375, "pos_frac": 0.78125, "sample": [-102.06947326660156, 1120.3216552734375, 411.02435302734375, 1470.1993408203125, 1691.8343505859375, 101.31806945800781, 693.6041870117188, 1036.7685546875, 1335.18408203125, 315.0399169921875, 388.5066223144531, 832.3739013671875, 305.5226745605469, 387.0433349609375, -52.578102111816406, 262.03546142578125, 816.4317016601562, 362.250732421875, 705.2593994140625, -315.91802978515625, 635.5542602539062, -279.3195495605469, -474.1479797363281, -634.1204223632812, 128.84483337402344, 938.6223754882812, -577.4282836914062, -92.0936508178711, 227.28582763671875, 436.00616455078125, 484.3200378417969, 371.43084716796875, -189.406005859375, -281.0523681640625, 247.83126831054688, 200.4043731689453, 226.96719360351562, 486.7218017578125, 76.04058074951172, 971.8662109375, 558.665771484375, -124.14539337158203, 148.93551635742188, 93.93000030517578, 968.4707641601562, 268.13897705078125, 185.72816467285156, 692.0777587890625, 977.1808471679688, 957.5872802734375, 151.5063934326172, 94.31497192382812, 605.79052734375, 429.5281982421875, 12.765434265136719, -717.1749267578125, 243.769775390625, 175.1049346923828, 125.71891784667969, -101.55018615722656, -226.40985107421875, 1185.0172119140625, 221.88314819335938, 45.31046676635742], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000570.npy"} +{"epoch": 0.8370044052863436, "step": 571, "batch_size": 64, "mean": 419.9873046875, "std": 607.4080200195312, "min": -851.1560668945312, "p10": -372.6182525634765, "median": 363.88059997558594, "p90": 1215.3657836914067, "max": 2093.77587890625, "pos_frac": 0.75, "sample": [554.4801635742188, 900.7430419921875, 104.47322082519531, 1105.0675048828125, -19.931909561157227, 572.1038818359375, 105.9091796875, 1021.4120483398438, 370.004638671875, 981.5452880859375, 1737.4345703125, -388.1440124511719, -75.48384857177734, 1581.0311279296875, 188.4027557373047, 455.00689697265625, 733.0863037109375, 793.7318725585938, 357.7565612792969, 834.0479125976562, 2093.77587890625, 1372.6185302734375, 822.8956909179688, 1040.437744140625, -442.5014953613281, -23.069704055786133, 521.0931396484375, 406.99176025390625, 1262.636474609375, 1031.5538330078125, 305.1605529785156, -23.80303955078125, 525.94189453125, 1437.15283203125, 624.228271484375, -0.3162422180175781, -336.3914794921875, 214.91583251953125, 1343.031494140625, 483.8919677734375, 903.3137817382812, -391.9242248535156, 1081.5975341796875, 232.88197326660156, 278.03814697265625, 198.8538818359375, 594.6450805664062, 221.61244201660156, 481.8159484863281, -118.63203430175781, -504.95330810546875, -851.1560668945312, 240.1248779296875, 50.695892333984375, -846.5305786132812, 295.09698486328125, 237.1633758544922, 395.3564147949219, -705.7801513671875, 102.48448181152344, -74.00830078125, 531.0548706054688, -72.86288452148438, 27.37939453125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000571.npy"} +{"epoch": 0.8384728340675477, "step": 572, "batch_size": 64, "mean": 416.5491943359375, "std": 684.39501953125, "min": -1104.5218505859375, "p10": -327.5100982666015, "median": 357.93548583984375, "p90": 1253.7288574218753, "max": 2581.06640625, "pos_frac": 0.71875, "sample": [-334.8320007324219, -525.3566284179688, -264.52362060546875, 537.3374633789062, 1672.4046630859375, 179.57403564453125, 64.73397827148438, 38.77204132080078, -24.07314682006836, 175.57437133789062, -166.7236785888672, 1098.024658203125, 855.763916015625, 351.7091064453125, 593.8795776367188, 1171.341796875, 1094.1256103515625, 1137.4327392578125, 1289.03759765625, 626.4848022460938, 243.1623077392578, 398.19580078125, -30.341999053955078, 1077.963623046875, -1014.0184936523438, 67.86873626708984, 364.161865234375, -364.84503173828125, -177.80429077148438, 901.0556030273438, -310.4256591796875, 499.8236999511719, 481.7901611328125, 60.909088134765625, 258.3831481933594, 2581.06640625, -24.34430694580078, 432.1152648925781, -211.8754425048828, -457.03570556640625, 1326.5653076171875, 226.76512145996094, -273.13848876953125, 476.9349060058594, -307.45196533203125, -457.4115905761719, 1135.22509765625, 717.9110107421875, 553.8143310546875, -25.82628631591797, 927.953369140625, 139.88067626953125, 329.0169372558594, 1822.7169189453125, 301.606201171875, 394.6923522949219, 624.550048828125, 1531.79248046875, 4.25762939453125, -1104.5218505859375, 521.5718383789062, 1153.087890625, 1676.773193359375, 615.8916625976562], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000572.npy"} +{"epoch": 0.8399412628487518, "step": 573, "batch_size": 64, "mean": 372.70294189453125, "std": 522.845947265625, "min": -686.6841430664062, "p10": -135.9394348144531, "median": 292.4709014892578, "p90": 878.9013671875001, "max": 2240.9775390625, "pos_frac": 0.78125, "sample": [657.7544555664062, -132.52056884765625, -110.54715728759766, 5.40380859375, 100.53700256347656, 2240.9775390625, 791.8680419921875, -137.4046630859375, -12.702995300292969, 311.29205322265625, 556.0927124023438, 178.53334045410156, -686.6841430664062, 1983.9617919921875, 451.4589538574219, 441.0125732421875, 1577.2454833984375, -567.0833129882812, -46.82665252685547, -107.99291229248047, -410.48931884765625, 888.3237915039062, 622.8773803710938, 436.9217529296875, 64.35236358642578, 342.96038818359375, 710.707763671875, 1072.572998046875, 141.45025634765625, 1075.6162109375, -231.66600036621094, 856.9157104492188, 506.8692321777344, 356.37939453125, 805.8037109375, -289.3031921386719, 215.36163330078125, 47.30419921875, 808.5885009765625, -42.24230194091797, 824.300048828125, 107.4168472290039, 193.56402587890625, -109.4886703491211, 384.7563171386719, 21.260726928710938, 251.78424072265625, 254.94371032714844, 772.0377197265625, -229.178955078125, 515.706787109375, 182.70132446289062, 214.55010986328125, 621.4378662109375, 516.61572265625, 65.8945541381836, 273.6497497558594, 440.0038146972656, 643.5653076171875, 627.1268920898438, 69.8834228515625, 681.8154296875, 919.3611450195312, 135.6004638671875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000573.npy"} +{"epoch": 0.8414096916299559, "step": 574, "batch_size": 64, "mean": 407.0522766113281, "std": 551.9092407226562, "min": -874.2797241210938, "p10": -292.603140258789, "median": 314.52650451660156, "p90": 1215.1002441406254, "max": 1908.0777587890625, "pos_frac": 0.78125, "sample": [136.37326049804688, 671.5878295898438, -229.75094604492188, 72.04053497314453, 1126.8358154296875, -19.127614974975586, 124.07958984375, 1351.888671875, 1037.907470703125, 744.1973876953125, 186.81201171875, 1384.030517578125, 1027.5743408203125, 585.053466796875, 178.3494873046875, 465.70245361328125, 245.05816650390625, 521.3182373046875, -568.2952270507812, 1908.0777587890625, 550.5465698242188, 730.0328979492188, 433.54180908203125, 478.0867919921875, 322.76373291015625, 700.0761108398438, 306.2892761230469, -319.539794921875, 66.93437194824219, -135.4881591796875, 215.41476440429688, -475.0632019042969, 284.9526672363281, 1375.3858642578125, -337.3857421875, 723.6837158203125, -440.1838073730469, 819.0906982421875, 301.57904052734375, 634.63671875, 1014.3397827148438, -402.030517578125, 689.4002685546875, 685.69287109375, 369.7842712402344, 180.57540893554688, 404.4127197265625, -3.7584381103515625, -874.2797241210938, -202.68161010742188, 276.1937561035156, 6.881095886230469, 30.54118537902832, 1402.928955078125, -21.183380126953125, 725.3721923828125, 286.5762634277344, 1252.9278564453125, 161.0626678466797, 594.2797241210938, 188.0937042236328, 1536.0609130859375, 595.3639526367188, -30.275442123413086], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000574.npy"} +{"epoch": 0.8428781204111601, "step": 575, "batch_size": 64, "mean": 359.38861083984375, "std": 644.5065307617188, "min": -1261.2242431640625, "p10": -375.18215026855455, "median": 362.4028625488281, "p90": 1163.9693847656251, "max": 1953.07666015625, "pos_frac": 0.75, "sample": [1186.3079833984375, 236.86891174316406, 919.1615600585938, 385.76617431640625, 516.1130981445312, -646.2646484375, 557.4781494140625, 663.5020751953125, 206.56692504882812, 99.58280944824219, 60.48926544189453, 100.0851058959961, -208.59878540039062, 768.5271606445312, 461.15655517578125, 180.0460968017578, -197.64630126953125, 985.95263671875, 339.03955078125, 291.573974609375, 318.44403076171875, -98.565673828125, 1021.1373901367188, 435.22515869140625, 400.6285400390625, 253.80917358398438, -1195.360107421875, 79.15896606445312, -649.93310546875, 434.42047119140625, -660.1669311523438, 1166.902587890625, 916.5839233398438, 1276.318359375, 286.09576416015625, -158.8921356201172, -227.41351318359375, -71.50463104248047, 1284.3663330078125, 1058.539794921875, -162.12420654296875, 188.08148193359375, -438.5115661621094, 1106.4405517578125, 1157.125244140625, -871.2916259765625, 427.5502014160156, 591.8114013671875, 748.5789794921875, 1914.3492431640625, 397.0167541503906, 856.751708984375, 500.4385681152344, -162.7286376953125, 1421.5391845703125, 100.26647186279297, 567.8185424804688, -1261.2242431640625, 1953.07666015625, 22.537425994873047, -150.39227294921875, 577.3956298828125, 152.6003875732422, 588.262451171875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000575.npy"} +{"epoch": 0.8443465491923642, "step": 576, "batch_size": 64, "mean": 304.60137939453125, "std": 524.363037109375, "min": -906.8125610351562, "p10": -266.30333862304684, "median": 260.41556549072266, "p90": 960.0287719726563, "max": 1582.8878173828125, "pos_frac": 0.734375, "sample": [303.03033447265625, 976.1121826171875, 107.64115905761719, 27.486942291259766, -273.27960205078125, -67.65974426269531, -188.4498748779297, 1582.8878173828125, 713.0016479492188, 137.26763916015625, 355.2369689941406, 1514.6904296875, 125.54586791992188, 114.19886779785156, 524.7263793945312, 348.95660400390625, 367.0529479980469, -652.8794555664062, 274.9764099121094, 545.78271484375, 461.10894775390625, 907.4326171875, 719.6826782226562, 859.3692626953125, -3.580188751220703, 1371.558349609375, -7.910015106201172, -399.82891845703125, -15.347930908203125, 889.0287475585938, -148.5408172607422, 255.67039489746094, 133.26217651367188, 55.929588317871094, 613.1450805664062, 630.68603515625, 1582.0543212890625, 294.4219970703125, 153.50128173828125, 578.5974731445312, 67.41114044189453, 1000.7586669921875, 108.66153717041016, 413.947021484375, 76.59078216552734, -906.8125610351562, -80.4288558959961, 512.8221435546875, -250.025390625, 511.19879150390625, 367.5731506347656, 959.0323486328125, 960.455810546875, 859.2230834960938, -845.4573364257812, -422.2904968261719, 206.86187744140625, 301.3831481933594, 265.1607360839844, 154.513427734375, -308.82501220703125, -25.63027572631836, -227.26611328125, 29.063148498535156], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000576.npy"} +{"epoch": 0.8458149779735683, "step": 577, "batch_size": 64, "mean": 375.4349060058594, "std": 564.64306640625, "min": -1392.25830078125, "p10": -85.85356292724606, "median": 302.6417236328125, "p90": 1151.0088623046875, "max": 2178.268798828125, "pos_frac": 0.875, "sample": [-1392.25830078125, 2178.268798828125, 1146.3990478515625, 1183.1253662109375, 94.00431060791016, 57.91642761230469, 284.1932067871094, 108.78309631347656, 310.24664306640625, -51.058074951171875, 702.1558837890625, 438.8370056152344, 474.52154541015625, 403.9076843261719, 401.0552978515625, 733.2135620117188, 1044.64794921875, 1151.44970703125, -354.9456787109375, 638.836181640625, 144.26853942871094, 244.46707153320312, -619.3837890625, 88.73681640625, 333.7218017578125, 752.9205322265625, 133.7501678466797, 279.34942626953125, 490.17572021484375, 456.80303955078125, 101.05551147460938, 353.30511474609375, 290.3880310058594, 21.67670249938965, 1561.2843017578125, 66.51263427734375, 293.62823486328125, -563.9490966796875, 157.9568634033203, -421.5240478515625, 290.2391662597656, 140.99008178710938, 479.07373046875, 295.03680419921875, 76.32817840576172, 537.9224853515625, 87.55216979980469, 682.8082885742188, 671.6201782226562, 1149.980224609375, 116.29408264160156, 45.489646911621094, 1247.41357421875, 27.140668869018555, 1593.8839111328125, 599.5167236328125, 365.5756530761719, -548.9375, -100.76591491699219, 1300.8450927734375, 5.1606903076171875, 591.2669067382812, 332.4965515136719, 322.45880126953125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000577.npy"} +{"epoch": 0.8472834067547724, "step": 578, "batch_size": 64, "mean": 411.31219482421875, "std": 483.0921630859375, "min": -773.883056640625, "p10": -142.67158508300778, "median": 403.7938232421875, "p90": 1072.0050415039066, "max": 1469.0887451171875, "pos_frac": 0.8125, "sample": [1438.515380859375, 160.61648559570312, 704.0465087890625, 103.29388427734375, 604.47216796875, 283.61541748046875, 1367.8677978515625, 738.913330078125, 173.57989501953125, -120.78056335449219, 116.42605590820312, 738.6489868164062, 469.9306640625, 239.93692016601562, 154.07308959960938, -410.1839294433594, 138.4219207763672, 29.049827575683594, 1449.806640625, 892.6024169921875, 664.2192993164062, 434.9722900390625, 921.7369384765625, -152.05345153808594, -110.66706085205078, 583.6948852539062, 862.9381103515625, -84.34857940673828, -307.8073425292969, 65.27057647705078, 2.1219329833984375, -174.910888671875, 477.33233642578125, 182.80307006835938, 137.72328186035156, 598.8560791015625, -43.01807403564453, 327.787353515625, -773.883056640625, -26.24144744873047, 453.5044860839844, 651.8512573242188, 611.8474731445312, 413.3059387207031, 1154.0426025390625, 689.4081420898438, 324.1533203125, 54.310302734375, 715.188720703125, 137.42689514160156, 394.2817077636719, 800.7596435546875, 74.67390441894531, 1114.384765625, 903.75830078125, 1469.0887451171875, -190.29348754882812, 431.958251953125, 1349.258544921875, 973.1190185546875, 419.625, -176.47149658203125, 81.78413391113281, 613.6353149414062], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000578.npy"} +{"epoch": 0.8487518355359766, "step": 579, "batch_size": 64, "mean": 270.0508728027344, "std": 524.1898803710938, "min": -1205.6998291015625, "p10": -357.74076232910147, "median": 274.43394470214844, "p90": 802.1321044921878, "max": 1905.06005859375, "pos_frac": 0.75, "sample": [16.548240661621094, -157.1572723388672, -1205.6998291015625, 530.8785400390625, -90.11288452148438, 104.85739135742188, 889.680419921875, 260.0714416503906, 506.6753234863281, 399.0298156738281, 78.05974578857422, -68.90538024902344, -583.92919921875, 304.02984619140625, 250.60775756835938, 607.1063232421875, 149.8291473388672, 659.3032836914062, 1166.2421875, 76.57304382324219, 717.760009765625, 92.93494415283203, 365.3988037109375, 540.742919921875, -16.758859634399414, 412.4779052734375, 390.153564453125, -116.78397369384766, 626.5054931640625, 375.12457275390625, 507.36773681640625, -396.71319580078125, 500.08770751953125, 601.6458129882812, 1905.06005859375, 1481.54443359375, 283.08795166015625, -621.2417602539062, -24.92758560180664, 465.8125305175781, 1038.343994140625, -266.8050842285156, 822.7465209960938, -193.09323120117188, 1508.506591796875, 13.030866622924805, 192.22911071777344, -112.01478576660156, -439.646484375, 251.9159393310547, 754.0317993164062, 332.4635314941406, 112.09403991699219, 615.9730834960938, 378.822509765625, -621.220458984375, 483.6430358886719, 89.90582275390625, 265.7799377441406, -743.828125, 169.60203552246094, 8.288202285766602, 328.5796813964844, 310.94061279296875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000579.npy"} +{"epoch": 0.8502202643171806, "step": 580, "batch_size": 64, "mean": 447.0083312988281, "std": 628.2877197265625, "min": -1136.5517578125, "p10": -109.48952865600585, "median": 321.5956115722656, "p90": 1141.3868164062503, "max": 2920.105224609375, "pos_frac": 0.8125, "sample": [-242.17137145996094, 931.099365234375, 142.8866424560547, 428.0657958984375, -263.60833740234375, 113.58344268798828, 213.3028106689453, 1832.041015625, 427.9703674316406, 280.97698974609375, 437.2284851074219, -45.678466796875, 2920.105224609375, 192.58694458007812, 78.02682495117188, 551.199462890625, 187.628173828125, 28.035808563232422, 890.4852294921875, 519.4659423828125, 283.49346923828125, 897.543701171875, 1170.2608642578125, 590.2960205078125, 531.5851440429688, 97.34577178955078, 1074.0140380859375, 1262.3682861328125, 546.3175048828125, 182.75979614257812, 92.47982025146484, 115.99836730957031, 64.15425872802734, 44.97090148925781, 761.4324951171875, 922.9357299804688, 74.52764892578125, -124.69022369384766, 697.8132934570312, 652.316650390625, 1461.8260498046875, 5.290327072143555, -1136.5517578125, 1008.448974609375, -47.53986358642578, 752.88330078125, -443.1730651855469, -83.95280456542969, 614.3729248046875, 2079.8818359375, 51.98155212402344, -100.2550048828125, 1386.454833984375, 125.55496215820312, 358.603515625, 400.1376647949219, -80.81302642822266, 379.4938049316406, -113.44718170166016, -291.74871826171875, 470.95782470703125, 970.8109741210938, 993.574462890625, 284.58770751953125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000580.npy"} +{"epoch": 0.8516886930983847, "step": 581, "batch_size": 64, "mean": 339.32427978515625, "std": 529.4675903320312, "min": -796.5955810546875, "p10": -390.9008056640625, "median": 357.7488098144531, "p90": 883.1506591796875, "max": 2303.020751953125, "pos_frac": 0.78125, "sample": [281.3931579589844, 202.36000061035156, 209.94485473632812, 76.41481018066406, 445.3250732421875, 536.042724609375, -546.8817138671875, 325.54888916015625, 887.991455078125, -385.264404296875, 754.6312866210938, 759.891357421875, 349.28289794921875, 464.75677490234375, 623.9081420898438, -407.5193176269531, 1151.84228515625, 670.390380859375, 451.59368896484375, 1226.6341552734375, -270.3294677734375, 639.2349243164062, 38.9034423828125, 578.5401000976562, 703.2354125976562, 620.587646484375, 272.1382141113281, 104.57581329345703, 275.0560302734375, 1021.6180419921875, -325.5962219238281, 2303.020751953125, -35.977298736572266, 1095.05908203125, 290.4169616699219, 420.5523681640625, 871.85546875, 118.80244445800781, 13.235450744628906, 379.63494873046875, 401.1278381347656, 40.1671142578125, -13.75811767578125, -216.24497985839844, 805.2003173828125, 203.48448181152344, 633.275146484375, 657.8438110351562, 1265.9986572265625, 511.903564453125, -498.1443786621094, -796.5955810546875, 60.0455436706543, -545.0098876953125, 567.4012451171875, 531.8047485351562, -393.31640625, -468.6759033203125, 91.89898681640625, -339.83892822265625, 308.434814453125, 862.5955200195312, 366.2147216796875, 488.0918884277344], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000581.npy"} +{"epoch": 0.8531571218795888, "step": 582, "batch_size": 64, "mean": 396.96234130859375, "std": 519.2880859375, "min": -1088.0711669921875, "p10": -162.27193222045892, "median": 398.9407043457031, "p90": 1011.9482177734375, "max": 1720.5523681640625, "pos_frac": 0.765625, "sample": [-1088.0711669921875, 200.29168701171875, 1107.581787109375, 360.76507568359375, 465.8479919433594, -185.6678466796875, 405.2853088378906, 524.0330810546875, 614.66748046875, -64.58395385742188, 286.8670654296875, 96.19676971435547, -69.26512145996094, 779.7266235351562, 654.60888671875, 1562.0443115234375, 243.69590759277344, 28.841567993164062, -209.76707458496094, 1044.677734375, 494.22271728515625, 60.80079650878906, 532.5231323242188, -262.1440734863281, -107.68146514892578, 713.6242065429688, 198.96817016601562, 988.6385498046875, 1308.0, 1720.5523681640625, 876.230712890625, 524.6286010742188, 1012.1241455078125, -28.223793029785156, -380.89599609375, 256.6311340332031, 79.5517349243164, -50.64158630371094, 593.4241943359375, 620.2129516601562, 413.5160217285156, -46.07383728027344, 925.9741821289062, 996.1881103515625, 39.30836486816406, 1011.5377197265625, -5.570411682128906, 624.6032104492188, -199.7396697998047, 287.5316162109375, 92.80592346191406, 908.4761962890625, 1081.6463623046875, 495.72320556640625, -60.52159118652344, -860.8287353515625, 392.5960998535156, 199.0662841796875, 673.3739624023438, 657.6470947265625, 1004.7966918945312, 145.30543518066406, 645.64990234375, 74.2540054321289], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000582.npy"} +{"epoch": 0.8546255506607929, "step": 583, "batch_size": 64, "mean": 246.6733856201172, "std": 608.0569458007812, "min": -1177.602783203125, "p10": -532.1480163574219, "median": 239.26437377929688, "p90": 915.8178588867189, "max": 2061.4462890625, "pos_frac": 0.6875, "sample": [255.42849731445312, 280.62451171875, 261.1107482910156, -535.6726684570312, 929.5008544921875, 121.03688049316406, 854.1851806640625, 361.6180419921875, 2061.4462890625, 95.93383026123047, -190.16827392578125, 67.22804260253906, -903.6314086914062, 260.36737060546875, 178.17892456054688, -256.2864990234375, 974.0584106445312, 883.890869140625, 187.03890991210938, 786.109130859375, 429.221435546875, 69.95982360839844, -334.0511779785156, -331.03662109375, -44.80042266845703, -765.62548828125, 353.5123291015625, 111.94573974609375, -523.923828125, -618.9786376953125, -100.07676696777344, -94.18734741210938, 436.6004943847656, 689.3416748046875, 374.6784362792969, 1579.22265625, 344.00390625, 15.047622680664062, 813.9064331054688, 242.00674438476562, 11.003997802734375, 397.829345703125, 412.1346740722656, -126.44343566894531, -668.8816528320312, -1177.602783203125, 1571.965576171875, 464.3172607421875, -420.6031188964844, 706.811279296875, 872.195556640625, 1625.919189453125, 18.40149688720703, 524.5230712890625, 475.060302734375, -572.0784912109375, 138.2660675048828, 574.0567016601562, -24.6197509765625, 236.52200317382812, 1209.6005859375, -7.3626708984375, -16.85515594482422, 244.17160034179688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000583.npy"} +{"epoch": 0.856093979441997, "step": 584, "batch_size": 64, "mean": 476.524658203125, "std": 524.1406860351562, "min": -480.09490966796875, "p10": -68.91252136230462, "median": 356.70542907714844, "p90": 1236.5334228515626, "max": 2212.4775390625, "pos_frac": 0.875, "sample": [605.3322143554688, 258.793701171875, -121.51799011230469, 301.1296691894531, 125.0831298828125, -480.09490966796875, 614.6862182617188, 438.83319091796875, 1068.147216796875, 138.56556701660156, 84.798583984375, 26.08769989013672, 179.593017578125, 136.59890747070312, 391.11395263671875, 934.4605712890625, 55.094417572021484, 1315.5357666015625, 946.0980224609375, -98.2865982055664, 271.8052062988281, 496.052978515625, 64.71653747558594, 250.24339294433594, 382.43438720703125, 249.36880493164062, 991.1647338867188, 111.49478912353516, 297.3971252441406, 174.37680053710938, 1207.60595703125, 732.5450439453125, 406.2846984863281, -323.8818359375, 686.422607421875, 376.98687744140625, 188.87400817871094, 593.5007934570312, 153.77865600585938, -131.56246948242188, 549.8601684570312, -398.456787109375, 336.4239807128906, 1248.930908203125, 774.2314453125, 322.40667724609375, 1103.562744140625, 1022.5242919921875, -290.1064453125, 2212.4775390625, 1690.3033447265625, 49.88777160644531, 1350.8214111328125, 1305.2864990234375, 500.24591064453125, 1398.2962646484375, 157.23023986816406, 543.0460815429688, -0.37300872802734375, 256.9255065917969, 566.9614868164062, 431.5078125, 1128.937744140625, 136.9856719970703], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000584.npy"} +{"epoch": 0.8575624082232012, "step": 585, "batch_size": 64, "mean": 422.4578857421875, "std": 665.2835693359375, "min": -1306.100830078125, "p10": -369.90897827148433, "median": 348.81988525390625, "p90": 1151.644226074219, "max": 2535.233154296875, "pos_frac": 0.75, "sample": [241.93145751953125, -57.32035827636719, 700.4373168945312, 250.9495086669922, 1.0609016418457031, 2535.233154296875, 968.3495483398438, -530.9771728515625, 267.9633483886719, 241.99853515625, 1074.195556640625, -27.820358276367188, 487.1827697753906, 520.750244140625, 24.303123474121094, 1422.1668701171875, 1415.674072265625, 607.9898681640625, 1445.709716796875, 599.2872924804688, 618.210693359375, 1166.6451416015625, 188.8734130859375, 491.5363464355469, 932.3471069335938, -37.64649963378906, 909.0517578125, 1116.067138671875, 695.6961669921875, 224.56976318359375, 117.03199768066406, 666.8225708007812, 921.331298828125, -88.4906005859375, 1116.64208984375, 138.8831787109375, 990.5078125, -380.2334899902344, -37.63316345214844, 719.4849243164062, 332.16131591796875, -1306.100830078125, 852.69873046875, -719.560302734375, 749.278564453125, 365.47845458984375, 418.0244140625, -345.8184509277344, 847.2443237304688, -208.05020141601562, 39.215240478515625, -838.4473876953125, -94.60298156738281, -422.1474914550781, 100.81031799316406, 1455.083251953125, 536.4220581054688, -560.6047973632812, 669.3262329101562, -55.914581298828125, 175.53392028808594, 206.89341735839844, 145.88710021972656, 2035.731689453125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000585.npy"} +{"epoch": 0.8590308370044053, "step": 586, "batch_size": 64, "mean": 281.2471618652344, "std": 541.407470703125, "min": -925.3262939453125, "p10": -346.0019805908203, "median": 312.13218688964844, "p90": 905.9009033203126, "max": 1687.659423828125, "pos_frac": 0.6875, "sample": [966.5679931640625, -261.17144775390625, -214.6832275390625, 509.82958984375, 396.6108093261719, 427.94830322265625, -238.51385498046875, 246.0248260498047, 317.9830017089844, 222.58975219726562, -337.8647155761719, 152.08126831054688, 712.8213500976562, 1423.7508544921875, -77.80193328857422, -213.5213165283203, 497.3319396972656, -780.7977294921875, 257.71533203125, 148.6836395263672, -30.19295310974121, 30.544641494750977, 914.3704833984375, 637.267333984375, 1058.608154296875, -62.28514099121094, 581.0684814453125, -335.59320068359375, -17.19818878173828, 453.4736022949219, 630.5363159179688, -155.12966918945312, 162.6204071044922, -925.3262939453125, 590.1810913085938, 674.656982421875, 1389.692626953125, -414.6177978515625, 403.55023193359375, -131.08145141601562, 859.2990112304688, -366.5928955078125, 641.0574340820312, 347.5244140625, -785.6530151367188, -704.0718383789062, 380.1325378417969, 419.1329345703125, 62.85288619995117, 556.68798828125, -349.4893798828125, 306.2813720703125, 799.746337890625, 1687.659423828125, 886.1385498046875, 331.619140625, 401.21484375, 1420.4776611328125, 60.380096435546875, -77.34073638916016, 197.57171630859375, 824.2395629882812, 390.859375, 99.36044311523438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000586.npy"} +{"epoch": 0.8604992657856094, "step": 587, "batch_size": 64, "mean": 412.1462097167969, "std": 526.2783813476562, "min": -1064.5968017578125, "p10": -182.17593994140617, "median": 364.770751953125, "p90": 1161.0343505859378, "max": 1868.676025390625, "pos_frac": 0.8125, "sample": [461.2557678222656, 1031.2301025390625, 405.913330078125, 124.79780578613281, 322.8933410644531, 433.0090026855469, 682.6931762695312, -47.43452072143555, -6.3838043212890625, 1339.94677734375, -342.5955810546875, 37.630279541015625, 1868.676025390625, 395.2422180175781, 190.11886596679688, 473.3816223144531, -24.275634765625, 458.067138671875, 330.3824157714844, 356.43115234375, -229.99253845214844, 588.969482421875, 1211.041748046875, 20.299896240234375, 327.9075927734375, 1088.93896484375, -252.02955627441406, 971.4041137695312, 473.9965515136719, 34.12168884277344, 90.40660858154297, -28.059242248535156, -253.5014190673828, 65.60015869140625, 562.9976196289062, 100.11003112792969, 311.71368408203125, -1064.5968017578125, 978.0465698242188, 44.43953323364258, -362.48822021484375, 877.369873046875, 166.53985595703125, 849.989013671875, 98.24006652832031, 1191.932373046875, 1007.963134765625, -221.82139587402344, 251.52920532226562, 324.2750244140625, 520.0604248046875, 1489.28955078125, 414.6988525390625, 626.3004150390625, 503.0939636230469, 712.190673828125, 884.3087158203125, -89.66986846923828, 373.1103515625, 16.08468246459961, 381.8605651855469, 35.0103759765625, 1482.94091796875, 1311.75634765625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000587.npy"} +{"epoch": 0.8619676945668135, "step": 588, "batch_size": 64, "mean": 337.40020751953125, "std": 481.3662414550781, "min": -860.1619873046875, "p10": -283.13714294433595, "median": 340.2132873535156, "p90": 955.4787231445313, "max": 1553.3665771484375, "pos_frac": 0.765625, "sample": [307.3067932128906, 541.0228881835938, -268.7994689941406, 150.40234375, 320.5274658203125, 636.8043212890625, 601.3382568359375, -46.5430908203125, 728.6583862304688, 782.335205078125, 20.93798828125, -297.83599853515625, -46.14642333984375, 487.4773254394531, -349.2861022949219, -442.3747863769531, 446.9598693847656, 623.9482421875, 172.99609375, 227.29531860351562, 483.341552734375, -268.4617004394531, 65.93891906738281, 943.241455078125, 84.25782775878906, 453.9744873046875, 1397.0849609375, -135.94952392578125, -350.7086181640625, 38.94432830810547, 299.3267517089844, 117.21354675292969, 358.069580078125, 1097.9193115234375, 1003.9093627929688, 479.6279296875, 244.44374084472656, -117.2375717163086, 1151.510498046875, 854.4977416992188, 960.7232666015625, 750.2554931640625, 40.303680419921875, -376.88726806640625, 409.86895751953125, 396.9890441894531, 114.11497497558594, 890.8994140625, -860.1619873046875, 368.3289794921875, -25.25229263305664, 1553.3665771484375, 1267.452880859375, 529.893798828125, 332.9599609375, 546.3310546875, 715.7803955078125, 423.3191223144531, -230.9508819580078, 60.0223388671875, 41.494041442871094, 347.46661376953125, 828.605224609375, -289.2818603515625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000588.npy"} +{"epoch": 0.8634361233480177, "step": 589, "batch_size": 64, "mean": 438.8658142089844, "std": 526.4180297851562, "min": -675.3382568359375, "p10": -289.9163986206054, "median": 395.0667266845703, "p90": 1254.6381958007814, "max": 1614.326416015625, "pos_frac": 0.8125, "sample": [1276.1748046875, 307.5791015625, 346.5958251953125, -115.99427795410156, 697.0755615234375, 66.94673156738281, -175.6538848876953, 667.055419921875, 416.37469482421875, -405.95269775390625, -47.60169982910156, 371.7355651855469, 97.29195404052734, 207.81434631347656, 531.0819091796875, 1432.8370361328125, 1614.326416015625, 181.99371337890625, 414.26983642578125, 200.98703002929688, 411.54156494140625, 105.4638671875, 175.4181671142578, 1277.9095458984375, -12.09808349609375, 298.94732666015625, 342.6490783691406, 753.1657104492188, 1204.3861083984375, -172.5811767578125, 808.4307250976562, 788.7103271484375, 599.2745361328125, 542.2432250976562, 1104.0870361328125, -351.9783630371094, -338.88604736328125, 196.43960571289062, 745.3675537109375, 140.22561645507812, 582.427001953125, -518.3394775390625, 410.8705139160156, 686.333984375, 660.69970703125, 642.767822265625, 1043.075439453125, 168.6902313232422, 558.42919921875, 1445.2498779296875, 822.8316650390625, 353.1697692871094, 356.86297607421875, -675.3382568359375, 379.262939453125, -500.9251403808594, 474.8843994140625, 1385.8673095703125, 36.570823669433594, 357.3904113769531, 1316.4935302734375, 710.7188110351562, -456.8693542480469, 1142.6336669921875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000589.npy"} +{"epoch": 0.8649045521292217, "step": 590, "batch_size": 64, "mean": 427.59405517578125, "std": 534.3349609375, "min": -437.31060791015625, "p10": -209.54832458496094, "median": 371.96888732910156, "p90": 1149.7056274414065, "max": 1947.66015625, "pos_frac": 0.75, "sample": [494.445556640625, 1310.9573974609375, 721.9368286132812, 193.65789794921875, 717.8630981445312, -190.486572265625, 258.74212646484375, 1572.2030029296875, 931.4461059570312, -210.34982299804688, 511.7416076660156, -206.7423095703125, 337.44317626953125, 1515.022216796875, -158.07696533203125, -23.925548553466797, -110.79792022705078, 124.0794448852539, 612.7986450195312, 306.85699462890625, 104.92245483398438, -153.62100219726562, -134.13473510742188, 214.03855895996094, -243.07049560546875, 170.6009521484375, 805.1826171875, -244.4442138671875, 1558.11474609375, -117.9808578491211, 834.5740966796875, 483.0533447265625, 112.13814544677734, 1242.25244140625, 263.2082214355469, 434.3775634765625, 404.775634765625, 605.393798828125, 1084.7919921875, 369.9933776855469, 573.42822265625, -232.16912841796875, -281.3463134765625, -422.9671630859375, 373.94439697265625, 75.29194641113281, 322.734375, 898.7294921875, 252.98484802246094, -207.67816162109375, 469.3324279785156, 1177.5257568359375, 379.755615234375, -437.31060791015625, 698.6600341796875, 667.408447265625, 648.5595703125, 1947.66015625, 274.4337463378906, 1017.31103515625, 938.324951171875, 906.2725830078125, 61.29963302612305, 760.8524169921875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000590.npy"} +{"epoch": 0.8663729809104258, "step": 591, "batch_size": 64, "mean": 388.35614013671875, "std": 534.9261474609375, "min": -796.4052124023438, "p10": -309.20848999023434, "median": 366.1194152832031, "p90": 1139.4502685546877, "max": 1578.1944580078125, "pos_frac": 0.8125, "sample": [-26.65447235107422, 1315.9725341796875, 1356.1641845703125, 719.0341186523438, 950.7701416015625, -128.63084411621094, -6.011003494262695, -796.4052124023438, 408.42852783203125, 1266.6429443359375, 780.07470703125, 134.44712829589844, 541.6388549804688, 351.011474609375, 97.1939697265625, 1578.1944580078125, 720.13818359375, 561.359619140625, 142.20594787597656, 320.7274169921875, -20.784439086914062, 969.8722534179688, 104.95466613769531, 16.652761459350586, 453.28411865234375, 193.84750366210938, -577.9928588867188, 629.8571166992188, 361.75177001953125, 386.1015625, -321.6143798828125, 76.76593017578125, 414.94854736328125, -525.9246826171875, 959.8038940429688, 128.19798278808594, 214.01052856445312, 153.758544921875, 723.3911743164062, -280.26141357421875, -415.06097412109375, 810.0865478515625, 194.5270538330078, 453.5116882324219, -635.76025390625, 1025.26171875, 792.4903564453125, 29.071468353271484, 532.7064208984375, 1106.9378662109375, 40.453819274902344, 1153.3841552734375, 1041.3082275390625, 139.04559326171875, 1181.079833984375, 370.487060546875, 498.7532043457031, 1298.064697265625, -673.5242309570312, 602.2279052734375, 74.61039733886719, 426.77130126953125, 215.3261260986328, 246.111083984375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000591.npy"} +{"epoch": 0.8678414096916299, "step": 592, "batch_size": 64, "mean": 348.22406005859375, "std": 663.2416381835938, "min": -1296.5108642578125, "p10": -382.2692413330078, "median": 361.42852783203125, "p90": 1190.2753295898438, "max": 1850.37939453125, "pos_frac": 0.765625, "sample": [83.33644104003906, 1074.45703125, 23.091337203979492, 437.3179931640625, -805.5546875, 191.2537078857422, 333.4690246582031, 1047.2255859375, 1850.37939453125, 1178.6768798828125, -967.4805297851562, 1771.6888427734375, 252.21444702148438, 784.4507446289062, -806.410400390625, -277.0061340332031, -73.66680145263672, 385.2354431152344, 44.3760986328125, 1374.32080078125, 171.64404296875, 953.158935546875, 36.050331115722656, 1195.24609375, 78.36451721191406, 115.77809143066406, 151.53652954101562, 975.9371948242188, 1623.185791015625, 473.6836853027344, 34.16500473022461, -250.35914611816406, 138.69662475585938, 801.0006103515625, 761.8590087890625, 366.5123291015625, -146.65789794921875, 382.8121643066406, -392.1727294921875, 578.24658203125, 490.9644470214844, -94.0376968383789, 756.0263061523438, 1291.195068359375, 356.3447265625, -233.58636474609375, 493.9015197753906, -1296.5108642578125, 621.96484375, 525.8143920898438, 642.1980590820312, 52.78480529785156, 735.2648315429688, 285.7625427246094, -730.7676391601562, 754.1895141601562, 702.1126098632812, 122.4906005859375, 1297.785400390625, -359.1611022949219, -1232.5562744140625, 717.8163452148438, 502.03839111328125, -65.75652313232422], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000592.npy"} +{"epoch": 0.869309838472834, "step": 593, "batch_size": 64, "mean": 266.99151611328125, "std": 558.8416137695312, "min": -888.1651611328125, "p10": -377.1859436035156, "median": 151.0959129333496, "p90": 1105.7166992187504, "max": 1911.029052734375, "pos_frac": 0.640625, "sample": [-33.04109191894531, 451.1220397949219, -261.7251281738281, -1.76385498046875, -383.9901428222656, -605.5736694335938, 1218.46630859375, -396.74432373046875, 1030.81298828125, 609.130859375, -26.8121337890625, 1146.52197265625, 1147.4779052734375, 1041.151611328125, -40.23847198486328, -725.6179809570312, 1133.387451171875, 1172.0086669921875, 593.5538940429688, 38.99460983276367, -177.04014587402344, 32.610107421875, 450.0727844238281, 89.4284439086914, 59.43467330932617, 1911.029052734375, 450.51849365234375, 103.63727569580078, 198.55455017089844, -674.4445190429688, -9.296449661254883, 402.2646789550781, -350.5889892578125, 283.6083984375, 751.8969116210938, 892.10888671875, -233.06027221679688, 1157.1060791015625, -155.98431396484375, 478.4898681640625, -888.1651611328125, 657.6343383789062, -361.3094787597656, -635.106689453125, 1.5566730499267578, 3.700927734375, 6.832757949829102, -97.19385528564453, 538.16845703125, 787.3612060546875, 458.06170654296875, 324.2496032714844, 671.5279541015625, 349.66827392578125, 280.89837646484375, 808.4808959960938, -37.028717041015625, 54.45219421386719, -138.0633544921875, 608.478759765625, -35.04937744140625, 341.73272705078125, -166.2755126953125, 785.3765258789062], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000593.npy"} +{"epoch": 0.8707782672540382, "step": 594, "batch_size": 64, "mean": 343.26385498046875, "std": 543.6307373046875, "min": -774.8511962890625, "p10": -374.77753906249995, "median": 284.3555908203125, "p90": 1122.1285156250008, "max": 1507.3824462890625, "pos_frac": 0.78125, "sample": [794.8287353515625, 46.585540771484375, 92.60328674316406, 38.75469970703125, -69.06703186035156, -624.9873046875, 550.2035522460938, 351.33990478515625, 194.03762817382812, 862.2399291992188, 448.87908935546875, 292.9013366699219, 1507.3824462890625, 690.7645874023438, 146.75694274902344, 792.6495361328125, 660.127685546875, 275.8098449707031, 749.9805297851562, -669.9703369140625, -436.0264892578125, 1298.208984375, -593.908935546875, 1497.8385009765625, -406.114501953125, 411.0076904296875, -201.73641967773438, 411.9576721191406, 380.6277770996094, -31.900543212890625, 86.69227600097656, 457.0731201171875, 1253.2738037109375, 649.2139892578125, 254.6979522705078, 586.3812255859375, 440.6145324707031, 45.7867546081543, 254.25680541992188, -671.5201416015625, 185.62918090820312, 893.606689453125, 157.89553833007812, 18.052955627441406, 1209.632568359375, 863.396240234375, 791.994873046875, 482.25958251953125, -151.61346435546875, 496.40106201171875, -774.8511962890625, 67.18141174316406, 175.6356201171875, -301.657958984375, 342.2010498046875, -50.019107818603516, 891.3134765625, 917.952392578125, 1475.608642578125, 169.19944763183594, 1253.401611328125, -170.0178985595703, 125.52822875976562, 81.90901947021484], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000594.npy"} +{"epoch": 0.8722466960352423, "step": 595, "batch_size": 64, "mean": 375.58038330078125, "std": 557.23779296875, "min": -660.7193603515625, "p10": -142.28618927001952, "median": 243.7145004272461, "p90": 1015.0718322753908, "max": 2765.68359375, "pos_frac": 0.78125, "sample": [848.380615234375, 258.407470703125, 632.4589233398438, 42.162315368652344, 63.51696014404297, 976.9102783203125, 219.32366943359375, 991.5665893554688, -272.84234619140625, 386.31341552734375, 1312.677978515625, 129.1488800048828, 1056.9432373046875, 942.12841796875, -143.35914611816406, 395.658203125, 274.5633850097656, 1652.280517578125, -23.117416381835938, -221.49813842773438, -68.92047119140625, -139.78262329101562, 602.2489013671875, 377.22064208984375, -281.19952392578125, -260.3787536621094, 184.41329956054688, 114.96857452392578, 201.4785919189453, 325.14691162109375, 130.41644287109375, 178.8863525390625, 270.1600341796875, 151.46759033203125, 860.141845703125, 110.17991638183594, 11.612396240234375, 279.8913269042969, 1465.8267822265625, 443.2010498046875, -118.28285217285156, -660.7193603515625, 502.5811462402344, -296.8223571777344, 395.6745300292969, 292.2041015625, 229.0215301513672, -85.22196960449219, 559.9114379882812, 722.8555297851562, 22.07653045654297, 1025.1455078125, 110.35279846191406, -76.66966247558594, 455.0455322265625, 166.97618103027344, 801.8189086914062, 124.01510620117188, -134.25384521484375, 55.63148880004883, 1498.1279296875, 569.5194091796875, 633.8703002929688, 2765.68359375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000595.npy"} +{"epoch": 0.8737151248164464, "step": 596, "batch_size": 64, "mean": 295.7395935058594, "std": 582.82373046875, "min": -1053.8514404296875, "p10": -337.16446228027337, "median": 185.9360809326172, "p90": 1124.1784667968752, "max": 1815.9837646484375, "pos_frac": 0.71875, "sample": [-269.9217529296875, 4.62518310546875, 612.8433837890625, 88.24174499511719, 551.2466430664062, 681.1871337890625, 161.12265014648438, 576.9319458007812, 285.4826354980469, 640.5281982421875, 1815.9837646484375, -25.821378707885742, -530.9013671875, 426.9516906738281, 1079.52099609375, 1399.677001953125, 138.9744415283203, 212.2464141845703, 546.7196044921875, 563.3558959960938, -127.38194274902344, 522.2482299804688, 27.529094696044922, 169.20143127441406, -517.9617919921875, 519.884765625, 433.3788146972656, 308.0837097167969, 182.0592498779297, 186.2482147216797, -289.6014404296875, 102.7412338256836, -168.38290405273438, 1599.4503173828125, 85.96804809570312, 936.1560668945312, 1430.202392578125, 464.1478271484375, -185.2340087890625, -53.501426696777344, 207.91497802734375, 544.5281372070312, 132.60072326660156, 236.13992309570312, -37.08582305908203, 335.455322265625, 1143.3173828125, 185.6239471435547, -357.5486145019531, -5.4956207275390625, 1772.6016845703125, 51.129066467285156, -1053.8514404296875, -9.720306396484375, 319.8033752441406, -698.8457641601562, 61.65797424316406, -492.0325927734375, 593.5086059570312, 89.39926147460938, 1636.0087890625, -269.6286315917969, 440.15130615234375, -482.52740478515625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000596.npy"} +{"epoch": 0.8751835535976505, "step": 597, "batch_size": 64, "mean": 473.5969543457031, "std": 633.044677734375, "min": -1021.6427612304688, "p10": -118.72459793090819, "median": 372.9154357910156, "p90": 1358.3270996093752, "max": 2323.79052734375, "pos_frac": 0.8125, "sample": [751.211669921875, 578.2907104492188, 1165.314453125, 361.7586669921875, 383.23162841796875, 1610.5789794921875, -124.84495544433594, 1022.4717407226562, 1368.1187744140625, -187.71278381347656, -32.171470642089844, -62.8289794921875, 51.585960388183594, 70.95364379882812, 692.81982421875, 96.1235122680664, 786.1420288085938, 131.9011993408203, 428.2436828613281, 1479.705078125, 346.6133117675781, 392.9293212890625, -20.542388916015625, 965.2823486328125, 524.8652954101562, 253.31448364257812, 42.93101119995117, 116.49366760253906, 96.94731140136719, -972.7532958984375, -1021.6427612304688, 19.313661575317383, 777.6329345703125, 362.5992431640625, 554.6199951171875, 2323.79052734375, 1408.8248291015625, -61.77162170410156, -361.830322265625, 272.79400634765625, 624.2981567382812, 1107.0872802734375, 114.23636627197266, 239.0226593017578, 401.9089050292969, -104.44376373291016, 554.9246215820312, 894.5767822265625, 46.64958190917969, 1335.4798583984375, 232.7745361328125, -130.88665771484375, -142.77655029296875, 2176.74609375, 1589.44921875, 49.21104431152344, 643.6209716796875, 432.0743408203125, 699.0557861328125, 730.7408447265625, 34.71598815917969, 1281.21337890625, 3.8834609985351562, 905.3388061523438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000597.npy"} +{"epoch": 0.8766519823788547, "step": 598, "batch_size": 64, "mean": 356.98040771484375, "std": 536.8341674804688, "min": -1066.1646728515625, "p10": -205.19825897216796, "median": 386.2997131347656, "p90": 978.154754638672, "max": 1922.015380859375, "pos_frac": 0.828125, "sample": [318.46746826171875, 761.1962890625, 361.8140563964844, 893.601318359375, -1066.1646728515625, 590.6678466796875, 1098.4674072265625, 608.0160522460938, 134.5772705078125, 1062.9195556640625, 96.16946411132812, -729.4639892578125, -960.0040283203125, 255.3791046142578, 961.0822143554688, -206.1713409423828, 703.0064697265625, 650.7477416992188, 1204.623046875, 1407.9765625, 218.64903259277344, 985.4715576171875, 779.466796875, -202.927734375, 362.4808349609375, 528.1265869140625, 483.9541015625, 410.11859130859375, -965.5578002929688, 53.46772003173828, 495.7613830566406, 468.4655456542969, 498.52996826171875, -112.48123931884766, -512.2738647460938, 533.2973022460938, 468.6604309082031, 1000.7001953125, 268.98248291015625, 190.687744140625, 209.59298706054688, 162.13668823242188, -157.27069091796875, 444.9507141113281, 5.28611946105957, 685.458984375, 500.0162353515625, 294.8865661621094, -217.53366088867188, 149.66552734375, 175.51632690429688, 1922.015380859375, 819.088134765625, 603.0599975585938, 528.6332397460938, 231.11875915527344, 207.3695068359375, 960.2115478515625, 653.1084594726562, 54.803558349609375, 9.049057006835938, -39.43921661376953, 506.1584777832031, 38.37642288208008], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000598.npy"} +{"epoch": 0.8781204111600588, "step": 599, "batch_size": 64, "mean": 322.57403564453125, "std": 435.3083801269531, "min": -1590.05908203125, "p10": -106.93160400390623, "median": 328.57794189453125, "p90": 865.0339538574219, "max": 1142.89306640625, "pos_frac": 0.84375, "sample": [365.1435241699219, 967.861572265625, 513.514892578125, 365.696533203125, 669.9036254882812, 228.37457275390625, 1131.223388671875, 238.4359130859375, 232.79815673828125, 534.24755859375, 443.33221435546875, -437.9151611328125, 2.190418243408203, 84.86566162109375, 22.05165672302246, 238.5464324951172, 798.3328857421875, 867.6906127929688, 436.8533630371094, 1014.1929931640625, 262.332275390625, 651.4488525390625, 828.00732421875, 337.4908142089844, 293.3870544433594, -22.985788345336914, 129.17266845703125, 337.8955078125, 283.9064025878906, -1590.05908203125, 671.7701416015625, 424.95953369140625, 517.6779174804688, 161.95762634277344, 851.0032348632812, 707.4841918945312, 464.05804443359375, -116.1553955078125, 869.36279296875, 101.87235260009766, 258.27398681640625, 42.999053955078125, 930.7296142578125, -155.00401306152344, 259.3765869140625, 412.2632751464844, 618.020263671875, 19.379066467285156, 325.8302001953125, 188.41549682617188, 858.8350830078125, 331.32568359375, 93.46131134033203, 21.08759307861328, -259.3952331542969, -22.170780181884766, 261.5299377441406, 433.95074462890625, -418.52740478515625, 1142.89306640625, -85.409423828125, -306.13787841796875, 369.6209716796875, 441.4639587402344], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000599.npy"} +{"epoch": 0.8795888399412628, "step": 600, "batch_size": 64, "mean": 295.3502197265625, "std": 547.2053833007812, "min": -804.99267578125, "p10": -397.85045471191404, "median": 247.70547485351562, "p90": 934.9824157714845, "max": 2180.75, "pos_frac": 0.734375, "sample": [184.38426208496094, 528.0764770507812, 465.58038330078125, -804.99267578125, 713.9784545898438, 405.5828857421875, -80.1322021484375, 464.751220703125, -427.1195373535156, 485.9629821777344, 208.11785888671875, 179.46267700195312, 840.5785522460938, -240.31283569335938, 268.7130126953125, 771.2618408203125, -611.5394897460938, 1339.2957763671875, 87.48082733154297, 443.4833679199219, 657.849609375, 914.4461059570312, -107.11836242675781, 313.1741638183594, -209.81004333496094, -365.8084716796875, 621.365478515625, 943.78369140625, 1285.4566650390625, 647.4285278320312, 226.69793701171875, 53.879669189453125, 904.0045166015625, 156.24627685546875, 654.011474609375, 473.1830139160156, 30.392745971679688, 283.5169677734375, -477.14874267578125, 1266.44287109375, 2180.75, 214.37449645996094, -152.6127166748047, -110.48151397705078, 1058.82177734375, 191.54788208007812, 850.8684692382812, 94.96272277832031, -719.9142456054688, 405.0605773925781, -82.37089538574219, 991.9588623046875, 300.292236328125, 50.02989959716797, 311.73126220703125, 711.1411743164062, -411.5827331542969, 87.83712768554688, 191.27696228027344, -326.1035461425781, -671.6768188476562, 320.3236083984375, -173.57373046875, 95.14412689208984], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000600.npy"} +{"epoch": 0.8810572687224669, "step": 601, "batch_size": 64, "mean": 380.12744140625, "std": 486.2052307128906, "min": -1025.8934326171875, "p10": -230.98012695312497, "median": 308.9476776123047, "p90": 1039.6827758789063, "max": 1553.9444580078125, "pos_frac": 0.796875, "sample": [565.597900390625, 1122.7650146484375, 1553.9444580078125, 760.023193359375, -8.79753589630127, 935.1863403320312, 394.2496337890625, 297.275634765625, -260.365966796875, 287.93157958984375, -179.4693603515625, -31.470687866210938, 1207.45947265625, 492.94879150390625, 292.9951171875, 641.8876342773438, 221.42144775390625, 554.831298828125, -201.48190307617188, 1160.511962890625, 871.2670288085938, 612.9017944335938, 562.3412475585938, -107.64244079589844, 524.160888671875, 293.377197265625, -38.83348083496094, 570.212646484375, 761.0615234375, 306.7643127441406, 275.6313171386719, 275.2933349609375, -708.687255859375, 311.13104248046875, -306.6348571777344, 330.532470703125, 231.13897705078125, 20.791336059570312, 170.35507202148438, 282.2280578613281, 632.5308837890625, 794.9761962890625, 162.133544921875, -1025.8934326171875, 1041.945556640625, -538.08935546875, 60.790374755859375, 1139.9178466796875, 676.7817993164062, -272.9187927246094, 692.243408203125, 786.5036010742188, 145.58018493652344, 304.6113586425781, 87.92399597167969, 789.6323852539062, 420.8774719238281, 1295.7996826171875, 1034.4029541015625, 369.25445556640625, -243.62222290039062, 206.15701293945312, 481.87579345703125, 239.90777587890625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000601.npy"} +{"epoch": 0.882525697503671, "step": 602, "batch_size": 64, "mean": 452.1407470703125, "std": 523.0543823242188, "min": -538.715087890625, "p10": -68.6841033935547, "median": 385.17236328125, "p90": 957.5186218261719, "max": 2255.72998046875, "pos_frac": 0.84375, "sample": [425.54058837890625, 161.19659423828125, -158.32044982910156, 153.84262084960938, 35.502174377441406, 166.19854736328125, 956.4854125976562, 768.7427978515625, 50.70307922363281, 136.81944274902344, 413.52020263671875, 187.5811309814453, 492.43157958984375, 776.69482421875, 1281.632080078125, 69.05550384521484, 555.3641967773438, 356.82452392578125, 503.6882629394531, 910.9365844726562, -69.06954956054688, 204.29635620117188, -67.78472900390625, -424.18914794921875, 998.337158203125, 521.376708984375, 253.988525390625, 415.77288818359375, 54.78765869140625, -115.57786560058594, 56.267478942871094, 972.5145263671875, 701.0599365234375, 204.7231903076172, 676.5818481445312, 1048.61474609375, 114.8847427368164, 562.7470092773438, -538.715087890625, -450.6057434082031, 939.3306884765625, 883.5184326171875, 699.5125732421875, 957.96142578125, 919.5801391601562, 814.884521484375, 2255.72998046875, 124.55174255371094, 854.5653076171875, -469.02423095703125, 818.495849609375, 940.7818603515625, 257.29541015625, 127.48908233642578, 944.939453125, -9.058073043823242, -44.57518768310547, 784.9279174804688, 338.4845886230469, 58.86297607421875, 902.4400024414062, 2026.9044189453125, 333.3445129394531, 111.61393737792969], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000602.npy"} +{"epoch": 0.8839941262848752, "step": 603, "batch_size": 64, "mean": 434.49066162109375, "std": 614.60791015625, "min": -635.2991333007812, "p10": -296.2890106201172, "median": 362.3848571777344, "p90": 1232.4511718750002, "max": 2558.4140625, "pos_frac": 0.765625, "sample": [-352.02392578125, -297.7502746582031, 336.24462890625, 45.558380126953125, 1262.280517578125, -51.28839111328125, 900.1135864257812, 388.52508544921875, 680.6328125, 984.4153442382812, -90.08880615234375, 238.95114135742188, -635.2991333007812, 763.96435546875, 55.33835220336914, 438.7212219238281, 75.49557495117188, 1352.8948974609375, 1840.367431640625, 545.3379516601562, 1193.16845703125, 742.2462158203125, 613.237060546875, 56.276187896728516, 327.50860595703125, 232.92318725585938, 537.3340454101562, 122.28494262695312, 28.90701675415039, 253.941162109375, 1441.133544921875, -614.2106323242188, 1186.2021484375, -114.51321411132812, -25.55987548828125, 253.13995361328125, 1014.6638793945312, -340.95001220703125, 478.8020324707031, 427.8661804199219, 829.1117553710938, -259.010498046875, -175.12510681152344, 209.97793579101562, -162.39007568359375, 646.1600341796875, 555.4905395507812, 119.51748657226562, -292.87939453125, 468.77886962890625, 511.60498046875, 1249.28662109375, 816.7000732421875, -311.69482421875, 258.19561767578125, -482.9432373046875, 2558.4140625, 410.7181091308594, 1551.793212890625, 68.81561279296875, 980.2357177734375, 1095.521240234375, 845.0678100585938, 19.265201568603516], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000603.npy"} +{"epoch": 0.8854625550660793, "step": 604, "batch_size": 64, "mean": 447.4491882324219, "std": 627.5985717773438, "min": -1011.31982421875, "p10": -253.0359649658203, "median": 414.5137176513672, "p90": 1410.4606567382814, "max": 1887.2314453125, "pos_frac": 0.734375, "sample": [-376.00164794921875, 113.50640869140625, -174.4285888671875, -160.6603546142578, 367.77166748046875, 1174.5548095703125, 731.6263427734375, 1067.9134521484375, 536.9634399414062, 1887.2314453125, -1011.31982421875, 882.9933471679688, 201.6269073486328, -196.35269165039062, 804.3651733398438, 1056.030517578125, 608.5944213867188, 30.842288970947266, 367.440673828125, 1073.652099609375, 346.50830078125, -741.9906616210938, 100.40184020996094, 1481.5205078125, 483.4989318847656, -663.7356567382812, 1571.2130126953125, -216.14479064941406, -87.0741195678711, 350.612548828125, -244.02182006835938, 374.9403991699219, 464.03143310546875, 575.3030395507812, -39.83816146850586, 1387.993408203125, 629.9474487304688, 36.298484802246094, -154.75816345214844, 625.6101684570312, 577.5025024414062, 423.94793701171875, 461.9297790527344, 311.5635986328125, 388.2613830566406, 1595.165283203125, -315.58544921875, 1634.494140625, 405.0794982910156, -16.938217163085938, 626.1046142578125, 1179.5330810546875, 843.60498046875, -85.07600402832031, 1769.388427734375, 456.9326171875, -256.899169921875, 58.902984619140625, 295.5943298339844, 859.6041870117188, -267.03302001953125, 507.11334228515625, 1420.0894775390625, 496.8011169433594], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000604.npy"} +{"epoch": 0.8869309838472834, "step": 605, "batch_size": 64, "mean": 374.19342041015625, "std": 557.3058471679688, "min": -972.3836669921875, "p10": -214.04468841552733, "median": 352.7792510986328, "p90": 1051.173706054688, "max": 2377.25390625, "pos_frac": 0.78125, "sample": [-89.19297790527344, -272.62872314453125, -77.35336303710938, 257.4378967285156, 213.67123413085938, 88.84883117675781, 727.7996215820312, 578.5333251953125, 909.6578369140625, 337.0224609375, 483.24688720703125, -88.45919799804688, 146.5977783203125, -148.620361328125, 486.7565612792969, 473.3829345703125, 234.0273895263672, 768.3777465820312, 1428.64990234375, -202.31153869628906, 2377.25390625, 48.644256591796875, 626.5128784179688, 121.86184692382812, 382.2476806640625, 695.1940307617188, 62.20684814453125, -172.59683227539062, 428.65167236328125, 361.737060546875, 139.60125732421875, -331.35552978515625, 1156.1046142578125, -461.0199279785156, -219.07318115234375, 557.036376953125, 1149.156982421875, 96.79265594482422, 194.50909423828125, 920.968505859375, 26.370071411132812, 1824.5091552734375, -720.5819091796875, -972.3836669921875, 552.9827880859375, 467.1179504394531, -279.14703369140625, 770.8541870117188, 66.93994903564453, 18.356523513793945, 635.7967529296875, 592.068115234375, 1097.7308349609375, 457.4314270019531, 355.61968994140625, 343.19854736328125, 370.356689453125, 1257.9005126953125, 349.9388122558594, 166.86244201660156, 855.1814575195312, 403.0510559082031, -24.19347381591797, 942.5404052734375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000605.npy"} +{"epoch": 0.8883994126284875, "step": 606, "batch_size": 64, "mean": 411.5608825683594, "std": 552.8740234375, "min": -538.665771484375, "p10": -258.6726470947265, "median": 313.98130798339844, "p90": 1053.6812622070318, "max": 2070.567138671875, "pos_frac": 0.796875, "sample": [-538.665771484375, -105.88711547851562, 155.29586791992188, -415.7782897949219, 498.4217224121094, 949.9495849609375, 96.14297485351562, 199.5867919921875, 1287.11572265625, 858.331787109375, 217.3057403564453, 2070.567138671875, 594.1898803710938, 14.947998046875, -241.79409790039062, 226.6470947265625, 470.34564208984375, 297.4323425292969, 484.0290832519531, 1240.4921875, 828.3102416992188, -299.57208251953125, 1420.7301025390625, 28.98151397705078, 773.5582275390625, 723.5770263671875, 513.3165893554688, 194.56666564941406, -107.93000030517578, -265.90631103515625, 423.09527587890625, 680.6669311523438, -284.6759948730469, 215.59970092773438, -488.6710510253906, 330.5302734375, 253.71304321289062, 898.5966186523438, -8.194585800170898, -84.780029296875, -404.70074462890625, -89.07862091064453, 1894.036865234375, 1098.1376953125, 229.38119506835938, 493.32269287109375, 238.02178955078125, 693.5030517578125, 781.1424560546875, 242.37188720703125, 48.43196105957031, 751.2214965820312, 6.7837371826171875, 289.71746826171875, 708.695068359375, 1967.2020263671875, 385.66015625, 585.3164672851562, 22.87236785888672, 187.0935821533203, 556.4034423828125, 493.9211120605469, 577.7998046875, 478.4508972167969], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000606.npy"} +{"epoch": 0.8898678414096917, "step": 607, "batch_size": 64, "mean": 303.0077209472656, "std": 532.7410888671875, "min": -1233.431396484375, "p10": -232.42487182617185, "median": 307.08570861816406, "p90": 1023.2402832031253, "max": 1827.235595703125, "pos_frac": 0.796875, "sample": [410.013427734375, -82.33595275878906, 435.4644775390625, 130.28538513183594, 90.46318817138672, 197.89439392089844, 955.828369140625, 505.75726318359375, 19.796478271484375, 1300.415283203125, -259.9658203125, 755.1133422851562, 432.6767272949219, -178.00375366210938, 65.95285034179688, 1052.131103515625, -654.6769409179688, 236.71627807617188, 475.63397216796875, 113.47412872314453, -243.29049682617188, -359.9515686035156, 296.1273498535156, 428.25567626953125, -530.425048828125, 602.1239013671875, -25.23896026611328, 1411.3408203125, 617.96826171875, 201.9251251220703, 548.4256591796875, 45.62470245361328, 635.8927001953125, 249.58364868164062, 409.7716064453125, 269.1114501953125, 1079.869140625, 19.674560546875, 1351.326416015625, 155.49044799804688, -1015.9744873046875, 517.8923950195312, -202.51739501953125, 348.34600830078125, 446.07208251953125, 98.49789428710938, 45.400978088378906, 103.19219207763672, -1233.431396484375, 370.8614501953125, 439.4217224121094, 223.19332885742188, -207.07174682617188, 462.7351379394531, 332.9305114746094, 101.54295349121094, 318.0440673828125, 1827.235595703125, 1401.5880126953125, 658.0103759765625, 322.8827819824219, -198.08615112304688, 548.7041625976562, 516.783935546875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000607.npy"} +{"epoch": 0.8913362701908958, "step": 608, "batch_size": 64, "mean": 383.71197509765625, "std": 462.9171447753906, "min": -626.5985107421875, "p10": -138.9622543334961, "median": 354.0106201171875, "p90": 843.8193786621093, "max": 1696.3001708984375, "pos_frac": 0.78125, "sample": [809.7345581054688, 803.1050415039062, 367.30419921875, 378.93524169921875, -219.11611938476562, 1445.447265625, -111.7898941040039, 130.34071350097656, -177.39552307128906, 437.5911865234375, 834.5961303710938, 1.9015941619873047, 61.71910858154297, 637.0382690429688, -138.4472198486328, 228.93612670898438, -626.5985107421875, 339.1134033203125, -131.3064422607422, 168.09799194335938, 461.4596862792969, 668.141357421875, 102.46051025390625, 803.6358032226562, -19.752464294433594, 389.01708984375, 692.4227294921875, 469.9302062988281, 844.6880493164062, -88.31306457519531, 2.7166099548339844, -63.58494567871094, 351.33404541015625, 339.8182067871094, 725.223388671875, 359.44677734375, 220.4912872314453, 313.2398681640625, 1053.4918212890625, -365.0897216796875, 1696.3001708984375, 222.61907958984375, 721.7109375, 899.954345703125, 1154.0089111328125, 83.35906219482422, 85.81155395507812, 841.79248046875, 194.91500854492188, -98.61531066894531, 585.542236328125, -139.1829833984375, 685.82275390625, 736.3925170898438, -248.9430389404297, 783.400390625, -380.4222717285156, 86.16921997070312, 1370.306640625, 772.1138305664062, 833.1295166015625, 595.7673950195312, 218.94354248046875, 356.68719482421875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000608.npy"} +{"epoch": 0.8928046989720999, "step": 609, "batch_size": 64, "mean": 453.39361572265625, "std": 685.3291625976562, "min": -1112.8330078125, "p10": -193.0202865600586, "median": 433.16363525390625, "p90": 1051.2966674804688, "max": 3440.769775390625, "pos_frac": 0.765625, "sample": [129.62667846679688, 164.1165771484375, 123.47315216064453, -661.1049194335938, 301.12255859375, 180.95169067382812, 1130.75732421875, 472.28631591796875, 580.689208984375, -475.93914794921875, 718.9649047851562, 1807.42724609375, 110.87379455566406, 454.49615478515625, 651.3068237304688, -1112.8330078125, -138.70693969726562, 566.2255249023438, 781.20849609375, 241.86526489257812, 516.9737548828125, -87.16534423828125, 757.399169921875, 726.671630859375, -195.60003662109375, 647.2589721679688, 1079.8380126953125, 201.05667114257812, -318.7638244628906, 817.5336303710938, 1054.989501953125, 630.1913452148438, 2096.2001953125, 557.423095703125, 2263.62060546875, 910.9857177734375, -64.77777862548828, 635.2448120117188, -63.37456512451172, 411.83111572265625, 390.2673645019531, 277.01763916015625, 761.8470458984375, 3440.769775390625, 501.4754333496094, 102.91351318359375, 637.04052734375, -114.83450317382812, -619.8762817382812, 160.50241088867188, 345.61859130859375, 327.7541809082031, 831.6590576171875, -69.74028015136719, -21.97290802001953, 702.47802734375, 327.12664794921875, 461.55615234375, 593.5447998046875, 1042.6800537109375, 216.46189880371094, 507.4930419921875, -201.93603515625, -187.00086975097656], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000609.npy"} +{"epoch": 0.8942731277533039, "step": 610, "batch_size": 64, "mean": 298.15496826171875, "std": 523.5525512695312, "min": -1493.925048828125, "p10": -389.29450683593745, "median": 281.55043029785156, "p90": 894.6629516601563, "max": 1680.5816650390625, "pos_frac": 0.765625, "sample": [686.03173828125, 694.2867431640625, 90.77680969238281, 252.0013885498047, -418.2938232421875, 685.9707641601562, 107.330078125, 444.14971923828125, 506.34259033203125, -486.52203369140625, 47.55683135986328, 424.4188232421875, -61.657066345214844, 1276.939208984375, 773.687744140625, 358.3509521484375, 121.70248413085938, 214.09994506835938, -400.3194885253906, -1493.925048828125, 487.54522705078125, 1680.5816650390625, 115.94585418701172, 879.4912719726562, -441.2598876953125, 317.8416748046875, 24.363277435302734, -645.5874633789062, 901.1651000976562, 742.903076171875, 1072.1806640625, -45.79924011230469, 166.91769409179688, 773.186767578125, 261.05218505859375, 1180.25634765625, -363.5695495605469, 231.81143188476562, 218.97991943359375, 733.8069458007812, 575.6383056640625, 324.25921630859375, -97.98518371582031, 552.9944458007812, 271.9283142089844, 309.3941955566406, 1020.4270629882812, 148.98773193359375, -61.13037109375, 335.75537109375, 50.01976013183594, 564.51318359375, 196.2412872314453, -64.52153015136719, 696.2044067382812, 182.65231323242188, 291.17254638671875, 306.324951171875, -141.2255096435547, 333.0498352050781, -538.1642456054688, -326.9350891113281, 734.4656982421875, 1303.11083984375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000610.npy"} +{"epoch": 0.895741556534508, "step": 611, "batch_size": 64, "mean": 524.6498413085938, "std": 676.0696411132812, "min": -793.0016479492188, "p10": -175.22916564941406, "median": 427.1615905761719, "p90": 1446.4131103515626, "max": 2318.220703125, "pos_frac": 0.78125, "sample": [1469.17822265625, 427.2772216796875, 2255.3759765625, 1151.641845703125, 1043.864013671875, 695.9877319335938, -76.02320098876953, 1450.3072509765625, 103.02413177490234, 341.5802917480469, 287.5285339355469, 858.5133666992188, -48.2193489074707, 129.6739044189453, -526.2023315429688, 9.673696517944336, -405.8983459472656, -793.0016479492188, 321.9346008300781, 1366.9415283203125, -733.307373046875, 948.622802734375, 1641.833740234375, -471.7968444824219, 1149.3741455078125, 766.7009887695312, 111.4632339477539, 1027.9503173828125, 491.8979187011719, -18.149124145507812, -170.43548583984375, 1256.653564453125, 449.8125, 19.222558975219727, 422.0787353515625, 1141.08544921875, 138.24752807617188, 230.50531005859375, -141.75067138671875, 276.82318115234375, 863.9288330078125, 427.04595947265625, 33.01100158691406, 1806.6544189453125, 429.29058837890625, 463.1461181640625, -177.28359985351562, 477.3096008300781, 952.0082397460938, -75.33499908447266, 648.1092529296875, -244.37147521972656, 225.11248779296875, 358.3804931640625, 705.15478515625, 485.78143310546875, 372.9205322265625, 1692.3519287109375, 481.19476318359375, 1437.3267822265625, 1245.369384765625, -15.850624084472656, 2318.220703125, 68.12399291992188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000611.npy"} +{"epoch": 0.8972099853157122, "step": 612, "batch_size": 64, "mean": 410.1479797363281, "std": 546.0565185546875, "min": -657.7195434570312, "p10": -177.73114166259765, "median": 320.56427001953125, "p90": 936.089501953125, "max": 2478.130126953125, "pos_frac": 0.796875, "sample": [911.4754028320312, 341.0340270996094, 300.0945129394531, -485.2162170410156, -157.49417114257812, -21.913732528686523, 891.1123046875, 438.4447326660156, 505.64617919921875, 404.3285827636719, -554.3035888671875, 645.0693359375, -55.123321533203125, 941.5546875, 784.0391235351562, 1101.407958984375, 635.8994140625, 240.2811279296875, 418.8438720703125, 969.2958984375, 897.75732421875, 68.3905258178711, -657.7195434570312, 908.753662109375, 653.2084350585938, -176.87939453125, 825.5824584960938, -328.7835693359375, 738.1431884765625, 805.729736328125, 446.29815673828125, -640.057861328125, 84.81813049316406, 292.7631530761719, 123.02227783203125, -191.11744689941406, 618.676513671875, 217.34866333007812, 234.5001220703125, 66.518310546875, 138.94410705566406, 148.1885528564453, 186.577880859375, 136.32806396484375, 2478.130126953125, -91.03240966796875, 923.33740234375, 1171.71533203125, 1419.2720947265625, 224.56201171875, 197.92408752441406, 722.2951049804688, 1442.4637451171875, 232.15655517578125, -141.6862030029297, -178.09617614746094, 569.3494262695312, 19.297378540039062, 835.13720703125, 6.2968902587890625, 874.7564086914062, 779.7303466796875, 831.528076171875, 80.865966796875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000612.npy"} +{"epoch": 0.8986784140969163, "step": 613, "batch_size": 64, "mean": 484.1087646484375, "std": 536.754638671875, "min": -655.0139770507812, "p10": -58.41666870117184, "median": 488.8970031738281, "p90": 1188.4297973632815, "max": 1998.02587890625, "pos_frac": 0.84375, "sample": [576.70703125, 1044.281494140625, 797.3331298828125, -172.7856903076172, 1130.8150634765625, 897.4268798828125, -74.04093933105469, 1051.98681640625, 1021.320068359375, 40.84955596923828, 1414.0428466796875, 213.01828002929688, 579.109619140625, 343.25433349609375, 1563.6605224609375, -168.0447540283203, -1.9813385009765625, 482.2471923828125, 495.54681396484375, -21.960037231445312, 382.22894287109375, 133.88836669921875, 673.7686157226562, 684.7314453125, 1271.38525390625, 371.91912841796875, 1213.121826171875, 898.456787109375, 140.59075927734375, 547.9577026367188, 699.8688354492188, 21.060821533203125, 4.529912948608398, 272.06134033203125, 256.1456298828125, 1639.99951171875, 727.3856811523438, 621.4135131835938, 1998.02587890625, 8.085330963134766, -655.0139770507812, 1047.769775390625, 556.6084594726562, 745.76708984375, 617.8184204101562, -580.5272216796875, 181.3459014892578, 1474.3531494140625, -6.318378448486328, 298.09136962890625, 763.7071533203125, -80.42896270751953, -505.3961486816406, 116.66485595703125, 787.9298095703125, 515.7040405273438, 297.1145935058594, 499.2116394042969, 71.5832748413086, 657.5139770507812, 2.4030609130859375, 67.26398468017578, 210.40206909179688, 119.98199462890625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000613.npy"} +{"epoch": 0.9001468428781204, "step": 614, "batch_size": 64, "mean": 432.84381103515625, "std": 637.02587890625, "min": -1200.4569091796875, "p10": -232.0070983886718, "median": 368.665771484375, "p90": 1227.663525390625, "max": 1895.2579345703125, "pos_frac": 0.765625, "sample": [164.01470947265625, 237.63262939453125, -755.605224609375, 932.0753173828125, 198.6699981689453, 1158.8975830078125, 98.13714599609375, 282.0542907714844, 433.54974365234375, -154.62698364257812, 1192.2689208984375, 294.80731201171875, 481.7296447753906, 593.3118286132812, 393.07208251953125, -176.3758544921875, 1214.801025390625, 808.2791137695312, 344.25946044921875, -92.44313049316406, 989.4290771484375, -34.87274169921875, 579.7335205078125, 703.6283569335938, -532.1937866210938, -102.4379653930664, 310.22821044921875, -47.792633056640625, 467.6745300292969, 23.920639038085938, 598.2022705078125, 1484.66796875, -255.84906005859375, 629.0916137695312, -568.9356689453125, -158.43304443359375, 720.5982055664062, -868.0179443359375, -51.34907531738281, 706.8930053710938, 1504.8739013671875, 298.8408508300781, 1590.503662109375, 315.5969543457031, 1182.6077880859375, 1233.176025390625, 1100.3436279296875, 290.2900390625, 434.36566162109375, 539.791259765625, 29.558250427246094, 306.6921081542969, 1167.88818359375, 114.27651977539062, 511.8008117675781, 1895.2579345703125, 1847.50732421875, 164.57980346679688, -579.5126342773438, -1200.4569091796875, 1255.1734619140625, 224.1739501953125, 464.95013427734375, 767.0299072265625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000614.npy"} +{"epoch": 0.9016152716593245, "step": 615, "batch_size": 64, "mean": 374.772216796875, "std": 602.982666015625, "min": -1571.0372314453125, "p10": -259.9686096191406, "median": 278.10987854003906, "p90": 1002.0992492675783, "max": 2425.841552734375, "pos_frac": 0.78125, "sample": [119.32980346679688, 623.3641357421875, 167.60836791992188, 1614.105224609375, 297.760009765625, 1044.8072509765625, 5.0054168701171875, 79.27918243408203, -74.186279296875, 125.44192504882812, 832.5225830078125, 205.28749084472656, 275.95050048828125, -409.51495361328125, 668.6802978515625, -290.9967346191406, 714.5987548828125, -1571.0372314453125, 189.07826232910156, 365.7907409667969, 243.96539306640625, 280.2692565917969, 1808.3612060546875, 66.22045135498047, 84.0964126586914, 838.1605224609375, -229.98123168945312, 406.2637939453125, 666.9479370117188, 44.27349853515625, 359.3514099121094, 1020.1655883789062, 544.62890625, 526.047119140625, 959.9444580078125, -62.520164489746094, 186.0511932373047, -30.91562271118164, -453.1783447265625, -82.32056427001953, -638.6573486328125, 899.2161865234375, 832.7349243164062, 73.83265686035156, 1582.8157958984375, 472.3619689941406, 621.8299560546875, 1268.8441162109375, 210.22406005859375, 470.9395751953125, 781.4093017578125, -272.8203430175781, 259.85247802734375, -50.129669189453125, 704.308349609375, -350.8924255371094, 205.46517944335938, 379.9317932128906, 507.5808410644531, 833.8726196289062, -129.1738739013672, 2425.841552734375, 539.9248657226562, 197.40243530273438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000615.npy"} +{"epoch": 0.9030837004405287, "step": 616, "batch_size": 64, "mean": 314.478271484375, "std": 553.3341064453125, "min": -766.2897338867188, "p10": -344.47976074218747, "median": 275.37547302246094, "p90": 1052.7723449707032, "max": 1761.9246826171875, "pos_frac": 0.765625, "sample": [836.6806640625, 35.358367919921875, 42.50916290283203, 461.974365234375, 378.1141662597656, -30.27858543395996, 280.231201171875, -294.4596252441406, 308.7105407714844, 1210.392333984375, 1381.7591552734375, 558.32958984375, 273.1573791503906, 1.1034393310546875, -544.363525390625, 1021.1008911132812, 484.1597595214844, 207.56484985351562, 141.01910400390625, 6.596534729003906, 1306.638671875, 684.262451171875, 175.09579467773438, 386.6850891113281, -365.9169616699219, -762.6981811523438, 502.37921142578125, 134.470458984375, 40.642677307128906, 1066.3458251953125, 1761.9246826171875, 116.9061050415039, -550.3734130859375, 94.6153793334961, -55.25526809692383, -683.0062255859375, -155.29592895507812, 213.45059204101562, 810.9219360351562, 551.482666015625, 539.5744018554688, 376.46234130859375, 416.25616455078125, 722.0419311523438, -766.2897338867188, 872.565185546875, 195.2068634033203, -280.49951171875, 165.34454345703125, 687.7186279296875, 154.66786193847656, -278.7020568847656, 1731.0809326171875, 358.28179931640625, 590.046142578125, 130.31350708007812, 470.96832275390625, 277.59356689453125, -204.06329345703125, 535.4921264648438, 996.51171875, -414.1495361328125, 1110.336181640625, -293.0854187011719], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000616.npy"} +{"epoch": 0.9045521292217328, "step": 617, "batch_size": 64, "mean": 231.5129852294922, "std": 505.4405212402344, "min": -918.4515380859375, "p10": -383.0775268554687, "median": 279.5992431640625, "p90": 768.7135498046875, "max": 1806.037109375, "pos_frac": 0.734375, "sample": [371.823486328125, 99.08551025390625, -177.080810546875, 389.1516418457031, 394.5159606933594, -284.31915283203125, -161.19512939453125, -639.6485595703125, 760.1769409179688, 58.22196960449219, -257.9021911621094, 304.6871032714844, 295.7803039550781, 325.6195983886719, 201.3913116455078, 247.93594360351562, 340.77728271484375, 262.1234130859375, 6.8942718505859375, 981.1284790039062, 763.0485229492188, 541.491943359375, 268.70281982421875, -169.55506896972656, 445.3862609863281, 470.3385925292969, 328.7428283691406, -106.03559875488281, 337.7720947265625, -918.4515380859375, 95.87557983398438, 330.08935546875, -415.68707275390625, -228.28054809570312, 653.1240234375, 343.1417236328125, 1157.1217041015625, 771.1414184570312, 392.8226013183594, 511.9577331542969, 324.26251220703125, -442.62847900390625, 572.9468383789062, 558.9671020507812, 1648.6287841796875, 462.875732421875, 877.2827758789062, -592.1741943359375, 158.11117553710938, 527.4743041992188, -821.0736083984375, 157.36912536621094, 81.76171112060547, 290.49566650390625, 204.89364624023438, 14.13912582397461, 859.763427734375, 54.7750244140625, -66.41747283935547, -306.98858642578125, 1806.037109375, 189.12176513671875, -783.0389404296875, -51.66796875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000617.npy"} +{"epoch": 0.9060205580029369, "step": 618, "batch_size": 64, "mean": 489.87103271484375, "std": 759.044921875, "min": -1519.362548828125, "p10": -348.9917327880859, "median": 515.4632873535156, "p90": 1254.4968505859376, "max": 2771.094970703125, "pos_frac": 0.78125, "sample": [-1519.362548828125, 850.0211181640625, 203.74798583984375, 602.9067993164062, 165.37338256835938, 328.59088134765625, 157.46954345703125, 71.35404968261719, 2269.19189453125, 916.6434936523438, 329.29144287109375, -799.5990600585938, 796.2650756835938, 161.75550842285156, 1154.658203125, 378.7896423339844, -343.41888427734375, 1197.598876953125, 762.2761840820312, -351.3800964355469, -78.33245086669922, -507.0876159667969, -289.91796875, 120.29411315917969, -864.0942993164062, -672.9198608398438, 28.362754821777344, 1204.9537353515625, 921.0438232421875, 92.8162841796875, 685.044189453125, 571.68896484375, 668.3278198242188, 1149.00048828125, 986.2644653320312, -854.7953491210938, -309.9582214355469, 921.172607421875, 359.6378479003906, 517.3464965820312, 979.54931640625, 1078.9735107421875, 1282.23876953125, 2233.34423828125, 733.2816162109375, -152.81683349609375, 362.1553955078125, 573.3729248046875, 433.8055114746094, 2771.094970703125, 856.5997314453125, 86.55895233154297, 1455.038330078125, 1275.7296142578125, 720.5807495117188, 513.580078125, -256.69744873046875, 652.1064453125, 1686.589599609375, -182.95156860351562, 247.45648193359375, 1115.8955078125, 559.3552856445312, 345.8843994140625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000618.npy"} +{"epoch": 0.9074889867841409, "step": 619, "batch_size": 64, "mean": 383.070068359375, "std": 740.3828125, "min": -1149.404541015625, "p10": -513.152798461914, "median": 259.54518127441406, "p90": 1386.7446655273443, "max": 2990.776123046875, "pos_frac": 0.703125, "sample": [-835.510986328125, -641.9672241210938, 45.1640625, 1012.4359130859375, 1433.4112548828125, 126.7043685913086, 140.1468963623047, 195.74148559570312, 322.23590087890625, -419.7551574707031, 534.034423828125, 730.8671875, 512.3956298828125, 1902.155517578125, -649.6982421875, 195.58505249023438, 1277.85595703125, 382.64324951171875, 998.0488891601562, 993.2796630859375, -672.0238037109375, -570.7346801757812, 1233.5595703125, 1187.8564453125, 44.30497741699219, -553.1803588867188, -30.142723083496094, 483.52703857421875, 632.3060302734375, -56.72906494140625, 269.4895935058594, 492.8344421386719, -235.22830200195312, 160.8719482421875, 1483.7373046875, -183.10147094726562, -96.7816162109375, -284.69720458984375, 793.9931640625, 538.65966796875, -89.05569458007812, 1653.58642578125, 542.0629272460938, 618.2957153320312, 778.8012084960938, 81.27970886230469, -229.71270751953125, 2075.97021484375, 1445.9129638671875, 249.60076904296875, 70.1723861694336, 209.17666625976562, 296.5509948730469, 2990.776123046875, -1149.404541015625, -346.11700439453125, 499.79937744140625, -34.150142669677734, 1039.019775390625, -1.6614723205566406, 286.1674499511719, 181.02474975585938, 15.20234489440918, 438.8907775878906], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000619.npy"} +{"epoch": 0.908957415565345, "step": 620, "batch_size": 64, "mean": 363.982421875, "std": 557.7740478515625, "min": -848.8522338867188, "p10": -251.13099670410156, "median": 317.7734069824219, "p90": 1220.7157348632816, "max": 1776.797607421875, "pos_frac": 0.75, "sample": [390.7853698730469, -392.50732421875, 493.58624267578125, 613.020263671875, 446.3272705078125, 760.177978515625, 309.59381103515625, -7.605352401733398, -85.08045196533203, 1128.1068115234375, -197.98300170898438, 119.16669464111328, -253.11215209960938, 1339.4266357421875, 530.337890625, 156.64454650878906, 898.832275390625, 395.2737731933594, 113.69393920898438, 1065.561279296875, 1776.797607421875, 201.34774780273438, 466.818603515625, 1260.4052734375, -611.813232421875, 212.38967895507812, 615.3038940429688, 257.7231140136719, 102.21290588378906, 140.6363983154297, -368.53265380859375, 1466.790283203125, 325.9530029296875, 1726.890380859375, -220.6650390625, 476.0313720703125, -121.7924575805664, 393.449951171875, 64.44561767578125, 470.20758056640625, 530.9889526367188, 530.2754516601562, -246.50830078125, -123.8863525390625, 1378.3089599609375, 579.4397583007812, 820.956298828125, -848.8522338867188, 109.43949127197266, 179.06900024414062, -147.12469482421875, -594.5033569335938, 1422.7225341796875, 161.15267944335938, 502.5749206542969, 371.7043151855469, 463.40606689453125, 305.3413391113281, -376.57904052734375, 405.7005615234375, 272.82928466796875, 1083.8179931640625, -117.59249877929688, 173.34877014160156], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000620.npy"} +{"epoch": 0.9104258443465492, "step": 621, "batch_size": 64, "mean": 552.135986328125, "std": 664.7349243164062, "min": -1153.2401123046875, "p10": -146.4841430664062, "median": 517.9977722167969, "p90": 1443.9443603515626, "max": 2463.284423828125, "pos_frac": 0.84375, "sample": [660.8850708007812, -462.8314208984375, -715.34228515625, -70.82319641113281, 1609.682373046875, 53.565773010253906, 77.51355743408203, 243.3302764892578, 709.4431762695312, 791.9188232421875, 792.53564453125, 341.69439697265625, 234.4630889892578, 908.4725341796875, 618.9974365234375, -311.242431640625, -213.13186645507812, 936.0248413085938, -1153.2401123046875, 114.46441650390625, 1690.3074951171875, 150.29754638671875, 1037.55908203125, 511.97100830078125, 1247.97021484375, 2463.284423828125, 86.3318862915039, 1136.32763671875, 1288.181640625, -111.08035278320312, 1453.6026611328125, 545.7653198242188, 770.8829956054688, 270.28497314453125, 850.5437622070312, 1635.3658447265625, 194.36380004882812, 529.8121337890625, 394.21722412109375, 1067.4454345703125, -20.606643676757812, 1421.4083251953125, -161.65719604492188, 548.4555053710938, 202.83242797851562, 195.65216064453125, 1405.8551025390625, 271.24774169921875, 1019.5314331054688, 1165.167724609375, -946.6878662109375, 253.3113250732422, 55.540672302246094, 381.6409606933594, 221.4717254638672, 524.0245361328125, 794.85595703125, 1612.7982177734375, 240.88632202148438, 359.80615234375, 1580.7906494140625, 841.6357421875, 755.5931396484375, 233.3355255126953], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000621.npy"} +{"epoch": 0.9118942731277533, "step": 622, "batch_size": 64, "mean": 386.90313720703125, "std": 593.39111328125, "min": -1043.0831298828125, "p10": -121.12724456787107, "median": 327.34132385253906, "p90": 869.1915649414063, "max": 2618.849365234375, "pos_frac": 0.84375, "sample": [614.55126953125, 425.475341796875, -169.30184936523438, 437.40203857421875, 374.384765625, 349.8395690917969, -668.0018310546875, 383.7079772949219, 2404.882080078125, 630.6509399414062, 54.169776916503906, -219.7462158203125, 98.39985656738281, -130.39381408691406, 1.322113037109375, 516.4801025390625, 163.42799377441406, 304.84307861328125, 161.60177612304688, 146.55397033691406, 1142.25537109375, -57.38482666015625, 207.40480041503906, 2618.849365234375, 533.8008422851562, 49.237579345703125, 631.5440063476562, 653.754638671875, 170.7812042236328, -1043.0831298828125, 356.7434387207031, -99.5052490234375, 841.2501831054688, 234.69859313964844, 268.96636962890625, 836.4077758789062, 222.9099884033203, -680.9332885742188, 925.8446655273438, 541.4930419921875, 813.1978759765625, 881.1664428710938, 52.345497131347656, 720.2649536132812, 243.12295532226562, 36.0965576171875, 394.7953186035156, -1.761688232421875, 125.44206237792969, 268.56060791015625, 1561.169189453125, 690.1119995117188, 765.7552490234375, 656.6700439453125, 526.7103271484375, 808.9146118164062, -770.1595458984375, 6.104423522949219, 101.87922668457031, 1264.9805908203125, 586.8275146484375, 521.6262817382812, 17.255512237548828, 255.4415283203125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000622.npy"} +{"epoch": 0.9133627019089574, "step": 623, "batch_size": 64, "mean": 305.0443115234375, "std": 593.2865600585938, "min": -1224.2554931640625, "p10": -227.53450622558591, "median": 234.83583068847656, "p90": 1014.8455200195312, "max": 2381.854736328125, "pos_frac": 0.71875, "sample": [138.10333251953125, 361.6241455078125, -749.5984497070312, 1479.7994384765625, -191.22454833984375, -150.78553771972656, 641.7971801757812, 192.93789672851562, 1007.3388061523438, 59.3052978515625, 1363.472900390625, 296.8968200683594, 543.5331420898438, 116.92631530761719, 645.3353881835938, -87.43302917480469, 859.5340576171875, 403.39752197265625, -178.121826171875, 39.9124755859375, -81.39366149902344, 432.145263671875, 1029.41357421875, -310.081787109375, -1224.2554931640625, 236.92431640625, 339.1326904296875, -855.0760498046875, 422.3070373535156, 622.1187744140625, -155.74447631835938, 267.8856506347656, -46.117576599121094, -75.56449127197266, -161.94052124023438, 124.21726989746094, 829.2333374023438, 599.4199829101562, 912.5228881835938, 232.74734497070312, 116.19813537597656, -531.648681640625, 355.0147399902344, 1282.6839599609375, -243.09591674804688, -83.96614074707031, 2381.854736328125, 494.3341064453125, 8.599506378173828, -362.2135925292969, 1018.0626831054688, 559.9193115234375, 202.9303741455078, 506.41192626953125, 215.40847778320312, 1775.64599609375, -88.4376220703125, 157.66574096679688, 255.548095703125, 21.152477264404297, 510.44842529296875, 239.0233154296875, 773.21142578125, 27.43994140625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000623.npy"} +{"epoch": 0.9148311306901615, "step": 624, "batch_size": 64, "mean": 455.75689697265625, "std": 550.9793090820312, "min": -1134.2239990234375, "p10": -208.5198059082031, "median": 442.661376953125, "p90": 1137.3185058593754, "max": 1429.06494140625, "pos_frac": 0.796875, "sample": [690.2960205078125, -461.03570556640625, 685.826171875, 568.933349609375, 516.0391235351562, 960.5853271484375, 1188.884033203125, 1429.06494140625, 1281.837646484375, 256.2158203125, 113.6553955078125, -45.0009765625, 172.92526245117188, 908.3229370117188, 789.57177734375, -278.9920654296875, 373.39312744140625, 781.1982421875, 983.338623046875, -852.3822631835938, 380.127197265625, 202.71473693847656, 310.642578125, 910.5825805664062, 301.3216247558594, 238.79702758789062, 821.7161254882812, 828.3629760742188, 867.8148193359375, -183.01962280273438, 151.97669982910156, 1045.75830078125, -219.44845581054688, 396.24212646484375, -339.737060546875, 1031.5048828125, 471.77496337890625, -88.61846923828125, 798.2061767578125, 212.58642578125, -1134.2239990234375, 89.17459869384766, -67.4501953125, 441.52740478515625, 1018.635986328125, 953.857666015625, 293.59783935546875, 788.199951171875, 1034.343505859375, 443.2228698730469, 193.2593536376953, -493.71875, 1.0395984649658203, 1288.3133544921875, 442.0998840332031, 480.5601806640625, -181.67709350585938, 748.6511840820312, -74.76100158691406, 687.1466064453125, 1391.5091552734375, 79.03331756591797, 1367.55908203125, 1176.55859375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000624.npy"} +{"epoch": 0.9162995594713657, "step": 625, "batch_size": 64, "mean": 465.24932861328125, "std": 518.1373901367188, "min": -451.5715637207031, "p10": -97.52218856811521, "median": 398.3018035888672, "p90": 1033.3616943359375, "max": 2567.714599609375, "pos_frac": 0.796875, "sample": [402.0840759277344, -16.462614059448242, -75.49808502197266, 892.9041137695312, 454.82122802734375, 65.66217041015625, 695.5428466796875, 1035.4091796875, 880.3865356445312, 943.377197265625, 68.47476196289062, 384.0380554199219, 372.079833984375, -16.993362426757812, 1028.584228515625, 346.62506103515625, 219.5501708984375, -126.39378356933594, 2567.714599609375, 394.51953125, 1693.0264892578125, 215.3594207763672, 181.65316772460938, 842.9017944335938, -124.34622955322266, 672.1942138671875, 1579.060546875, -28.914321899414062, -149.8535614013672, 320.3753356933594, 517.6644897460938, 443.3375549316406, 587.6016235351562, -275.22369384765625, 552.29443359375, -106.96109008789062, 100.54479217529297, 512.060546875, 192.72723388671875, 800.0812377929688, 1195.5338134765625, 511.35272216796875, 1199.2435302734375, 434.78948974609375, 253.6776885986328, 431.9897766113281, -59.06709289550781, 459.3924560546875, 479.08294677734375, 646.964599609375, 115.97611236572266, -2.6389312744140625, 878.7681884765625, 266.55096435546875, 332.3338928222656, 57.766868591308594, 123.05354309082031, 267.3320007324219, 926.9619140625, -451.5715637207031, 629.4518432617188, -148.65255737304688, 1319.603759765625, 866.0505981445312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000625.npy"} +{"epoch": 0.9177679882525698, "step": 626, "batch_size": 64, "mean": 294.22900390625, "std": 595.4208374023438, "min": -1897.4224853515625, "p10": -283.30132446289065, "median": 215.64205169677734, "p90": 957.8495910644533, "max": 2074.11767578125, "pos_frac": 0.671875, "sample": [501.1344909667969, 101.79351806640625, 205.490478515625, 512.4854736328125, -12.757293701171875, 225.7936248779297, 788.4929809570312, 320.432861328125, 821.4519653320312, -1897.4224853515625, 1341.0740966796875, 669.6753540039062, 1384.6429443359375, 2074.11767578125, -181.19561767578125, -17.125328063964844, 174.93939208984375, -426.5462951660156, -56.31005859375, -245.07333374023438, -170.67779541015625, 281.77264404296875, 195.5646209716797, 106.59410858154297, 47.36012268066406, -32.59431457519531, 193.06417846679688, -423.164306640625, 659.0396728515625, 688.590576171875, -14.70319938659668, 576.70458984375, -614.427734375, -477.60052490234375, 360.1075744628906, 1191.0765380859375, 1157.8330078125, 449.53851318359375, 403.53668212890625, 643.179931640625, 667.4317016601562, -281.7456359863281, 978.8142700195312, 504.55633544921875, 1728.96435546875, 390.78570556640625, -136.1185302734375, 197.13870239257812, 122.1937255859375, 300.48223876953125, -75.85369873046875, 908.9320068359375, 381.6982421875, 885.894287109375, -30.76361083984375, 180.6681365966797, 580.3207397460938, 444.370361328125, -604.5509033203125, -6.8851776123046875, 173.23789978027344, -283.9680480957031, 311.9051513671875, -12.741191864013672], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000626.npy"} +{"epoch": 0.9192364170337739, "step": 627, "batch_size": 64, "mean": 571.4762573242188, "std": 794.742431640625, "min": -702.665283203125, "p10": -145.54283752441404, "median": 419.00547790527344, "p90": 1365.1382446289062, "max": 3929.041015625, "pos_frac": 0.78125, "sample": [453.4372863769531, 256.989501953125, 1256.7196044921875, 342.9256896972656, 1.3591079711914062, -56.76350402832031, 190.12158203125, 665.1571044921875, 691.2921752929688, 503.5025939941406, 1365.2969970703125, -521.173828125, 155.68075561523438, 562.791748046875, -108.7533187866211, 1123.53955078125, -71.22257232666016, 889.6289672851562, 907.0470581054688, 977.011474609375, -589.4840087890625, -92.59664916992188, 684.5515747070312, -21.80348014831543, 1602.946044921875, 130.7906494140625, 465.9356689453125, 2521.24609375, 1014.748779296875, 429.9677429199219, 1528.0950927734375, 98.19245910644531, 480.8658447265625, 1185.4981689453125, 476.04364013671875, 1341.2291259765625, 1459.7041015625, 259.8924865722656, 588.9122924804688, -186.62545776367188, 151.06295776367188, -126.7779541015625, 3012.07763671875, 1364.767822265625, 705.863037109375, -153.58493041992188, 3929.041015625, -352.6041259765625, 626.9921875, -64.64846801757812, 1033.722412109375, 196.46426391601562, 867.861328125, -702.665283203125, 350.10540771484375, 323.784423828125, 408.043212890625, 228.2388458251953, -371.8105163574219, 1149.4080810546875, 103.9366683959961, 209.46705627441406, 337.835693359375, 385.2030029296875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000627.npy"} +{"epoch": 0.920704845814978, "step": 628, "batch_size": 64, "mean": 402.702880859375, "std": 608.141357421875, "min": -679.7423095703125, "p10": -310.0439331054687, "median": 329.3836212158203, "p90": 1135.2161376953125, "max": 1856.366943359375, "pos_frac": 0.6875, "sample": [317.3559875488281, -20.435501098632812, 305.200927734375, 1128.4735107421875, 1644.0400390625, 205.86273193359375, 351.2978820800781, -193.06988525390625, -105.29039001464844, -230.49395751953125, 192.25900268554688, 86.82520294189453, -598.7360229492188, 1213.9459228515625, 887.4586181640625, 1043.04931640625, -45.98735046386719, -225.1763916015625, 1638.53369140625, 604.92333984375, 1856.366943359375, 746.1156005859375, 228.12661743164062, 42.77678680419922, -47.579124450683594, -547.1336059570312, 571.275390625, 843.914306640625, -463.1622314453125, -426.5351257324219, 918.850341796875, 352.53558349609375, 696.0366821289062, 989.3826904296875, 1051.174560546875, 912.5602416992188, 407.7572937011719, 665.1089477539062, 62.79869842529297, 82.35171508789062, -149.55093383789062, 465.22113037109375, 341.4112548828125, 249.24493408203125, 1545.7801513671875, 986.5270385742188, 1125.2440185546875, 197.13467407226562, -344.13677978515625, -679.7423095703125, -86.47006225585938, -142.19166564941406, 1138.1058349609375, 953.9675903320312, -587.9767456054688, -138.0186309814453, 1120.8125, 367.3548583984375, 469.1078186035156, 132.772705078125, 1313.062255859375, 570.8123168945312, -52.73854064941406, -165.50697326660156], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000628.npy"} +{"epoch": 0.922173274596182, "step": 629, "batch_size": 64, "mean": 398.7236022949219, "std": 532.7198486328125, "min": -1006.4948120117188, "p10": -315.64989929199214, "median": 392.27752685546875, "p90": 1112.4030517578126, "max": 1742.302001953125, "pos_frac": 0.796875, "sample": [163.99954223632812, 1048.737548828125, -402.16400146484375, -389.0257263183594, 389.36474609375, 466.91619873046875, 366.08929443359375, 686.7978515625, 1135.0289306640625, 561.4159545898438, 521.1680908203125, 799.5146484375, -121.3417739868164, 455.5811462402344, 1143.1058349609375, -41.98834991455078, 132.15499877929688, 80.12933349609375, 23.038217544555664, 708.4359741210938, 34.181793212890625, 1105.530029296875, 307.0448913574219, -381.4112548828125, 157.9807891845703, 503.1153869628906, -749.3323974609375, -10.773506164550781, 934.1080932617188, 737.2568969726562, 140.68759155273438, 804.83349609375, 1664.015380859375, 1742.302001953125, 333.2158508300781, 185.16580200195312, -1006.4948120117188, 1240.81689453125, 972.3470458984375, -172.5089569091797, -336.0301513671875, -484.14691162109375, 395.1903076171875, 43.58854675292969, 545.07861328125, 1115.3486328125, 324.0909118652344, 388.3861083984375, 781.9436645507812, 96.07820892333984, 649.9790649414062, 232.3626251220703, 305.25335693359375, 731.665771484375, 264.6239318847656, 506.38592529296875, -61.38176727294922, 633.3466186523438, 1209.0704345703125, 484.33013916015625, 554.6917724609375, 522.4673461914062, 615.0445556640625, -268.0959777832031], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000629.npy"} +{"epoch": 0.9236417033773862, "step": 630, "batch_size": 64, "mean": 223.2398681640625, "std": 608.7569580078125, "min": -999.115234375, "p10": -389.6647125244141, "median": 190.40106201171875, "p90": 840.6015502929688, "max": 2337.590576171875, "pos_frac": 0.65625, "sample": [998.7646484375, -946.8876342773438, -600.78955078125, 337.50897216796875, -114.02597045898438, 798.7164306640625, 69.77397918701172, -384.58392333984375, 2299.83447265625, -109.34998321533203, 666.9324340820312, -296.009765625, 273.3816833496094, 845.3585205078125, 727.5332641601562, -999.115234375, -18.690034866333008, -554.864501953125, 273.20989990234375, 220.14767456054688, 385.2960510253906, -56.20945358276367, 201.46824645996094, 1351.0660400390625, -142.7071990966797, 2337.590576171875, 116.29740905761719, 829.501953125, 109.38993072509766, -143.8241729736328, 34.513450622558594, -205.46209716796875, 288.01837158203125, -525.1390380859375, 254.71580505371094, -391.8421936035156, -83.81919860839844, 308.2850646972656, 200.1797637939453, 432.3040771484375, 125.913818359375, -299.99053955078125, 235.35992431640625, -757.7806396484375, 215.36187744140625, 449.1092529296875, 170.3270263671875, 335.01239013671875, 659.66796875, -53.485511779785156, 126.42677307128906, 1119.3291015625, 591.9154663085938, 189.3062286376953, -275.94244384765625, 15.514053344726562, 286.87347412109375, 191.4958953857422, 547.5547485351562, -288.3044128417969, 1429.482177734375, 186.08436584472656, 303.38421630859375, -1.7322101593017578], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000630.npy"} +{"epoch": 0.9251101321585903, "step": 631, "batch_size": 64, "mean": 356.5467529296875, "std": 637.2601928710938, "min": -1740.0013427734375, "p10": -372.7593994140625, "median": 279.48895263671875, "p90": 1090.7616821289066, "max": 2163.252197265625, "pos_frac": 0.734375, "sample": [1197.4234619140625, 929.3612670898438, -257.7319030761719, 394.0137939453125, -125.40264129638672, 461.119384765625, -407.7779235839844, -44.00196075439453, -242.951416015625, -583.5850830078125, 1429.43505859375, 1260.278564453125, -384.0746765136719, 572.6514892578125, -531.7029418945312, 126.61717224121094, 608.2698974609375, 677.057861328125, 10.562782287597656, -346.3570861816406, 104.62531280517578, -11.54255485534668, 281.5483703613281, -501.0672302246094, 810.8319091796875, 188.91928100585938, 40.930397033691406, -28.535446166992188, 589.807861328125, 472.3465270996094, -1740.0013427734375, 516.669921875, 2163.252197265625, 926.7469482421875, 500.53692626953125, 389.07806396484375, 227.251953125, -204.8277130126953, 63.34265899658203, 849.5031127929688, 231.11685180664062, 154.40650939941406, 1836.888916015625, 587.3663940429688, 706.3369140625, -790.3693237304688, 1018.6065673828125, 254.6124267578125, 1420.463134765625, 681.1333618164062, 23.784629821777344, 699.2174682617188, -179.80938720703125, 319.9012756347656, 275.14874267578125, 164.94818115234375, 1121.685302734375, 673.397705078125, 1017.0831298828125, -131.38485717773438, 265.66229248046875, 812.8209228515625, 277.4295349121094, 995.92431640625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000631.npy"} +{"epoch": 0.9265785609397944, "step": 632, "batch_size": 64, "mean": 368.16595458984375, "std": 601.584228515625, "min": -1051.818115234375, "p10": -359.1348693847656, "median": 375.38047790527344, "p90": 1113.627783203125, "max": 2097.4609375, "pos_frac": 0.71875, "sample": [578.2423095703125, -721.1699829101562, 128.97503662109375, 431.9990234375, 741.2059326171875, -1051.818115234375, 1252.7264404296875, 1057.3162841796875, 1138.54443359375, 1035.1258544921875, 736.0457153320312, 577.5604858398438, 164.2274169921875, 1009.0880126953125, 246.51406860351562, 157.27777099609375, 617.5765380859375, 441.4980163574219, 151.0044708251953, -126.2662582397461, 1547.3408203125, 356.6291198730469, 777.255859375, 510.1116943359375, -482.40631103515625, 1141.9273681640625, 369.04425048828125, -287.2425842285156, 434.72442626953125, -335.2000732421875, -923.1580200195312, 390.8293151855469, 1191.8345947265625, 772.6917114257812, 732.8093872070312, 599.2490234375, 915.2625732421875, 838.7761840820312, 344.2355041503906, -217.93807983398438, 1067.178955078125, -139.578369140625, -318.66607666015625, -105.021484375, 655.2827758789062, 337.769287109375, 635.9838256835938, 242.71954345703125, 1133.534423828125, 2097.4609375, 1009.53564453125, -127.39633178710938, -562.3670654296875, -33.42229461669922, 189.62840270996094, -369.39263916015625, 381.7167053222656, 249.87686157226562, -486.1153869628906, 68.46380615234375, 110.39680480957031, -181.01588439941406, -149.01878356933594, 612.6162109375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000632.npy"} +{"epoch": 0.9280469897209985, "step": 633, "batch_size": 64, "mean": 245.96481323242188, "std": 550.3870239257812, "min": -1143.04931640625, "p10": -416.3603698730468, "median": 215.16271209716797, "p90": 998.9279174804688, "max": 1489.2076416015625, "pos_frac": 0.671875, "sample": [-352.5199890136719, 1126.239501953125, 527.1201171875, -805.2857055664062, 339.39080810546875, 342.01580810546875, -546.4163208007812, -50.996795654296875, 990.9166259765625, 173.21920776367188, 315.558349609375, 149.36328125, 368.51953125, -1143.04931640625, 74.2757568359375, 120.65076446533203, 164.56739807128906, 1158.459716796875, 174.88162231445312, -233.07083129882812, -177.064453125, 578.7410888671875, 320.71630859375, -76.25299835205078, 824.199462890625, 140.49282836914062, -16.910749435424805, -1.3472843170166016, -900.0321044921875, 913.2069702148438, 407.8958740234375, 237.53311157226562, 1050.9554443359375, 393.8329772949219, 810.5084228515625, -87.45075988769531, -327.2198486328125, -877.0004272460938, -442.08245849609375, 18.71088218688965, 1347.412353515625, 566.5753784179688, -148.67059326171875, -435.2669982910156, 590.3743286132812, 510.570556640625, 541.4427490234375, 1489.2076416015625, 1002.361328125, 1061.8712158203125, -15.350982666015625, 266.3367614746094, -372.2449035644531, 832.0096435546875, 266.00543212890625, -206.32212829589844, 192.7923126220703, 478.52099609375, 941.3638916015625, 117.77865600585938, 646.1943969726562, -38.723846435546875, 90.58118438720703, 331.658203125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000633.npy"} +{"epoch": 0.9295154185022027, "step": 634, "batch_size": 64, "mean": 278.1268615722656, "std": 566.111083984375, "min": -842.8515625, "p10": -312.08165893554684, "median": 183.6535873413086, "p90": 1074.66416015625, "max": 2002.8570556640625, "pos_frac": 0.671875, "sample": [105.69978332519531, 887.9117431640625, -258.7096862792969, 1447.9024658203125, -543.2483520507812, 261.90966796875, 46.52910614013672, 276.0552673339844, 311.3839111328125, 1169.99560546875, -74.68975830078125, 365.2313232421875, -181.77151489257812, 453.5400390625, 662.553466796875, 350.8124694824219, 295.76385498046875, -299.0396728515625, -577.21630859375, 1398.091552734375, 843.2301025390625, 21.140701293945312, -83.98988342285156, 131.1002655029297, -40.296905517578125, 81.08351135253906, -39.561798095703125, 472.390625, 450.5540466308594, 67.60404968261719, 2002.8570556640625, 235.70654296875, 662.2471923828125, -783.38525390625, 1299.7440185546875, 1087.357177734375, 678.2188720703125, -317.67108154296875, -181.05743408203125, 104.73989868164062, 798.165283203125, -106.15260314941406, -52.15479278564453, -842.8515625, 136.2806854248047, -619.6442260742188, 105.83470153808594, -427.4462890625, 231.0264892578125, 1045.047119140625, -170.41192626953125, 976.284912109375, 532.42822265625, 1254.3592529296875, 485.7178649902344, 123.07347106933594, 258.70562744140625, 812.302978515625, -141.59521484375, 325.7165832519531, 66.22073364257812, -76.75749206542969, -248.6541290283203, 543.906494140625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000634.npy"} +{"epoch": 0.9309838472834068, "step": 635, "batch_size": 64, "mean": 343.0375671386719, "std": 582.6983032226562, "min": -996.864501953125, "p10": -242.93414001464845, "median": 295.0702819824219, "p90": 1081.9549926757818, "max": 2282.6435546875, "pos_frac": 0.734375, "sample": [753.8084716796875, 111.35780334472656, 2282.6435546875, -59.703369140625, 401.6728820800781, -243.78866577148438, 11.350833892822266, 168.2339630126953, -4.779335021972656, 438.13519287109375, 296.7362060546875, 1337.3175048828125, -107.95671081542969, 466.68927001953125, -475.87884521484375, 267.7295837402344, 364.1572570800781, 77.70138549804688, 874.04638671875, 17.327255249023438, 264.2630615234375, 293.40435791015625, 884.875244140625, 1133.2669677734375, 331.80291748046875, 111.0807876586914, 13.147979736328125, 505.68505859375, -175.12538146972656, 1647.5712890625, -111.62860870361328, 395.54632568359375, 430.1848449707031, 1293.02001953125, 693.6608276367188, 69.74683380126953, 79.92748260498047, -87.75450134277344, 143.833740234375, 665.8809814453125, -140.44825744628906, 962.22705078125, 840.4110107421875, 456.6449890136719, -647.8369140625, -77.72042083740234, 476.23736572265625, -69.79650115966797, -240.94024658203125, 542.962890625, -279.56805419921875, -301.83282470703125, -996.864501953125, 751.0655517578125, 1141.6363525390625, 491.99053955078125, 340.5283508300781, 542.256591796875, -653.01220703125, 1962.94091796875, 90.76070404052734, 580.9418334960938, 380.8502197265625, 241.77894592285156], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000635.npy"} +{"epoch": 0.9324522760646109, "step": 636, "batch_size": 64, "mean": 389.9460144042969, "std": 615.0328979492188, "min": -997.0142211914062, "p10": -240.35672149658203, "median": 318.2567596435547, "p90": 948.3567321777346, "max": 3375.825439453125, "pos_frac": 0.765625, "sample": [49.546417236328125, -537.2153930664062, 116.86593627929688, -180.98690795898438, 262.7544250488281, -432.1923522949219, 314.94915771484375, 384.01593017578125, 454.6056213378906, 179.66159057617188, 719.3975830078125, -354.63555908203125, 363.0177001953125, 1117.7354736328125, 83.47673797607422, 827.2761840820312, -144.4501953125, 1936.0875244140625, 1166.905029296875, 710.22705078125, 407.7376708984375, 806.2843627929688, -231.15615844726562, -244.29981994628906, 628.8271484375, 318.8607482910156, 772.0152587890625, 545.2227783203125, 222.85581970214844, 3375.825439453125, 87.04985046386719, -120.33552551269531, 916.9596557617188, 1158.64111328125, -148.89022827148438, 961.8126220703125, 962.240234375, 255.23306274414062, 627.8513793945312, -272.7698974609375, 645.9744873046875, 191.0357208251953, 523.3189086914062, 741.1759033203125, 481.4614562988281, 284.0244140625, 651.6648559570312, -131.02609252929688, 277.66229248046875, -266.0965576171875, 201.0504150390625, 23.388214111328125, -997.0142211914062, 184.09266662597656, 382.2062072753906, -205.8105010986328, 825.3440551757812, 890.1409912109375, 113.0623550415039, 440.22454833984375, -24.731185913085938, 317.65277099609375, 581.6137084960938, 759.1222534179688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000636.npy"} +{"epoch": 0.933920704845815, "step": 637, "batch_size": 64, "mean": 431.7720947265625, "std": 582.35986328125, "min": -1144.7496337890625, "p10": -146.2045227050781, "median": 318.1550750732422, "p90": 1249.558154296875, "max": 1904.07275390625, "pos_frac": 0.828125, "sample": [163.55447387695312, 63.709861755371094, 309.8196105957031, 122.13048553466797, 89.60421752929688, 614.81494140625, 386.968017578125, -165.671630859375, 686.0684204101562, 1166.267822265625, 728.6309204101562, -1144.7496337890625, 233.83665466308594, 300.1717224121094, -244.7613983154297, 696.1874389648438, 737.8607177734375, 1404.5806884765625, 964.7861938476562, 570.81494140625, 1235.377685546875, 359.349853515625, 267.206787109375, 1904.07275390625, 142.37249755859375, 427.52166748046875, -260.5268249511719, -45.50406265258789, 300.1271667480469, 91.70156860351562, 482.9345397949219, -76.97799682617188, -164.1630859375, 653.2567749023438, 403.7304992675781, -104.30120849609375, 210.98121643066406, 489.49005126953125, 881.2625732421875, 165.6214599609375, 393.04541015625, 326.49053955078125, 186.14535522460938, 1157.40234375, 613.4019775390625, 464.71832275390625, 1274.9803466796875, 36.56732940673828, 1611.6041259765625, -833.2889404296875, 1255.635498046875, 39.06040954589844, 425.05682373046875, 1773.02099609375, 366.4396667480469, 158.33738708496094, 181.71022033691406, 1225.7991943359375, -165.62522888183594, 1817.8792724609375, 15.858970642089844, 155.7042694091797, 129.69775390625, -24.38675880432129], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000637.npy"} +{"epoch": 0.9353891336270191, "step": 638, "batch_size": 64, "mean": 352.85003662109375, "std": 650.7028198242188, "min": -3085.7939453125, "p10": -101.37957458496093, "median": 227.15255737304688, "p90": 1158.6864013671875, "max": 1641.33154296875, "pos_frac": 0.8125, "sample": [1226.569091796875, -766.0640258789062, -3085.7939453125, 946.0610961914062, 316.35638427734375, 156.4810791015625, 98.90640258789062, 53.0772705078125, 1239.936279296875, 428.4707946777344, 677.6458740234375, 1354.85595703125, 122.14488220214844, 659.7339477539062, 166.56431579589844, 482.7020263671875, 643.1766967773438, -371.1524353027344, 482.48785400390625, 95.01991271972656, 396.0823669433594, 51.659263610839844, -219.17120361328125, 193.93389892578125, 201.0834503173828, 951.4923706054688, 1372.88916015625, 42.03696823120117, 682.86181640625, 221.71475219726562, 563.1939086914062, 232.59036254882812, 685.8366088867188, -225.40090942382812, 110.98103332519531, 662.626953125, 819.7740478515625, -82.69331359863281, 377.8329772949219, -176.42103576660156, 39.49592590332031, 502.8373718261719, 193.12466430664062, 1179.190185546875, 81.29206085205078, -24.430044174194336, 1020.9628295898438, 173.62330627441406, 453.29034423828125, 1136.4149169921875, -102.23263549804688, -99.38909912109375, 1168.2313232421875, 309.3837890625, 152.19259643554688, 175.3551025390625, 49.744667053222656, -5.699975967407227, 832.5451049804688, 1027.7530517578125, -67.9026107788086, 24.19719696044922, 1641.33154296875, 931.0095825195312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000638.npy"} +{"epoch": 0.9368575624082232, "step": 639, "batch_size": 64, "mean": 169.15634155273438, "std": 573.1990356445312, "min": -1148.5975341796875, "p10": -627.4779174804687, "median": 142.46807098388672, "p90": 896.7145812988283, "max": 1496.264404296875, "pos_frac": 0.65625, "sample": [1274.670654296875, -693.0975341796875, -418.6165771484375, 842.4219360351562, 864.1895751953125, -240.8014678955078, 351.1618347167969, 302.8984680175781, -445.7601013183594, 609.4368286132812, 1139.381591796875, -22.290836334228516, 421.1531677246094, 515.2486572265625, -508.18731689453125, 193.12167358398438, -471.741943359375, -400.74139404296875, 7.855804443359375, 527.67041015625, 823.896728515625, -1148.5975341796875, 758.78076171875, 539.00146484375, -667.4910278320312, -695.7398681640625, -744.4122924804688, 429.1756896972656, 77.3119888305664, 13.671218872070312, 645.4703979492188, 574.2536010742188, 84.69902801513672, -645.0127563476562, 1154.2926025390625, 943.1489868164062, -362.0665283203125, 984.237548828125, -163.82566833496094, 69.8899154663086, 163.10989379882812, 81.07174682617188, 596.484130859375, 370.29107666015625, 1496.264404296875, -586.5632934570312, -170.86721801757812, 38.780479431152344, -424.240966796875, -129.75616455078125, 121.82624816894531, 249.85801696777344, 214.9151153564453, 839.6854248046875, -716.215576171875, -200.24087524414062, -371.5205993652344, 116.70606994628906, 108.00013732910156, 633.8466796875, 198.67849731445312, 910.6538696289062, 574.2098388671875, 192.37069702148438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000639.npy"} +{"epoch": 0.9383259911894273, "step": 640, "batch_size": 64, "mean": 554.99072265625, "std": 729.8882446289062, "min": -891.7062377929688, "p10": -168.49250183105468, "median": 407.57090759277344, "p90": 1478.3590942382814, "max": 2797.724609375, "pos_frac": 0.78125, "sample": [733.5305786132812, 505.7882995605469, -570.16796875, -12.238941192626953, 149.47360229492188, 250.60360717773438, 214.001953125, 1325.9970703125, 1294.928955078125, 222.79147338867188, 1504.2249755859375, 1984.2764892578125, 846.1135864257812, 315.7859191894531, -295.2490539550781, 477.4444274902344, 811.7496948242188, -163.272216796875, 85.67927551269531, 1418.00537109375, -515.61376953125, 805.614990234375, 366.9095458984375, 632.7582397460938, -93.64865112304688, -891.7062377929688, 143.51968383789062, 406.2720947265625, 2797.724609375, 749.7777709960938, 677.60498046875, 958.0274047851562, -527.8639526367188, 586.9122314453125, 2268.619140625, 280.66864013671875, -134.74868774414062, 414.62347412109375, -65.87811279296875, 372.6094970703125, -50.31077575683594, 165.29676818847656, -23.703872680664062, 1234.9686279296875, 1134.148193359375, 244.40170288085938, 680.6023559570312, 862.926513671875, -375.8091735839844, 1093.568603515625, 1878.8656005859375, 408.8697204589844, -170.72976684570312, 959.6107177734375, 16.328292846679688, 574.4423828125, 399.034423828125, 1090.6844482421875, 35.64692687988281, 759.7240600585938, 1548.2706298828125, 2420.205078125, 145.4756622314453, 155.24014282226562], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000640.npy"} +{"epoch": 0.9397944199706314, "step": 641, "batch_size": 64, "mean": 321.3974609375, "std": 629.36279296875, "min": -1049.9219970703125, "p10": -532.2751708984375, "median": 308.52720642089844, "p90": 988.7254150390628, "max": 2757.60400390625, "pos_frac": 0.75, "sample": [285.2861022949219, 276.3135986328125, 436.5527038574219, 887.0320434570312, -393.7851867675781, -64.29965209960938, 17.3138427734375, 1090.235107421875, 583.385986328125, 194.21534729003906, -849.4622192382812, 665.43017578125, 83.55543518066406, -439.91461181640625, 2757.60400390625, -620.7064819335938, -603.238037109375, -514.3333129882812, 122.18284606933594, 847.5509033203125, -126.93692016601562, 102.5336685180664, 56.401363372802734, 502.88580322265625, 163.07638549804688, 595.4959716796875, -939.1958618164062, 627.974853515625, 474.259033203125, 232.4163055419922, 830.7382202148438, 537.9266967773438, 413.2099609375, 10.0516357421875, 130.46624755859375, -107.03457641601562, 1016.609375, 306.22357177734375, 570.6888427734375, 678.6675415039062, 207.5740966796875, -26.697141647338867, -119.40186309814453, -593.95947265625, 189.4908447265625, 770.0009155273438, -1049.9219970703125, 1072.7069091796875, 716.844482421875, 923.662841796875, 1794.25634765625, 652.0552368164062, -539.9645385742188, 629.3372802734375, 12.233116149902344, 329.3417663574219, 615.7841796875, -84.52568054199219, 540.928955078125, 310.8308410644531, 465.6314392089844, 1137.8953857421875, 661.1892700195312, 1116.768798828125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000641.npy"} +{"epoch": 0.9412628487518355, "step": 642, "batch_size": 64, "mean": 412.79522705078125, "std": 797.368896484375, "min": -1303.2037353515625, "p10": -382.4710388183593, "median": 347.9495086669922, "p90": 1086.9995544433596, "max": 4087.33544921875, "pos_frac": 0.71875, "sample": [157.9517364501953, 125.90245056152344, 774.2841796875, 562.8862915039062, 215.01832580566406, -1303.2037353515625, 805.4765014648438, 412.8851623535156, -81.72603607177734, -430.7660217285156, -18.20667266845703, 150.82406616210938, 1423.735595703125, -295.92626953125, 694.4885864257812, 969.4974975585938, 460.215087890625, 928.5513305664062, -771.2241821289062, 77.47488403320312, 111.4212417602539, -79.91827392578125, 529.9578247070312, 18.034467697143555, 344.2728271484375, 648.1824340820312, -183.98141479492188, 163.87554931640625, 115.68182373046875, 351.6261901855469, 851.0472412109375, 639.918701171875, 857.5921020507812, 2300.95166015625, -247.50070190429688, -24.746490478515625, 550.029541015625, 665.7958984375, -427.312255859375, 477.4423522949219, 815.4285278320312, 234.73841857910156, 440.5832214355469, 1018.1389770507812, -408.88409423828125, 1116.51123046875, 747.4595947265625, 1463.475341796875, -240.5778350830078, -19.9971923828125, 4087.33544921875, 781.5763549804688, -629.3855590820312, 390.52630615234375, -188.13719177246094, -320.840576171875, 247.9659881591797, 2129.130615234375, 18.877288818359375, -944.750244140625, 487.0238037109375, 1570.03466796875, 160.008544921875, 942.1452026367188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000642.npy"} +{"epoch": 0.9427312775330396, "step": 643, "batch_size": 64, "mean": 305.1148986816406, "std": 527.81298828125, "min": -624.0858154296875, "p10": -321.34655151367184, "median": 227.777099609375, "p90": 954.7434875488282, "max": 1920.0308837890625, "pos_frac": 0.78125, "sample": [230.39932250976562, 118.11015319824219, 120.98721313476562, 689.08447265625, 840.7556762695312, 768.5068969726562, 483.329345703125, 124.42987823486328, 465.06488037109375, 673.1478881835938, 22.449613571166992, -509.96826171875, 311.6200866699219, 229.72708129882812, -222.06776428222656, 942.4072265625, 1249.73388671875, 960.0304565429688, 834.9874267578125, 348.9149169921875, 47.59059143066406, 47.09638977050781, -326.8437805175781, 152.37237548828125, 81.55165100097656, 73.798828125, -624.0858154296875, 536.7276611328125, 414.9803161621094, 1023.6175537109375, -564.9183349609375, 306.5942687988281, 74.2333984375, 125.2708740234375, 1014.185546875, 1169.05322265625, 119.94345092773438, -463.474853515625, 411.3127746582031, 225.82711791992188, -303.93597412109375, 1920.0308837890625, 892.4434204101562, 383.52001953125, 679.3290405273438, 109.93598175048828, 2.5927658081054688, 403.09344482421875, 1739.47509765625, 55.96615219116211, 704.9273681640625, 527.71240234375, 581.0904541015625, -252.20932006835938, 790.97705078125, 43.284278869628906, -490.56158447265625, -308.5196838378906, 352.9964904785156, -185.12582397460938, -232.80084228515625, 121.85897827148438, -535.0588989257812, -0.15161895751953125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000643.npy"} +{"epoch": 0.9441997063142438, "step": 644, "batch_size": 64, "mean": 331.0244140625, "std": 653.6292724609375, "min": -1641.2340087890625, "p10": -352.4884124755859, "median": 230.2210464477539, "p90": 1260.9201293945314, "max": 2117.095947265625, "pos_frac": 0.75, "sample": [2117.095947265625, 103.92293548583984, 763.355224609375, -1641.2340087890625, 621.341064453125, 327.40631103515625, -634.7501220703125, 224.67982482910156, 630.0007934570312, 425.0335998535156, 52.130340576171875, -77.69053649902344, 74.74738311767578, 345.72869873046875, 190.75572204589844, 596.5369262695312, 263.42987060546875, -92.31463623046875, 206.97116088867188, -508.8748474121094, 192.23648071289062, 1808.3861083984375, 13.46870231628418, -545.9043579101562, 942.1428833007812, 951.33447265625, 4.980266571044922, 235.76226806640625, -318.63067626953125, 1396.8094482421875, -179.4805908203125, -266.9283142089844, 816.75537109375, -0.6956634521484375, 9.57786750793457, 0.8202457427978516, 1287.8084716796875, 399.3265075683594, 657.1676635742188, 258.0584716796875, -105.0723648071289, -366.9988708496094, 486.5260925292969, 22.700538635253906, 1053.6451416015625, 589.2748413085938, -216.14039611816406, -227.98345947265625, 91.71504211425781, 972.8109130859375, 1378.6064453125, 787.473388671875, 68.93869018554688, -932.4736938476562, 695.4491577148438, 1236.099609375, 318.3348388671875, 1271.5574951171875, 24.790435791015625, 330.8667297363281, 692.9907836914062, 1648.1658935546875, 170.069580078125, -457.0522155761719], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000644.npy"} +{"epoch": 0.9456681350954479, "step": 645, "batch_size": 64, "mean": 191.55389404296875, "std": 673.817626953125, "min": -1199.4468994140625, "p10": -624.6673522949218, "median": 116.18141555786133, "p90": 1094.7602783203126, "max": 2213.822021484375, "pos_frac": 0.65625, "sample": [284.8301086425781, 1115.5872802734375, -331.9162902832031, 1046.1639404296875, -1084.3375244140625, -519.0487060546875, -752.0596923828125, 138.2926025390625, 501.0125427246094, -203.9619598388672, 10.485706329345703, -43.17401123046875, 57.91948699951172, -7.393033981323242, -182.13943481445312, -642.5735473632812, 459.21014404296875, 250.83004760742188, 915.5591430664062, 1173.83203125, 536.96630859375, -416.8564453125, 151.1094970703125, 51.99342346191406, 126.79627990722656, 84.27775573730469, 799.2239990234375, 105.5665512084961, -1014.3844604492188, -1199.4468994140625, 166.7359161376953, 562.76416015625, -409.3061828613281, 1181.156005859375, 983.2443237304688, -72.94827270507812, 1728.8544921875, 893.1622314453125, -1090.5118408203125, -494.5531005859375, 1399.2476806640625, 12.876617431640625, 165.47393798828125, 13.437885284423828, 488.57568359375, 520.4681396484375, 451.96453857421875, 294.5052795410156, 322.86614990234375, 567.7145385742188, 50.44073486328125, -240.20677185058594, 1355.36279296875, 30.57593536376953, 487.5047607421875, 2213.822021484375, -36.897193908691406, 328.4271240234375, -918.8370361328125, -172.16647338867188, 548.9535522460938, 100.54854583740234, -3.285747528076172, -582.88623046875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000645.npy"} +{"epoch": 0.947136563876652, "step": 646, "batch_size": 64, "mean": 270.04473876953125, "std": 487.47430419921875, "min": -888.6386108398438, "p10": -218.3464782714844, "median": 185.31362915039062, "p90": 944.3240173339843, "max": 1626.8077392578125, "pos_frac": 0.734375, "sample": [963.01904296875, 1294.779052734375, -58.26184844970703, 941.7455444335938, 566.8680419921875, 160.11671447753906, 704.3429565429688, 664.4468383789062, 1063.8389892578125, 38.83073425292969, 248.85317993164062, 499.73565673828125, -810.7137451171875, -42.853302001953125, 748.5176391601562, 74.64759826660156, -208.98655700683594, 167.09500122070312, 898.4871215820312, -194.62384033203125, 307.3154296875, -214.95355224609375, -275.29742431640625, 308.2032165527344, -18.91153335571289, 314.8199768066406, 410.26080322265625, 258.6278076171875, 9.042940139770508, 777.5916748046875, 147.0211181640625, 446.7175598144531, 116.79025268554688, 229.24493408203125, -203.0298614501953, 175.5896759033203, 178.25830078125, 1067.793701171875, -59.97761535644531, -888.6386108398438, 301.27374267578125, 549.2640380859375, 128.27915954589844, -395.52764892578125, 71.13258361816406, -548.787109375, -42.70103454589844, 73.9841537475586, -218.4214324951172, 1626.8077392578125, 912.1213989257812, 142.57342529296875, 517.6796264648438, 99.90995788574219, 1070.1251220703125, -218.1715850830078, -489.92877197265625, 102.84854888916016, 196.4653778076172, 261.2933044433594, 725.9335327148438, 945.4290771484375, 472.55865478515625, 192.36895751953125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000646.npy"} +{"epoch": 0.9486049926578561, "step": 647, "batch_size": 64, "mean": 372.22430419921875, "std": 493.4676208496094, "min": -995.9664306640625, "p10": -172.24453125, "median": 373.63597106933594, "p90": 979.615740966797, "max": 1826.12353515625, "pos_frac": 0.78125, "sample": [473.290283203125, 556.9097900390625, 225.42922973632812, 299.08831787109375, 195.96578979492188, 948.9971923828125, 400.2637939453125, 356.7223205566406, 556.8829345703125, 623.8859252929688, 1188.6319580078125, 200.41323852539062, 37.615692138671875, 1597.5809326171875, 642.1610107421875, 411.41888427734375, -22.891616821289062, 992.7379760742188, 663.3466796875, -206.82972717285156, 278.6086120605469, 195.29449462890625, -2.4703502655029297, -176.02587890625, 616.9810180664062, 329.6282958984375, -103.33824157714844, 1826.12353515625, -781.9835205078125, 656.0480346679688, -210.94256591796875, -95.74152374267578, 137.19949340820312, 783.2796020507812, 897.3524780273438, 566.8590087890625, 503.78375244140625, 483.37396240234375, -517.851806640625, 258.6798095703125, 605.040283203125, 13.580436706542969, 1192.3428955078125, -163.42138671875, -27.47707748413086, 350.3880615234375, 436.1455383300781, 390.54962158203125, 27.329322814941406, 1180.093994140625, 167.5413055419922, 46.15135955810547, 996.87939453125, 259.282470703125, -79.22773742675781, 553.6827392578125, -995.9664306640625, 803.8260498046875, -211.74478149414062, 390.96685791015625, 552.5472412109375, 769.0676879882812, 494.6059875488281, 283.6920471191406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000647.npy"} +{"epoch": 0.9500734214390602, "step": 648, "batch_size": 64, "mean": 527.5923461914062, "std": 700.7243041992188, "min": -845.380126953125, "p10": -121.43109283447265, "median": 478.1593780517578, "p90": 1462.1847534179688, "max": 3389.26416015625, "pos_frac": 0.765625, "sample": [559.67919921875, 170.23031616210938, 298.38134765625, 361.1982116699219, 303.6875915527344, 811.721923828125, 1772.1612548828125, 1464.203125, 525.0306396484375, 1315.9285888671875, -69.63906860351562, -781.8983154296875, 6.522857666015625, 736.9872436523438, 305.912353515625, -258.01654052734375, 1280.2786865234375, 517.2583618164062, 89.98210144042969, -159.7572021484375, -10.730268478393555, 609.317626953125, -86.57984161376953, 794.1202392578125, -122.24235534667969, 1539.4644775390625, 554.76123046875, 98.22966003417969, -43.43128967285156, -11.601860046386719, 787.8772583007812, 543.742919921875, -79.0903549194336, 3389.26416015625, 612.075927734375, 560.870361328125, -845.380126953125, 471.7160339355469, 37.55511474609375, -444.4939270019531, -119.53814697265625, 256.52679443359375, 1620.1949462890625, 484.60272216796875, 578.021240234375, 456.82611083984375, 1032.643310546875, -78.0831298828125, 542.74755859375, 820.4888305664062, 1070.7452392578125, 419.52789306640625, 233.37008666992188, 544.679443359375, 1457.4752197265625, 162.8743896484375, 569.6358642578125, 1002.66162109375, 389.4872741699219, 1934.1966552734375, 1174.1502685546875, 237.50216674804688, -429.2403259277344, 1799.114990234375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000648.npy"} +{"epoch": 0.9515418502202643, "step": 649, "batch_size": 64, "mean": 270.5200500488281, "std": 605.825927734375, "min": -779.5247802734375, "p10": -457.83247070312495, "median": 255.17202758789062, "p90": 996.4988586425784, "max": 2831.427734375, "pos_frac": 0.65625, "sample": [323.23089599609375, 19.11994171142578, 423.608154296875, -103.15133666992188, 385.0780334472656, -467.0740966796875, -16.14193344116211, 242.58331298828125, 1027.3197021484375, 395.86138916015625, 616.0335693359375, -211.812255859375, -518.0514526367188, 267.7607421875, -432.9786071777344, -104.71151733398438, 113.98286437988281, 444.6075744628906, -338.8114318847656, 282.04376220703125, 646.07470703125, 47.409645080566406, -616.4559936523438, -244.3651885986328, 900.5189208984375, -672.0391235351562, 760.8475341796875, 753.3548583984375, 126.56367492675781, 514.415771484375, 389.6557922363281, -336.4423522949219, 183.5943603515625, -41.100425720214844, 734.1885986328125, 538.1591796875, 765.6727294921875, 337.5736083984375, 834.3137817382812, -279.2371826171875, 1068.604736328125, 278.3616027832031, 14.316097259521484, 1083.516845703125, 635.6727294921875, -133.00643920898438, 1326.571533203125, 2831.427734375, 218.9718017578125, 456.43023681640625, -22.46836280822754, 452.63531494140625, -10.485885620117188, 924.5835571289062, -317.45654296875, 1156.4544677734375, 1159.4521484375, -471.39263916015625, 23.703807830810547, 13.620904922485352, -779.5247802734375, -631.9130859375, 780.2764892578125, -436.2686767578125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000649.npy"} +{"epoch": 0.9530102790014684, "step": 650, "batch_size": 64, "mean": 418.1771545410156, "std": 652.5241088867188, "min": -1053.293212890625, "p10": -329.64090576171867, "median": 388.88116455078125, "p90": 1088.378503417969, "max": 2250.052001953125, "pos_frac": 0.765625, "sample": [339.4984130859375, -357.96893310546875, 709.9093017578125, 446.73956298828125, 166.08236694335938, 904.0177001953125, 309.48211669921875, -937.0154418945312, 2111.033203125, 546.457763671875, 1063.203369140625, -509.39453125, 1233.72314453125, 986.1679077148438, 802.8511352539062, 1036.2650146484375, 402.2916259765625, 1730.1124267578125, 984.6199951171875, 2250.052001953125, 932.9273071289062, 46.34021759033203, 1099.1678466796875, 508.4084167480469, 183.43946838378906, -36.0941047668457, -173.24615478515625, 613.2373046875, 199.97396850585938, 461.8570861816406, 593.2673950195312, 44.47416687011719, -235.98788452148438, -975.5809326171875, -263.54217529296875, 393.1487731933594, -70.20149230957031, 1630.6029052734375, -180.7534942626953, 37.95062255859375, 556.5808715820312, 288.2735900878906, -561.7645874023438, 686.9288940429688, 384.6135559082031, 648.716796875, 88.14088439941406, 713.7296142578125, 285.28271484375, 1350.510986328125, 382.2536926269531, 289.0118408203125, 664.97314453125, 1008.9486083984375, 649.2509155273438, -41.63788604736328, 469.02459716796875, -261.29107666015625, -1053.293212890625, 307.1089172363281, 917.55224609375, 136.03570556640625, -371.6535339355469, 198.52243041992188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000650.npy"} +{"epoch": 0.9544787077826725, "step": 651, "batch_size": 64, "mean": 508.00433349609375, "std": 563.6129760742188, "min": -524.3338012695312, "p10": -82.66327285766596, "median": 416.0706024169922, "p90": 1388.0180786132814, "max": 1880.7452392578125, "pos_frac": 0.875, "sample": [898.5156860351562, 467.81512451171875, 1880.2060546875, 420.4022216796875, -305.1790771484375, 6.2763671875, 358.0875244140625, 558.822998046875, 106.47151184082031, 224.6243133544922, 1115.1168212890625, 1880.7452392578125, 277.9555969238281, -155.66802978515625, 120.8663330078125, 117.98233795166016, 11.72287368774414, 371.21746826171875, -247.64004516601562, 822.8836669921875, 1418.6640625, 160.4932098388672, 996.022216796875, 271.01898193359375, 586.0817260742188, 67.97679138183594, -110.17510986328125, -135.87493896484375, 65.6292724609375, 607.1939697265625, 180.20286560058594, -18.46898651123047, 1052.9796142578125, 225.39151000976562, 1465.2393798828125, 645.1199951171875, 476.21051025390625, 32.532470703125, 609.154052734375, 2.777008056640625, 411.7389831542969, 1236.9560546875, -338.90325927734375, 534.0172729492188, 679.2501831054688, 595.2738647460938, 5.25927734375, 1110.230712890625, 12.458110809326172, 467.27398681640625, 157.317626953125, 274.385009765625, 1398.6141357421875, 1649.0694580078125, 529.5674438476562, -524.3338012695312, 904.3202514648438, 342.484130859375, 1820.90869140625, 449.1461486816406, 918.1785278320312, 1363.2939453125, 791.399169921875, 194.97891235351562], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000651.npy"} +{"epoch": 0.9559471365638766, "step": 652, "batch_size": 64, "mean": 364.4455871582031, "std": 617.27587890625, "min": -1500.5904541015625, "p10": -238.59774475097655, "median": 289.5014953613281, "p90": 1142.765869140625, "max": 2403.134765625, "pos_frac": 0.765625, "sample": [401.7489929199219, 171.28408813476562, 878.6781616210938, -770.6879272460938, 361.9922790527344, 315.66009521484375, 246.87266540527344, -491.0096740722656, 751.78662109375, 164.99783325195312, 799.2627563476562, 1341.5135498046875, 1142.63427734375, -1500.5904541015625, 102.90108489990234, 318.9101257324219, -42.89837646484375, 738.519775390625, 1272.9493408203125, -462.3617248535156, 736.1959838867188, 413.1788024902344, 96.76505279541016, -244.2430419921875, -13.016256332397461, -4.540771484375, 775.6885986328125, -186.71214294433594, 446.9930114746094, 73.67340850830078, 255.8600311279297, 2403.134765625, -158.06027221679688, 1835.30859375, 240.00384521484375, 1201.7041015625, 42.77946472167969, 78.30523681640625, 61.30322265625, 45.413360595703125, 449.3541259765625, 30.82115936279297, 193.28274536132812, -115.5129165649414, 899.8670043945312, 1047.3636474609375, 871.927001953125, -225.42538452148438, 355.1925048828125, 536.850830078125, 1512.9093017578125, 26.807506561279297, 760.0674438476562, 237.31484985351562, -260.6706848144531, 393.1500549316406, 1142.822265625, -528.5153198242188, 312.83392333984375, 886.310302734375, 346.496826171875, 385.38848876953125, 266.1690673828125, -42.184593200683594], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000652.npy"} +{"epoch": 0.9574155653450808, "step": 653, "batch_size": 64, "mean": 350.8651428222656, "std": 599.3539428710938, "min": -1024.2269287109375, "p10": -347.0454956054687, "median": 295.104736328125, "p90": 1158.04716796875, "max": 1912.76123046875, "pos_frac": 0.703125, "sample": [666.4263305664062, -11.193206787109375, -165.3617706298828, 1683.111083984375, 213.1702423095703, -376.8810729980469, 86.27180480957031, -297.20458984375, 98.2022705078125, 634.160888671875, 825.9764404296875, -232.60023498535156, -84.96544647216797, 1318.72412109375, 786.5765991210938, 1146.281494140625, 663.44287109375, 1211.23095703125, 1163.089599609375, -261.33404541015625, 114.94430541992188, 380.5855407714844, 1657.234619140625, 399.65972900390625, 317.93548583984375, 824.3536376953125, 961.5698852539062, 163.10260009765625, 104.66019439697266, -508.8380432128906, 1127.9840087890625, 293.82952880859375, 494.42218017578125, -1024.2269287109375, -219.96688842773438, -170.4892120361328, 319.15924072265625, 692.6702880859375, -445.8084411621094, 1782.5264892578125, 1912.76123046875, 659.8533325195312, -12.508974075317383, -77.97005462646484, 639.2139892578125, 468.474365234375, 20.01934814453125, -368.4058837890625, 356.4052734375, 226.25625610351562, 374.1947937011719, 113.29472351074219, 789.8038940429688, 190.42416381835938, 533.7174682617188, -453.89495849609375, -415.0420227050781, 296.37994384765625, 694.577392578125, 82.76942443847656, 262.5353088378906, -223.63661193847656, 306.5937194824219, -252.87974548339844], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000653.npy"} +{"epoch": 0.9588839941262849, "step": 654, "batch_size": 64, "mean": 337.2967529296875, "std": 538.356689453125, "min": -587.362060546875, "p10": -275.62715148925776, "median": 254.6338653564453, "p90": 1123.8900390625006, "max": 1786.88720703125, "pos_frac": 0.703125, "sample": [99.41716766357422, 445.56585693359375, -291.71539306640625, 297.122802734375, 503.83685302734375, -128.2346649169922, -204.69708251953125, 282.266357421875, 38.92494583129883, 644.889404296875, 610.0687866210938, -187.24319458007812, 39.83208465576172, 453.4559326171875, -326.71600341796875, -155.551513671875, -42.19799041748047, 1389.8970947265625, 902.259033203125, 530.7542114257812, 612.5630493164062, 1671.3382568359375, 361.3529052734375, 240.20086669921875, 954.888916015625, 575.0438232421875, -153.8689422607422, 501.84124755859375, 55.892486572265625, -150.656982421875, -51.00337219238281, 1308.35986328125, 428.72393798828125, 31.765045166015625, 36.9356689453125, 57.13245391845703, 827.331787109375, 267.2708435058594, 247.51632690429688, 166.27157592773438, 719.0073852539062, 659.92138671875, 745.5188598632812, -238.08792114257812, -587.362060546875, 808.8588256835938, 261.75140380859375, 1405.13720703125, 1196.319091796875, -159.80506896972656, 207.26824951171875, 1290.46337890625, 642.5653076171875, 163.00112915039062, -42.53862762451172, -402.4109802246094, 607.0953979492188, -368.0460205078125, 1786.88720703125, 881.9683837890625, 64.30587768554688, -319.71441650390625, -489.8858947753906, -136.06182861328125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000654.npy"} +{"epoch": 0.960352422907489, "step": 655, "batch_size": 64, "mean": 370.9095458984375, "std": 589.3904418945312, "min": -698.0243530273438, "p10": -351.78571166992174, "median": 251.06631469726562, "p90": 1113.513513183594, "max": 2119.296142578125, "pos_frac": 0.71875, "sample": [-395.96929931640625, -628.963134765625, 1124.7447509765625, 225.17996215820312, 44.81092071533203, 1098.282958984375, 677.7154541015625, 701.6171875, 408.1421203613281, -248.690673828125, 449.964111328125, 80.99943542480469, -49.3211669921875, -556.071533203125, 9.254144668579102, 873.6676025390625, -186.08871459960938, 1053.7703857421875, -243.68601989746094, 1271.40576171875, -31.04345703125, 405.0793762207031, 963.5380859375, -489.5082092285156, -407.62091064453125, 489.8963623046875, 317.2025451660156, 228.88296508789062, -64.31071472167969, 247.6475830078125, -137.87490844726562, 1556.403076171875, 127.37428283691406, 260.97015380859375, 634.3327026367188, 896.4078369140625, -698.0243530273438, 570.7724609375, 752.1029052734375, 868.100830078125, 250.3131103515625, 10.756446838378906, 469.8551940917969, 2119.296142578125, 1632.9500732421875, -14.676956176757812, 416.3748474121094, -410.3113098144531, 1069.5101318359375, 534.8446655273438, -162.9742889404297, 53.9971923828125, 251.81951904296875, 146.9916229248047, 1120.0408935546875, 236.86643981933594, 1673.6163330078125, -69.34114074707031, -106.80085754394531, 503.71295166015625, 627.2197875976562, 181.32850646972656, 171.63836669921875, 830.0901489257812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000655.npy"} +{"epoch": 0.9618208516886931, "step": 656, "batch_size": 64, "mean": 585.0487060546875, "std": 604.0172119140625, "min": -470.07763671875, "p10": -94.66459884643551, "median": 457.98353576660156, "p90": 1393.2496215820313, "max": 2389.77294921875, "pos_frac": 0.84375, "sample": [1389.3243408203125, 797.6565551757812, -470.07763671875, -18.462970733642578, 50.30329132080078, 394.90850830078125, 361.24664306640625, 623.1124877929688, 356.3560485839844, 774.4447631835938, 705.607177734375, 621.8060913085938, 261.180419921875, 156.97012329101562, 496.6745910644531, 1394.931884765625, -150.14556884765625, 99.113525390625, 1345.1806640625, 158.7099609375, 604.3612060546875, -53.7044677734375, 1945.7855224609375, -169.27407836914062, -145.51150512695312, 189.8316650390625, 748.6605224609375, 1217.572998046875, 292.20257568359375, 1072.737548828125, 420.5996398925781, 2389.77294921875, 474.9412536621094, -117.39222717285156, 1793.7872314453125, 744.972412109375, 441.02581787109375, 704.5638427734375, 1647.22265625, 223.46145629882812, 1747.0750732421875, 8.166091918945312, 1228.115478515625, 115.171142578125, 1146.1151123046875, 717.5950317382812, 320.79400634765625, 1801.052978515625, 303.08056640625, 508.3082580566406, 922.8456420898438, 260.8175964355469, -112.21894073486328, 185.82298278808594, 1209.1561279296875, 212.99673461914062, -49.25953674316406, 247.30169677734375, -228.4940185546875, 988.5474243164062, 646.9059448242188, 27.733530044555664, 744.3917236328125, 716.64013671875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000656.npy"} +{"epoch": 0.9632892804698973, "step": 657, "batch_size": 64, "mean": 496.7813720703125, "std": 626.5299072265625, "min": -1127.7767333984375, "p10": -138.31977081298825, "median": 418.05841064453125, "p90": 1294.6465087890629, "max": 2291.47265625, "pos_frac": 0.84375, "sample": [883.1636962890625, 47.078094482421875, 1011.0038452148438, 441.83502197265625, 1344.3067626953125, 1584.5164794921875, 801.7686157226562, 136.4709930419922, 786.0421752929688, -196.951171875, -253.0889892578125, 416.0653076171875, 1186.11181640625, -110.30155944824219, 250.7606658935547, 344.42254638671875, 112.15583801269531, 396.8563537597656, 128.84246826171875, 1429.1658935546875, 2226.462646484375, 416.8896484375, 170.54470825195312, 2291.47265625, 915.8070068359375, 1341.161376953125, 199.72630310058594, -24.4375, 588.6551513671875, 626.1956787109375, 934.15380859375, 449.3574523925781, 51.53556823730469, -1127.7767333984375, 600.545654296875, 491.21923828125, 236.3851776123047, -150.32757568359375, 12.045440673828125, 310.7152404785156, 597.25244140625, -853.849609375, -344.55499267578125, 632.6195678710938, 952.4947509765625, 46.12548065185547, 337.9781799316406, 611.354248046875, 565.7543334960938, 55.43659210205078, 711.7752685546875, 920.3786010742188, 159.9989471435547, 810.98193359375, 516.9461059570312, 267.0054016113281, 951.8579711914062, 2201.594482421875, 367.1414489746094, -169.05166625976562, 188.53890991210938, -103.23072814941406, 649.6768798828125, 419.2271728515625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000657.npy"} +{"epoch": 0.9647577092511013, "step": 658, "batch_size": 64, "mean": 365.7066955566406, "std": 533.568359375, "min": -1000.6300048828125, "p10": -287.47199096679685, "median": 312.38018798828125, "p90": 1035.353576660157, "max": 1701.0584716796875, "pos_frac": 0.75, "sample": [271.24700927734375, 711.3800048828125, 489.8952941894531, -276.940673828125, 195.0873565673828, -113.18359375, 1209.951416015625, 279.7594909667969, -403.98443603515625, 389.0274658203125, 487.8245849609375, 1312.60888671875, -155.80470275878906, 296.7616271972656, 300.366943359375, -566.5992431640625, 269.5782165527344, 469.2017517089844, 192.73568725585938, -20.930004119873047, 537.43994140625, 163.616455078125, 324.3934326171875, 158.97837829589844, -1000.6300048828125, 878.8048095703125, -442.4701232910156, 659.6658325195312, 741.7752075195312, -197.8599853515625, 60.19642639160156, 238.44134521484375, -28.563724517822266, 523.3805541992188, 235.7613983154297, 1701.0584716796875, -4.800363540649414, 1100.27197265625, 621.123779296875, 873.7579956054688, 211.8130645751953, 327.9552917480469, 542.0535888671875, 1100.9290771484375, 883.8773193359375, 546.3814697265625, -55.278961181640625, 238.11465454101562, 854.3204345703125, 689.6251220703125, -458.5084228515625, 854.3112182617188, -270.803466796875, 1522.9593505859375, 763.5562744140625, 847.96435546875, 509.09393310546875, 1426.95361328125, -291.98541259765625, 710.1882934570312, 7.6744537353515625, 472.1129455566406, 61.35832977294922, -571.7616577148438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000658.npy"} +{"epoch": 0.9662261380323054, "step": 659, "batch_size": 64, "mean": 390.3408203125, "std": 517.9976196289062, "min": -541.9475708007812, "p10": -225.46707305908203, "median": 333.22373962402344, "p90": 1124.4968994140625, "max": 1783.458740234375, "pos_frac": 0.75, "sample": [626.9822998046875, -113.04660034179688, 185.47723388671875, 902.305419921875, -32.42667770385742, -283.7489013671875, 1128.1446533203125, -222.3114013671875, -516.8744506835938, -126.43278503417969, 1083.9447021484375, 391.5379638671875, 478.1400146484375, -198.02989196777344, 60.45722961425781, 155.55474853515625, 537.0379638671875, 304.8327331542969, 278.8726806640625, 309.8473205566406, 324.74969482421875, 674.7913208007812, -367.9934997558594, 1783.458740234375, -541.9475708007812, 1442.111572265625, 450.8661804199219, 373.1081848144531, 1381.0855712890625, 271.29083251953125, 349.8674621582031, 341.6977844238281, 0.9011039733886719, -3.1601104736328125, 728.6168212890625, 212.6723175048828, 143.8146209716797, -1.9650230407714844, 107.175048828125, 1136.4134521484375, -347.3363037109375, -12.875694274902344, 591.9086303710938, -226.8195037841797, 582.042724609375, 236.55426025390625, -28.71776580810547, 551.1411743164062, 1525.03564453125, 1634.625244140625, 661.3396606445312, 199.2396697998047, 622.7054443359375, 566.5626220703125, 537.2540283203125, 186.95591735839844, 763.408935546875, 1115.9854736328125, 604.13134765625, -345.8826599121094, 820.578125, 152.75221252441406, 366.231689453125, 467.17138671875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000659.npy"} +{"epoch": 0.9676945668135095, "step": 660, "batch_size": 64, "mean": 457.5646057128906, "std": 573.674072265625, "min": -765.3089599609375, "p10": -237.80464630126949, "median": 425.43421936035156, "p90": 1201.5145996093752, "max": 1891.8614501953125, "pos_frac": 0.78125, "sample": [-293.8214111328125, 219.42123413085938, 952.5704956054688, 341.7032165527344, 363.82073974609375, 442.81463623046875, 673.12353515625, 787.3494873046875, 1017.3613891601562, 819.92919921875, 211.04022216796875, -337.88262939453125, 1021.4939575195312, 483.89544677734375, 674.2647705078125, 1601.1005859375, -106.43644714355469, -16.363662719726562, -184.5222625732422, -291.3536071777344, 1167.8828125, 1391.336181640625, 69.63129425048828, 357.7319641113281, 785.3069458007812, 1891.8614501953125, 382.5014953613281, -260.63995361328125, 747.66650390625, 466.27374267578125, 82.8083724975586, 1215.92822265625, -761.6605834960938, 1890.988037109375, 204.88363647460938, 459.6868896484375, 889.3187866210938, 55.39381408691406, 444.4716796875, 673.4564819335938, 215.1752471923828, 189.41136169433594, 573.160888671875, 732.2487182617188, -91.34121704101562, -6.719635009765625, 3.4385452270507812, 1442.11279296875, 817.3244018554688, 1081.6890869140625, -127.76780700683594, 136.29446411132812, -522.2772827148438, 697.167724609375, 900.05615234375, 447.59832763671875, 161.81298828125, 408.0538024902344, 604.1678466796875, 1278.1226806640625, -41.7711181640625, -765.3089599609375, 261.74853515625, 355.39996337890625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000660.npy"} +{"epoch": 0.9691629955947136, "step": 661, "batch_size": 64, "mean": 428.96954345703125, "std": 562.321044921875, "min": -784.0781860351562, "p10": -65.04996414184569, "median": 312.2176513671875, "p90": 1068.642932128907, "max": 2498.8212890625, "pos_frac": 0.859375, "sample": [214.2206573486328, 107.79711151123047, 694.1345825195312, 112.05430603027344, 286.9602355957031, 77.67912292480469, 553.4697265625, -48.496009826660156, 653.23681640625, 300.52716064453125, 1161.4434814453125, 272.7250061035156, 762.2526245117188, 697.765869140625, 2498.8212890625, 795.84228515625, 231.88809204101562, 133.1661376953125, -685.04736328125, 100.99693298339844, -107.19108581542969, 807.2865600585938, 210.84817504882812, 264.7082214355469, 144.00250244140625, 215.01271057128906, 377.1780700683594, 890.7567138671875, 1194.6298828125, 89.3128662109375, 323.90814208984375, 528.9844970703125, 263.7038269042969, 1144.8798828125, 470.5110778808594, 325.5173645019531, 228.84423828125, 274.5989685058594, 88.69183349609375, 438.93328857421875, -523.459228515625, 417.359130859375, -72.14451599121094, 784.5238647460938, 736.1427001953125, -417.68505859375, 783.5819091796875, 690.874267578125, 45.12433624267578, 525.7432250976562, 2421.642578125, -784.0781860351562, 298.70208740234375, 64.73014831542969, 752.418701171875, 1261.75390625, 63.75811004638672, 589.484619140625, -162.117431640625, 1409.212158203125, 483.8114013671875, 477.91485595703125, 529.080322265625, -18.87859344482422], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000661.npy"} +{"epoch": 0.9706314243759178, "step": 662, "batch_size": 64, "mean": 311.1284484863281, "std": 636.546142578125, "min": -1335.5274658203125, "p10": -413.29617309570307, "median": 383.9259033203125, "p90": 1057.4017944335938, "max": 1995.8516845703125, "pos_frac": 0.703125, "sample": [-232.71017456054688, 856.1669921875, 1165.87841796875, -1053.7205810546875, 560.5618896484375, -377.51239013671875, 528.201171875, 580.1253662109375, 1995.8516845703125, -5.693809509277344, 980.6749267578125, -428.632080078125, -1335.5274658203125, -326.62127685546875, 917.8461303710938, 112.48448944091797, 4.132314682006836, 365.12298583984375, 1968.9677734375, 683.0054931640625, 1046.4801025390625, -243.01568603515625, 456.29931640625, -95.93115234375, 226.75732421875, 790.7217407226562, 108.57334899902344, 1062.08251953125, 286.2462463378906, 17.20977783203125, 318.42529296875, -216.63629150390625, 434.13836669921875, -241.24411010742188, 624.1498413085938, 902.40869140625, 498.1583557128906, 504.5406188964844, 510.4718322753906, 628.2568359375, 40.12306213378906, 140.0865020751953, 903.3607177734375, 1458.0672607421875, 550.9244995117188, 1130.918701171875, -719.9848022460938, 402.72882080078125, 1079.697021484375, 316.87939453125, -243.860595703125, 18.78872299194336, -104.75376892089844, 402.880615234375, -704.8016357421875, -230.10736083984375, 537.6246337890625, 522.5694580078125, 272.5969543457031, 484.4006652832031, 530.9522094726562, -875.4058227539062, -460.11578369140625, -118.04316711425781], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000662.npy"} +{"epoch": 0.9720998531571219, "step": 663, "batch_size": 64, "mean": 414.7147216796875, "std": 667.9571533203125, "min": -1686.3082275390625, "p10": -145.73981552124022, "median": 300.03623962402344, "p90": 1198.0762573242191, "max": 3235.951416015625, "pos_frac": 0.765625, "sample": [803.9341430664062, 516.9276123046875, 829.0664672851562, -6.816743850708008, 330.59112548828125, 730.0675659179688, 85.84672546386719, 497.4420471191406, -30.88916015625, 223.50161743164062, 166.0411376953125, 196.97222900390625, 1496.14697265625, 800.8616943359375, 86.72959899902344, 949.12353515625, 906.0299072265625, 120.51575469970703, -1686.3082275390625, 358.97283935546875, 252.58551025390625, 652.2835083007812, -408.9495849609375, 453.3460388183594, 567.5702514648438, 458.4178466796875, 666.4046630859375, 178.69515991210938, 441.38134765625, 3235.951416015625, -183.0933380126953, 1118.39990234375, 1406.3944091796875, 18.021133422851562, 418.960693359375, 1437.2784423828125, 1495.1121826171875, 100.06239318847656, -0.7183208465576172, 1278.586181640625, 1098.7359619140625, -156.02957153320312, 89.975341796875, 343.6429443359375, 43.202392578125, 605.7432250976562, 920.5077514648438, 149.99464416503906, 121.34235382080078, 1232.2232666015625, 269.4813537597656, -9.052162170410156, 1064.6488037109375, -997.7127075195312, 736.3367919921875, -55.3251838684082, -375.4250793457031, -121.73038482666016, 236.90274047851562, 523.0409545898438, -169.0496063232422, -37.25556182861328, -101.04159545898438, 167.14157104492188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000663.npy"} +{"epoch": 0.973568281938326, "step": 664, "batch_size": 64, "mean": 473.0594482421875, "std": 623.6787109375, "min": -844.6680908203125, "p10": -167.47296905517575, "median": 411.37840270996094, "p90": 1322.1712890625, "max": 2351.69580078125, "pos_frac": 0.8125, "sample": [1182.353515625, 90.95794677734375, 519.2047729492188, 771.6940307617188, 986.9962158203125, 18.686058044433594, 277.4992370605469, 1705.939697265625, -36.601829528808594, 1510.2889404296875, -443.256103515625, 763.9179077148438, 439.29864501953125, 332.69195556640625, 629.8287963867188, 197.7762908935547, -844.6680908203125, 472.06170654296875, 72.03665924072266, 477.2489013671875, 1323.525390625, 1319.01171875, 103.9307861328125, 1219.0947265625, 7.316501617431641, 9.623931884765625, 1577.3681640625, -132.61911010742188, 488.0291748046875, -592.4276123046875, 956.0029296875, 193.20388793945312, 161.61663818359375, 1065.1534423828125, 436.172607421875, 395.6178894042969, -693.4473266601562, 204.11288452148438, 705.6204833984375, 1075.619140625, 55.08924865722656, 570.699951171875, 572.3206176757812, 379.8359375, 1045.8927001953125, 2351.69580078125, 814.7020263671875, 261.7123718261719, 1706.3406982421875, -133.58087158203125, -97.98757934570312, 1347.88671875, 796.3892822265625, 158.60533142089844, 178.41683959960938, 442.2701721191406, 395.5904235839844, 360.26031494140625, -181.99815368652344, -225.58749389648438, 427.138916015625, -3.640899658203125, 737.2340698242188, -629.9640502929688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000664.npy"} +{"epoch": 0.9750367107195301, "step": 665, "batch_size": 64, "mean": 379.10247802734375, "std": 470.76007080078125, "min": -450.7479248046875, "p10": -160.36721038818357, "median": 349.40184020996094, "p90": 1006.6822814941409, "max": 1564.278564453125, "pos_frac": 0.78125, "sample": [-128.39437866210938, 5.130702972412109, 177.25473022460938, 416.3321228027344, -109.45816802978516, 401.6454772949219, 745.59912109375, 11.12750244140625, 1553.210205078125, 399.156494140625, 277.2639465332031, 515.8108520507812, 218.37533569335938, -252.08648681640625, 550.9357299804688, 14.447053909301758, -219.56234741210938, 293.2596740722656, -12.555526733398438, 463.63751220703125, 627.1746826171875, 201.72657775878906, 441.1800842285156, 386.6258239746094, -169.0977020263672, 368.7602233886719, -315.05621337890625, 1426.6568603515625, 814.1134033203125, -40.27164077758789, 498.0992431640625, 320.175048828125, 558.5218505859375, 473.6369323730469, 1243.65234375, 227.75787353515625, 1026.2391357421875, 493.0888671875, 385.1188049316406, 196.18893432617188, 268.9211120605469, 1211.50439453125, 598.415283203125, -450.7479248046875, 300.65692138671875, 934.8072509765625, 207.7102813720703, -388.02496337890625, 387.3004150390625, 961.0496215820312, 184.74734497070312, 330.04345703125, 600.21728515625, 123.39480590820312, 1552.12109375, -260.4854736328125, 561.3409423828125, -122.18766021728516, 124.04403686523438, 791.8174438476562, 1564.278564453125, -81.00022888183594, 517.21044921875, -139.99606323242188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000665.npy"} +{"epoch": 0.9765051395007343, "step": 666, "batch_size": 64, "mean": 407.4012451171875, "std": 554.6376342773438, "min": -903.4408569335938, "p10": -269.9988830566406, "median": 334.3314514160156, "p90": 1103.811645507813, "max": 2093.56298828125, "pos_frac": 0.796875, "sample": [786.0115966796875, 395.06756591796875, 681.7837524414062, 484.9320068359375, -458.03985595703125, 501.75714111328125, 985.9401245117188, 870.6636962890625, 141.28192138671875, 921.6429443359375, -50.251197814941406, 254.57284545898438, 302.6307678222656, -239.9561767578125, 40.06865692138672, 298.8035888671875, 231.261962890625, -311.680419921875, -115.731201171875, -248.4609375, 1212.0626220703125, -23.71660614013672, 940.4512329101562, 422.73876953125, 1965.4476318359375, 78.39970397949219, 698.447265625, 342.2730712890625, -43.712318420410156, -550.1466674804688, 242.01785278320312, 558.1160888671875, 168.86273193359375, 1140.7421875, 791.740234375, 532.0172729492188, 1017.640380859375, -279.22943115234375, 390.593505859375, 553.133544921875, 32.64146041870117, 878.9098510742188, -305.2282409667969, 1303.245361328125, 326.38983154296875, 323.68878173828125, -903.4408569335938, 1213.4036865234375, 357.7195739746094, 51.233917236328125, 385.27459716796875, 525.04541015625, 1016.30029296875, 67.32763671875, 403.25, 178.28280639648438, 256.56585693359375, 298.8075866699219, 2093.56298828125, 158.4457550048828, -489.4026184082031, 763.5062866210938, 321.1541442871094, 1186.8184814453125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000666.npy"} +{"epoch": 0.9779735682819384, "step": 667, "batch_size": 64, "mean": 436.495361328125, "std": 581.4351806640625, "min": -852.1035766601562, "p10": -334.37768859863263, "median": 426.92559814453125, "p90": 1177.462719726563, "max": 1867.2724609375, "pos_frac": 0.796875, "sample": [253.23196411132812, -852.1035766601562, 646.2884521484375, -713.0870971679688, -122.47366333007812, 703.1363525390625, 785.2711181640625, 538.0785522460938, 394.365478515625, 1213.846435546875, 236.1561279296875, -10.048980712890625, 113.18070220947266, 459.4857177734375, 338.07843017578125, -530.5081176757812, 623.585693359375, 523.7679443359375, 1671.9522705078125, 1014.3939819335938, 837.2343139648438, 256.0885314941406, 1020.1258544921875, 92.45035552978516, 654.4534912109375, 348.55767822265625, 1010.5028686523438, -40.90831756591797, 727.88720703125, 836.6058959960938, 584.6922607421875, 504.0137939453125, 1092.5673828125, 535.5794677734375, 330.5431823730469, -479.5893249511719, 285.53619384765625, 376.01641845703125, 461.5184020996094, 29.05077362060547, 13.187210083007812, 1545.1356201171875, 1298.6995849609375, -4.472572326660156, 747.8839111328125, 1481.226318359375, 720.3805541992188, 195.03695678710938, 72.2159423828125, 781.3553466796875, 79.11080169677734, 597.9171752929688, -417.2777099609375, 109.58290100097656, 1867.2724609375, 1592.943603515625, 82.25409698486328, 726.7158813476562, -475.85345458984375, 253.55520629882812, -603.9942626953125, -140.94430541992188, -131.64895629882812, 795.8965454101562], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000667.npy"} +{"epoch": 0.9794419970631424, "step": 668, "batch_size": 64, "mean": 330.4443359375, "std": 672.9450073242188, "min": -1125.895263671875, "p10": -477.1814697265625, "median": 311.44708251953125, "p90": 1110.8282714843751, "max": 2463.436767578125, "pos_frac": 0.703125, "sample": [565.181884765625, 344.398193359375, 1276.36865234375, -1125.895263671875, 149.4272003173828, 748.8115844726562, 973.52392578125, 199.8466033935547, 1128.84716796875, 354.6030578613281, -413.85675048828125, 608.1962280273438, 111.12517547607422, 75.42083740234375, 433.73785400390625, 895.2646484375, 14.361038208007812, 18.338943481445312, -274.3292236328125, -607.8389282226562, -798.6942138671875, 937.6259765625, -477.5413818359375, 710.5238037109375, -39.331947326660156, 769.7296752929688, 319.5639953613281, 494.1966247558594, 70.60777282714844, 657.1661376953125, 303.3301696777344, 526.0499267578125, 915.9400024414062, -493.6481628417969, 885.8847045898438, 431.86907958984375, -640.5137939453125, 1068.7841796875, 113.48026275634766, -179.75131225585938, 294.02349853515625, -149.02728271484375, 1366.310302734375, -47.23821258544922, -476.3416748046875, 1020.4067993164062, -60.57366943359375, -295.12408447265625, 250.6790771484375, -42.72113037109375, 370.69561767578125, 338.65740966796875, 425.21649169921875, 232.33065795898438, -158.28396606445312, 1144.086669921875, 364.3506774902344, 354.30267333984375, 2161.3779296875, -1098.3223876953125, -53.884849548339844, 1608.3846435546875, 84.89059448242188, 2463.436767578125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000668.npy"} +{"epoch": 0.9809104258443465, "step": 669, "batch_size": 64, "mean": 455.4132995605469, "std": 613.8062133789062, "min": -1246.0419921875, "p10": -163.00654373168942, "median": 359.988525390625, "p90": 1247.9069213867188, "max": 2314.28466796875, "pos_frac": 0.828125, "sample": [193.23313903808594, 56.072242736816406, -36.931785583496094, 1246.3453369140625, -1069.0465087890625, 320.26910400390625, 561.9607543945312, 759.6722412109375, 312.7893371582031, -114.87786102294922, 2314.28466796875, 106.87472534179688, 24.368789672851562, 312.1123962402344, -289.9076232910156, 381.6173095703125, 1331.040283203125, -124.83605194091797, 85.75629425048828, 246.8917694091797, 612.9007568359375, 804.78466796875, 1619.6171875, 916.5255737304688, 132.79148864746094, 1790.965576171875, 110.337890625, 1454.0087890625, 917.1558837890625, 120.7620849609375, 522.5543212890625, 1209.98974609375, -179.36532592773438, 663.5875244140625, 1248.576171875, -438.07525634765625, -451.6646423339844, 756.00146484375, 862.1881103515625, 803.8450317382812, 1267.4072265625, 268.5421447753906, 120.3511962890625, 338.3597412109375, -1246.0419921875, 668.6093139648438, 789.9546508789062, 648.0570068359375, 420.7355651855469, 280.87249755859375, -394.3345031738281, 789.5231323242188, 401.1302490234375, 487.0188293457031, 599.7403564453125, 667.6987915039062, 64.57423400878906, 237.12393188476562, 321.5754699707031, 948.3125610351562, -16.48336410522461, 963.2234497070312, 195.20974731445312, 230.114990234375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000669.npy"} +{"epoch": 0.9823788546255506, "step": 670, "batch_size": 64, "mean": 491.4889831542969, "std": 626.9342651367188, "min": -1097.4384765625, "p10": -294.4146545410156, "median": 409.36448669433594, "p90": 1372.9475219726564, "max": 1846.43701171875, "pos_frac": 0.765625, "sample": [266.2361755371094, 499.60333251953125, 255.2332763671875, 1393.6485595703125, 446.79742431640625, -150.61361694335938, -237.37881469726562, -59.25159454345703, 1055.574462890625, 737.0604858398438, 262.07989501953125, 41.027587890625, -1097.4384765625, 1846.43701171875, 898.1699829101562, 1738.1451416015625, 813.8423461914062, 402.3368835449219, 1144.396484375, 498.9601135253906, 253.2646026611328, -440.2046203613281, 1483.6866455078125, -275.6298828125, -318.32183837890625, 366.5684814453125, 1439.427490234375, 44.517337799072266, 964.1245727539062, -422.3863830566406, 1256.587158203125, 810.1012573242188, -379.739990234375, 242.18115234375, 237.20928955078125, 1305.994384765625, 956.3448486328125, -314.66937255859375, 421.4356384277344, 1209.5379638671875, 416.39208984375, 1381.5513916015625, 720.0245361328125, -231.69366455078125, 1748.6002197265625, 859.0284423828125, 305.20379638671875, 289.0550842285156, -72.08815002441406, -302.46527099609375, -39.694427490234375, 1192.1009521484375, 242.82623291015625, 236.97100830078125, 1352.871826171875, 147.49365234375, 336.99603271484375, 936.0465698242188, -141.094482421875, 851.5302124023438, 449.0920104980469, 548.09228515625, 98.6951904296875, 534.8629150390625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000670.npy"} +{"epoch": 0.9838472834067548, "step": 671, "batch_size": 64, "mean": 401.5755310058594, "std": 584.1900634765625, "min": -1302.422119140625, "p10": -213.5038879394531, "median": 380.1781768798828, "p90": 1132.9267333984376, "max": 1629.79443359375, "pos_frac": 0.78125, "sample": [143.4093475341797, -1039.4287109375, 492.8018798828125, 105.51058959960938, -2.44140625, 697.7243041992188, 252.04769897460938, 181.0395050048828, 335.460693359375, 535.9957275390625, 578.0137329101562, -126.30435943603516, 193.6187286376953, 1077.2305908203125, 1055.2080078125, 13.180282592773438, 358.3079528808594, 83.18103790283203, 748.1229858398438, 774.9913330078125, 277.3745422363281, 1629.79443359375, -1302.422119140625, 1079.216552734375, 877.1825561523438, 95.56626892089844, 1621.424072265625, 503.4864196777344, 314.556884765625, 542.0206909179688, -155.52517700195312, -222.74667358398438, -372.4410705566406, 454.9435119628906, 809.87255859375, -194.15493774414062, 402.04840087890625, 411.7888488769531, 611.497314453125, 1138.5950927734375, 1272.7333984375, 455.8186950683594, -131.7095489501953, 84.3412094116211, 773.463134765625, 10.650131225585938, 254.38162231445312, 1169.91064453125, -243.56991577148438, -29.600685119628906, 656.0875854492188, 433.4612121582031, -128.1446990966797, 281.79193115234375, 1119.7005615234375, 863.999755859375, -221.79629516601562, -637.3944702148438, 26.21632194519043, 1517.5625, 134.12042236328125, 1549.28173828125, 1009.1554565429688, 500.6249084472656], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000671.npy"} +{"epoch": 0.9853157121879589, "step": 672, "batch_size": 64, "mean": 392.7784118652344, "std": 672.6341552734375, "min": -931.542724609375, "p10": -495.83213500976547, "median": 286.64373779296875, "p90": 1385.619702148438, "max": 2400.607666015625, "pos_frac": 0.703125, "sample": [167.9639434814453, 212.45358276367188, -216.7530517578125, 162.725341796875, -58.46699905395508, 306.18560791015625, 919.9443359375, 334.378662109375, 1554.6866455078125, 227.75112915039062, 0.9453659057617188, -4.078742980957031, -589.750244140625, -313.64239501953125, 523.5252685546875, 168.75869750976562, -46.546112060546875, -609.8045654296875, 253.70570373535156, 1788.4332275390625, 101.22581481933594, 787.71435546875, -25.340980529785156, -145.70767211914062, 1272.7012939453125, -931.542724609375, 1199.152587890625, -927.496337890625, 499.75616455078125, 2400.607666015625, -124.26249694824219, 898.9531860351562, 969.3970947265625, 794.3008422851562, 1157.1781005859375, 621.6221923828125, 584.3648071289062, -14.41790771484375, 202.01376342773438, 1475.909423828125, -573.9134521484375, -35.629234313964844, 386.363525390625, 968.1837768554688, 815.9187622070312, 493.657958984375, 928.973876953125, 267.10186767578125, 556.302734375, 626.6864624023438, 1477.476806640625, 140.39163208007812, -151.11767578125, 559.1409912109375, 57.91607666015625, 663.9999389648438, 1451.033203125, 167.87908935546875, -47.30412292480469, -750.6407470703125, 508.375244140625, 358.22589111328125, 1434.0133056640625, -743.7615966796875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000672.npy"} +{"epoch": 0.986784140969163, "step": 673, "batch_size": 64, "mean": 372.2838134765625, "std": 561.58154296875, "min": -1057.6707763671875, "p10": -293.4051498413085, "median": 366.07823181152344, "p90": 894.4692932128911, "max": 2873.1787109375, "pos_frac": 0.78125, "sample": [-144.64781188964844, -711.63330078125, 611.90673828125, 774.7865600585938, 365.0911865234375, 553.1920166015625, 640.6551513671875, 367.0652770996094, 1169.28662109375, 468.4393005371094, -211.79466247558594, 1347.587646484375, 326.0804748535156, 739.8184814453125, 259.34552001953125, 305.146484375, -470.3287353515625, 784.3903198242188, 1192.390380859375, 342.4392395019531, -75.16813659667969, 322.76129150390625, 369.2558898925781, -37.094757080078125, 296.61175537109375, -1057.6707763671875, 254.46231079101562, 639.7008666992188, -459.0960388183594, 155.3432159423828, 665.1617431640625, 1345.29736328125, 98.13484191894531, 41.41324996948242, 579.1815795898438, 549.3417358398438, 507.3980712890625, -417.13946533203125, 562.5054931640625, 712.073974609375, 692.4432983398438, -43.12261199951172, 119.66397094726562, 723.8226318359375, 382.890625, 140.3619384765625, 506.4637756347656, -146.00547790527344, 450.4218444824219, 2873.1787109375, 114.26815032958984, 33.63035202026367, 533.0752563476562, 561.5452880859375, -328.3810729980469, 336.7816162109375, 471.98565673828125, 941.64599609375, 87.39227294921875, -338.9754943847656, -55.361839294433594, 558.4917602539062, 1084.0928955078125, 364.1635437011719], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000673.npy"} +{"epoch": 0.9882525697503671, "step": 674, "batch_size": 64, "mean": 476.6113586425781, "std": 646.719970703125, "min": -515.9573974609375, "p10": -221.09183349609373, "median": 433.0534362792969, "p90": 1061.180712890625, "max": 3480.468017578125, "pos_frac": 0.765625, "sample": [-164.5439453125, 25.316936492919922, 781.9386596679688, 611.2916870117188, 492.8781433105469, 701.6087036132812, 58.87901306152344, -8.153396606445312, -515.9573974609375, 1105.3116455078125, 1021.4027099609375, 973.00439453125, 58.98146438598633, 526.6925659179688, -232.0420379638672, 454.9161071777344, -57.9154052734375, 1852.467041015625, 505.6712341308594, 3480.468017578125, 537.7432861328125, 559.2904663085938, 102.43559265136719, 1024.30517578125, 423.97894287109375, -374.56829833984375, 1066.2696533203125, 405.9774475097656, 503.5921630859375, -209.02734375, 1049.3065185546875, 1653.0589599609375, -186.49014282226562, 413.4249267578125, 314.6287841796875, 869.2561645507812, -383.53338623046875, -104.01445770263672, -322.0540466308594, 698.6205444335938, 654.0301513671875, 276.4264221191406, 417.98846435546875, 1513.8525390625, 202.19784545898438, 773.1663818359375, 1017.4742431640625, 904.9423828125, 484.2305908203125, 335.5909118652344, -506.2282409667969, 192.70152282714844, 1634.250732421875, -226.2623291015625, -90.76628875732422, 760.3858032226562, 149.23199462890625, 697.8788452148438, 464.2304992675781, 442.1279296875, 200.89266967773438, -17.06617546081543, 209.27474975585938, 298.15899658203125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000674.npy"} +{"epoch": 0.9897209985315712, "step": 675, "batch_size": 64, "mean": 497.97161865234375, "std": 658.835693359375, "min": -510.6376953125, "p10": -130.24884872436522, "median": 335.0733184814453, "p90": 1227.5025512695313, "max": 2722.80517578125, "pos_frac": 0.8125, "sample": [204.6801300048828, 1531.65771484375, 465.9772644042969, 93.17414855957031, 96.3110122680664, 141.48060607910156, 931.7962036132812, -87.83734130859375, 578.9983520507812, 344.023193359375, 770.9108276367188, -212.8822021484375, 624.4118041992188, 1106.81591796875, -126.65911102294922, -57.229217529296875, 149.19277954101562, 1217.540283203125, 477.9278259277344, 564.236083984375, 664.641357421875, 3.7791366577148438, 2289.3310546875, 784.377197265625, 49.60863494873047, 326.1234436035156, 62.142574310302734, 799.5615844726562, 557.277587890625, 179.59466552734375, 442.36138916015625, 1231.7720947265625, 312.3663330078125, 2095.064208984375, -12.901580810546875, 748.114990234375, 2722.80517578125, -131.7873077392578, 1152.1856689453125, 2188.774658203125, 294.5228576660156, 400.2964782714844, 6.1278533935546875, 89.29833984375, 272.2755126953125, -244.56094360351562, -426.55548095703125, 118.3515396118164, 474.777587890625, 475.7853088378906, -42.13938903808594, 1466.53662109375, 560.792236328125, -510.6376953125, -411.6587219238281, 313.01617431640625, 52.847835540771484, 613.1095581054688, 81.01632690429688, 1191.3782958984375, 154.13101196289062, 979.862060546875, 887.7794189453125, -205.88833618164062], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000675.npy"} +{"epoch": 0.9911894273127754, "step": 676, "batch_size": 64, "mean": 435.82708740234375, "std": 534.9483032226562, "min": -1041.0252685546875, "p10": -102.63588256835936, "median": 361.62962341308594, "p90": 1212.4916992187502, "max": 1820.4642333984375, "pos_frac": 0.8125, "sample": [94.7291488647461, 1552.4765625, 781.367919921875, 725.7931518554688, 199.59620666503906, 27.881797790527344, 476.4741516113281, 567.06201171875, -2.6156654357910156, -1041.0252685546875, 424.6436767578125, 82.2183837890625, 38.86454772949219, 1106.9522705078125, 1394.9683837890625, 1820.4642333984375, -192.68609619140625, -17.982105255126953, 124.58619689941406, -86.66513061523438, 241.30599975585938, 525.379638671875, 569.16357421875, -97.72238159179688, -106.10552978515625, 742.4925537109375, 107.09970092773438, 1236.842529296875, 261.9532165527344, 1680.0992431640625, -104.74166870117188, 459.9824523925781, 228.73016357421875, 602.5386962890625, 3.9752273559570312, 632.6229248046875, 1420.316162109375, 311.71923828125, 678.4678344726562, 1340.599609375, 498.0482177734375, -10.556724548339844, 170.3595428466797, -367.756591796875, 1155.673095703125, 361.0574645996094, 96.34454345703125, 867.7879028320312, 423.8880615234375, 350.6324462890625, 928.0039672851562, 524.024169921875, 467.8640441894531, 406.7352294921875, 90.50360870361328, 296.379150390625, 892.06689453125, -390.0811767578125, 1137.515380859375, 183.81124877929688, 362.2017822265625, 282.37957763671875, -226.38063049316406, 580.6107788085938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000676.npy"} +{"epoch": 0.9926578560939795, "step": 677, "batch_size": 64, "mean": 416.90771484375, "std": 577.3792724609375, "min": -667.3900146484375, "p10": -139.26241607666014, "median": 354.6081848144531, "p90": 1147.78427734375, "max": 2541.988525390625, "pos_frac": 0.78125, "sample": [131.35791015625, 2541.988525390625, 486.8782653808594, 199.39102172851562, 417.8992004394531, -239.3297119140625, -48.419883728027344, -35.444557189941406, 297.8445129394531, 145.82763671875, 730.11376953125, 641.7260131835938, -50.81031799316406, 1159.5224609375, 354.88543701171875, 724.247802734375, 660.3250122070312, 99.88887023925781, 131.3655242919922, 367.91229248046875, 1080.59912109375, 99.38166809082031, 1076.95068359375, -81.25581359863281, 354.3309326171875, 1161.9166259765625, 227.32496643066406, 571.2407836914062, 360.6232604980469, 1178.3074951171875, 312.26458740234375, 94.02615356445312, -134.50416564941406, 221.62725830078125, 283.78997802734375, -584.2760620117188, 821.227294921875, 186.1683807373047, 2002.6767578125, 246.0478515625, 146.20657348632812, 477.5882568359375, 661.44482421875, 747.8460083007812, -95.06016540527344, 1157.5029296875, 417.4178466796875, 143.14968872070312, 803.7513427734375, 185.5639190673828, 1125.107421875, -26.552112579345703, 513.136962890625, 386.9140930175781, 618.4981079101562, -451.1753845214844, -667.3900146484375, 1731.4892578125, 634.1366577148438, 655.6025390625, 417.8307189941406, -299.99102783203125, -655.2601318359375, -141.30166625976562], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000677.npy"} +{"epoch": 0.9941262848751835, "step": 678, "batch_size": 64, "mean": 508.21148681640625, "std": 654.2188110351562, "min": -1655.9410400390625, "p10": -114.94122848510742, "median": 414.1284484863281, "p90": 1250.0764526367193, "max": 2372.47802734375, "pos_frac": 0.84375, "sample": [250.72186279296875, 916.8355712890625, 595.4765014648438, 577.94384765625, 420.50982666015625, -249.12765502929688, 661.5996704101562, -1655.9410400390625, 78.61151123046875, -214.71408081054688, -117.29292297363281, 1108.1812744140625, 88.60368347167969, 1086.262451171875, -109.45394134521484, 407.7470703125, 676.0899047851562, -319.64837646484375, 836.4519653320312, 806.0391235351562, 825.8056640625, 1376.9815673828125, 27.210708618164062, 2139.97607421875, 894.0524291992188, 2372.47802734375, 232.3511199951172, 251.95567321777344, -13.129436492919922, 1045.9228515625, -312.9551086425781, 126.8584213256836, 255.8651123046875, 722.3662109375, 221.74163818359375, 876.9022216796875, 42.116859436035156, 1310.888671875, 326.883056640625, 686.3463134765625, -100.23258972167969, 887.0250244140625, 1086.5999755859375, 1430.37939453125, 317.68133544921875, 338.1023254394531, 2022.4140625, 1928.5655517578125, 157.0557403564453, 433.77459716796875, 368.644775390625, -321.7099304199219, 630.2565307617188, 62.37306213378906, 21.71636390686035, 78.86323547363281, 507.8660888671875, 944.4794311523438, 615.741455078125, 502.900390625, 388.1304016113281, 0.4073829650878906, 940.8525390625, 28.132484436035156], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000678.npy"} +{"epoch": 0.9955947136563876, "step": 679, "batch_size": 64, "mean": 351.002197265625, "std": 547.2009887695312, "min": -814.7316284179688, "p10": -288.8000396728515, "median": 250.5828094482422, "p90": 1221.809130859375, "max": 1924.54541015625, "pos_frac": 0.703125, "sample": [678.1528930664062, -50.53050994873047, 1217.055419921875, 1478.28271484375, 350.5434875488281, 729.2613525390625, 724.2188720703125, 492.4068908691406, 1232.3326416015625, 292.6016540527344, 402.0765380859375, 515.49951171875, 432.9275207519531, 221.5045623779297, 945.06591796875, -379.9151916503906, -85.3771743774414, -10.654815673828125, 200.03955078125, 967.0999755859375, 715.0974731445312, 1223.846435546875, 10.800949096679688, 102.05255126953125, 194.8743438720703, -396.6962585449219, 183.05072021484375, 1289.678466796875, -617.2650756835938, 475.4363098144531, 746.4834594726562, 89.93392944335938, -119.32383728027344, 182.81866455078125, 750.1202392578125, 348.036865234375, 1251.15185546875, 156.3356475830078, -314.2425537109375, -23.811851501464844, -153.12313842773438, 88.44664001464844, 646.8428955078125, 159.00711059570312, -229.43417358398438, 1331.8402099609375, 312.2036437988281, -34.573394775390625, -454.3448181152344, -57.20108413696289, 916.2030639648438, -485.8253173828125, 345.3083190917969, -83.65519714355469, 434.2327575683594, 248.51248168945312, -203.16421508789062, 252.65313720703125, 765.0787353515625, 1924.54541015625, 231.57891845703125, 728.739501953125, -814.7316284179688, -5.969411849975586], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000679.npy"} +{"epoch": 0.9970631424375918, "step": 680, "batch_size": 64, "mean": 457.0138854980469, "std": 685.9037475585938, "min": -1973.8555908203125, "p10": -219.45711669921874, "median": 348.3846130371094, "p90": 1478.905163574219, "max": 2033.7191162109375, "pos_frac": 0.796875, "sample": [1074.564697265625, 252.12132263183594, 387.05157470703125, 1011.630859375, 213.2357940673828, 808.246337890625, 1663.9227294921875, 438.08477783203125, 284.61712646484375, 318.324951171875, 525.933349609375, 200.77574157714844, 832.6345825195312, -429.2235412597656, 1175.470947265625, 1501.8377685546875, 794.1227416992188, -387.6171875, -199.9914093017578, 610.017822265625, 1286.2900390625, 71.58618927001953, -170.2542266845703, 287.13360595703125, -278.8691101074219, 611.33447265625, 822.169921875, 220.7295379638672, 620.3648681640625, 1036.029052734375, 232.12217712402344, -62.06591033935547, 89.45236206054688, 1629.4931640625, 961.4229736328125, 321.23193359375, 61.25799560546875, 594.340087890625, 1642.2735595703125, -759.0055541992188, 172.04290771484375, 687.21875, 375.53729248046875, 505.58380126953125, 165.6294403076172, -1973.8555908203125, 1570.0784912109375, -184.41290283203125, -626.486083984375, 1425.395751953125, 382.2391357421875, 2033.7191162109375, 35.35444641113281, 74.03062438964844, 263.8251647949219, 1322.730224609375, 676.0401000976562, 1506.5692138671875, -204.90225219726562, 213.13580322265625, -225.69491577148438, 95.0070571899414, -158.59707641601562, 825.9014892578125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000680.npy"} +{"epoch": 0.9985315712187959, "step": 681, "batch_size": 64, "mean": 363.79510498046875, "std": 569.5819091796875, "min": -590.6805419921875, "p10": -393.5046875, "median": 377.4759979248047, "p90": 1101.8716430664062, "max": 1807.67626953125, "pos_frac": 0.671875, "sample": [682.6632080078125, -403.29541015625, 501.3292236328125, 225.28253173828125, 714.262939453125, -590.6805419921875, 214.56546020507812, 354.2638854980469, -518.0266723632812, 400.6881103515625, -422.39276123046875, 481.9648132324219, -432.76947021484375, -538.969970703125, 1096.374267578125, 1369.81005859375, 437.0523376464844, -92.47502899169922, -313.54827880859375, 1104.2276611328125, -89.33692169189453, -37.50916290283203, 886.9541015625, 417.2674255371094, 1331.7955322265625, 679.9335327148438, 244.39669799804688, 1049.6641845703125, 783.7467041015625, 577.9885864257812, 129.9266357421875, 433.0491027832031, 87.82843780517578, -63.870819091796875, -249.04168701171875, 188.5264129638672, 138.66824340820312, -431.1726989746094, 538.7044067382812, -90.09446716308594, -370.65966796875, 1807.67626953125, -135.238037109375, 728.188232421875, -90.01802825927734, 136.482177734375, -332.144775390625, 249.47315979003906, -2.5263519287109375, -305.270263671875, 859.2924194335938, -347.4581604003906, 875.7865600585938, 565.2276611328125, 1052.1995849609375, 456.67974853515625, 1043.255859375, 1179.7735595703125, 988.3966064453125, 665.5780029296875, 129.0550994873047, 1185.4906005859375, 1289.3209228515625, 856.5747680664062], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs/step_0000681.npy"} diff --git a/margin_logs/step_0000001.npy b/margin_logs/step_0000001.npy new file mode 100644 index 0000000..248c095 --- /dev/null +++ b/margin_logs/step_0000001.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb7ed5e9b5d6de6c4e509dd17cf5d9c91337fabd0c174e116c5e60872823ad93 +size 384 diff --git a/margin_logs/step_0000002.npy b/margin_logs/step_0000002.npy new file mode 100644 index 0000000..984e4c2 --- /dev/null +++ b/margin_logs/step_0000002.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc23171824afa57340cda53f69d83aef67c7c0b95175e9ec4a3a7bc3c221bc4f +size 384 diff --git a/margin_logs/step_0000003.npy b/margin_logs/step_0000003.npy new file mode 100644 index 0000000..25558f4 --- /dev/null +++ b/margin_logs/step_0000003.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2017775f1dab3eb44859d8aef70aed87952cac74ed8a1b1592f86909bc2b542a +size 384 diff --git a/margin_logs/step_0000004.npy b/margin_logs/step_0000004.npy new file mode 100644 index 0000000..fed6132 --- /dev/null +++ b/margin_logs/step_0000004.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e0f3c0def32982e1c180ae0f242fa3dda6cd1ca785e6b5bf20a7500421796b46 +size 384 diff --git a/margin_logs/step_0000005.npy b/margin_logs/step_0000005.npy new file mode 100644 index 0000000..4abc264 --- /dev/null +++ b/margin_logs/step_0000005.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:293ec437e7ec8a2f1c5f439399157e7a7271aee5b0c628dbd04d40211d8e77d9 +size 384 diff --git a/margin_logs/step_0000006.npy b/margin_logs/step_0000006.npy new file mode 100644 index 0000000..5afa757 --- /dev/null +++ b/margin_logs/step_0000006.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51de6381ae4d7e4c104652990e810b75c9dc726ba29185099d939badb449d103 +size 384 diff --git a/margin_logs/step_0000007.npy b/margin_logs/step_0000007.npy new file mode 100644 index 0000000..311ad8c --- /dev/null +++ b/margin_logs/step_0000007.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2823f79da591f256643e254d5687eff1ee3fe01acb56fe414ab141fad1c6b53a +size 384 diff --git a/margin_logs/step_0000008.npy b/margin_logs/step_0000008.npy new file mode 100644 index 0000000..edcdca5 --- /dev/null +++ b/margin_logs/step_0000008.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e64f8fc522a38027a0a45a7ddf2151fb7d67bc18eb3bee4eb2025724d09f4ef6 +size 384 diff --git a/margin_logs/step_0000009.npy b/margin_logs/step_0000009.npy new file mode 100644 index 0000000..2c67216 --- /dev/null +++ b/margin_logs/step_0000009.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6eb398d9c8550f0a5441c939aabfb0a419c4ab922e3d7745eb887af48567b8e +size 384 diff --git a/margin_logs/step_0000010.npy b/margin_logs/step_0000010.npy new file mode 100644 index 0000000..d98482b --- /dev/null +++ b/margin_logs/step_0000010.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d5acf5adb8ddc88b7721db9cbcb6113957b83c3eaee420bac40f887acbdb7bb +size 384 diff --git a/margin_logs/step_0000011.npy b/margin_logs/step_0000011.npy new file mode 100644 index 0000000..d839903 --- /dev/null +++ b/margin_logs/step_0000011.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:06cb6c99c661af8f1e1b041c5a670b0893364bcdc150d6d06baeb3297d79114a +size 384 diff --git a/margin_logs/step_0000012.npy b/margin_logs/step_0000012.npy new file mode 100644 index 0000000..58bafb2 --- /dev/null +++ b/margin_logs/step_0000012.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:696079aab5a08de9561e41194a20ef3eb4dd467629728c05e355c417242319a7 +size 384 diff --git a/margin_logs/step_0000013.npy b/margin_logs/step_0000013.npy new file mode 100644 index 0000000..5e6ca70 --- /dev/null +++ b/margin_logs/step_0000013.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd63c54777a18b0dc1fc85400c9c15e8f490ddc6c48c1539aa1f555c37da679f +size 384 diff --git a/margin_logs/step_0000014.npy b/margin_logs/step_0000014.npy new file mode 100644 index 0000000..12e7f8e --- /dev/null +++ b/margin_logs/step_0000014.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0eabefec1c64144d98185e3278ffc2958e21f57f5d4d0dc1b6eb4891e588f6de +size 384 diff --git a/margin_logs/step_0000015.npy b/margin_logs/step_0000015.npy new file mode 100644 index 0000000..253a9d2 --- /dev/null +++ b/margin_logs/step_0000015.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:194ea7731413cee37e5837eb5eb303b1d3f8ee3ce38f130ce1b4e80778f91709 +size 384 diff --git a/margin_logs/step_0000016.npy b/margin_logs/step_0000016.npy new file mode 100644 index 0000000..d8a2281 --- /dev/null +++ b/margin_logs/step_0000016.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8d861234f7c444a4973287ab8f6afda259b63042f0edd56ae5d11427a30bccf +size 384 diff --git a/margin_logs/step_0000017.npy b/margin_logs/step_0000017.npy new file mode 100644 index 0000000..5c44c9f --- /dev/null +++ b/margin_logs/step_0000017.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50dcf0b0e8ec072ebebbd44ddb00b453aa5bf748a3550856f3bdf8da0ee35b1f +size 384 diff --git a/margin_logs/step_0000018.npy b/margin_logs/step_0000018.npy new file mode 100644 index 0000000..6bd22da --- /dev/null +++ b/margin_logs/step_0000018.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:101ab82fd64fe90873acb65fb61865b7340b0ca583e3337733cb6b91135a0ac2 +size 384 diff --git a/margin_logs/step_0000019.npy b/margin_logs/step_0000019.npy new file mode 100644 index 0000000..f838f1c --- /dev/null +++ b/margin_logs/step_0000019.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e598dbd3c987f1ddee9e6bd7ad1184a1ee69ed0f6f23e15ce8a35dd04e1f4f2e +size 384 diff --git a/margin_logs/step_0000020.npy b/margin_logs/step_0000020.npy new file mode 100644 index 0000000..120ade4 --- /dev/null +++ b/margin_logs/step_0000020.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b8ec995772325cd0b8e37f756b44a792b934164525b6fa2186a9da36111f05d +size 384 diff --git a/margin_logs/step_0000021.npy b/margin_logs/step_0000021.npy new file mode 100644 index 0000000..f3cf820 --- /dev/null +++ b/margin_logs/step_0000021.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a419eccb39e3cdefdc7335e008b98781a9bd2860740eb3eca91787e0cc40d07f +size 384 diff --git a/margin_logs/step_0000022.npy b/margin_logs/step_0000022.npy new file mode 100644 index 0000000..675c2fc --- /dev/null +++ b/margin_logs/step_0000022.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c820c2ee99f52ed8ec76001724c3448d484b905c481a6d462019e1f75b916fb8 +size 384 diff --git a/margin_logs/step_0000023.npy b/margin_logs/step_0000023.npy new file mode 100644 index 0000000..b83bd95 --- /dev/null +++ b/margin_logs/step_0000023.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14f03d38201502be0972e1fbd834f29bff1f60d2bfe0c5c272789913935b75ec +size 384 diff --git a/margin_logs/step_0000024.npy b/margin_logs/step_0000024.npy new file mode 100644 index 0000000..5e75411 --- /dev/null +++ b/margin_logs/step_0000024.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67f5fb29169b938549dbcb5fbea8edf1a2523ff2bc5232c33f1d8f0e43b3387c +size 384 diff --git a/margin_logs/step_0000025.npy b/margin_logs/step_0000025.npy new file mode 100644 index 0000000..3fefeb7 --- /dev/null +++ b/margin_logs/step_0000025.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe7f4a7e228a9a2265819250ae0221216ab73c6c4d81fa7d2883d87a4250da09 +size 384 diff --git a/margin_logs/step_0000026.npy b/margin_logs/step_0000026.npy new file mode 100644 index 0000000..bc01fc6 --- /dev/null +++ b/margin_logs/step_0000026.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca37776284e6daf438380abd41fb633c158f0d134de97a8b8a8ab9ecf3818e21 +size 384 diff --git a/margin_logs/step_0000027.npy b/margin_logs/step_0000027.npy new file mode 100644 index 0000000..6bdaae4 --- /dev/null +++ b/margin_logs/step_0000027.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a489113ac27868317207a14ac635dc53fad61f2f3e2ca0f1c7fe1c68bc78afc7 +size 384 diff --git a/margin_logs/step_0000028.npy b/margin_logs/step_0000028.npy new file mode 100644 index 0000000..662e62b --- /dev/null +++ b/margin_logs/step_0000028.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0c3be85cac4b4eb4e4696a68ea479488e74e32accaa5f8239dd8f2e9a841369 +size 384 diff --git a/margin_logs/step_0000029.npy b/margin_logs/step_0000029.npy new file mode 100644 index 0000000..3871146 --- /dev/null +++ b/margin_logs/step_0000029.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70061b47e5fdc57692e0700b95b36805ffb13f7180bd75b44455ec4ea2990b68 +size 384 diff --git a/margin_logs/step_0000030.npy b/margin_logs/step_0000030.npy new file mode 100644 index 0000000..130b352 --- /dev/null +++ b/margin_logs/step_0000030.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d8b564c7f3fddfb329bbd28a35d0565520dc53e9cec7e7f0c02ea4d6d4cf601 +size 384 diff --git a/margin_logs/step_0000031.npy b/margin_logs/step_0000031.npy new file mode 100644 index 0000000..9a0525a --- /dev/null +++ b/margin_logs/step_0000031.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3695218b5a1f6ecbc141249b51aec6c26ebc6dac0616770e484eaeb0f50766ef +size 384 diff --git a/margin_logs/step_0000032.npy b/margin_logs/step_0000032.npy new file mode 100644 index 0000000..a7dec32 --- /dev/null +++ b/margin_logs/step_0000032.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5df13fb944cfed6937f0b0034052ebcc72ba322b5da744f4654aba182edb160f +size 384 diff --git a/margin_logs/step_0000033.npy b/margin_logs/step_0000033.npy new file mode 100644 index 0000000..c222654 --- /dev/null +++ b/margin_logs/step_0000033.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b4600c11888867abd0602445717f8161de44a158018d1dcc325a2734735d420 +size 384 diff --git a/margin_logs/step_0000034.npy b/margin_logs/step_0000034.npy new file mode 100644 index 0000000..a397398 --- /dev/null +++ b/margin_logs/step_0000034.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb3220db0ee994579e110e4979d70c23649afdabfc955057cbc0399ba8119c6b +size 384 diff --git a/margin_logs/step_0000035.npy b/margin_logs/step_0000035.npy new file mode 100644 index 0000000..5d03df6 --- /dev/null +++ b/margin_logs/step_0000035.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:452e91e4e243ccfd6d3f4dafa08f958ba715507a3ef9a89c17e60981a481e5b3 +size 384 diff --git a/margin_logs/step_0000036.npy b/margin_logs/step_0000036.npy new file mode 100644 index 0000000..7e04783 --- /dev/null +++ b/margin_logs/step_0000036.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50d9b421616b3f0ff079b536a98cd502e5c33f533b41631f474e1a7262d86c0e +size 384 diff --git a/margin_logs/step_0000037.npy b/margin_logs/step_0000037.npy new file mode 100644 index 0000000..3aaa9e5 --- /dev/null +++ b/margin_logs/step_0000037.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0bb300b21542c0640e1378f9b26faa2877ad666537fa84aa2718c7a3c73cc288 +size 384 diff --git a/margin_logs/step_0000038.npy b/margin_logs/step_0000038.npy new file mode 100644 index 0000000..68de0dd --- /dev/null +++ b/margin_logs/step_0000038.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f52243a5bd3aed3ddb8852e6bd09f72553dd0d25f14ba504bd0056da8ce7eee +size 384 diff --git a/margin_logs/step_0000039.npy b/margin_logs/step_0000039.npy new file mode 100644 index 0000000..c777044 --- /dev/null +++ b/margin_logs/step_0000039.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f93accfd094c87419736946f9db08240ad21ec195c9ee95651227d4010a2176 +size 384 diff --git a/margin_logs/step_0000040.npy b/margin_logs/step_0000040.npy new file mode 100644 index 0000000..55315e9 --- /dev/null +++ b/margin_logs/step_0000040.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9dbdb1130a21c76a6a8b424c72b947cb979c5a1bb0bed5ed7cce85bcd282048b +size 384 diff --git a/margin_logs/step_0000041.npy b/margin_logs/step_0000041.npy new file mode 100644 index 0000000..1da7ed7 --- /dev/null +++ b/margin_logs/step_0000041.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bcd6eeebf19d7c71c3f1f334a041975a2cad85c03b4eb99428b091a4a06bbef2 +size 384 diff --git a/margin_logs/step_0000042.npy b/margin_logs/step_0000042.npy new file mode 100644 index 0000000..a3a591a --- /dev/null +++ b/margin_logs/step_0000042.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27ce2428ee0bd912bfd9efc5aaa838965a265a58e8c9766d4347de25f6fa26d4 +size 384 diff --git a/margin_logs/step_0000043.npy b/margin_logs/step_0000043.npy new file mode 100644 index 0000000..6290c22 --- /dev/null +++ b/margin_logs/step_0000043.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df7fa9761438b2ab6559c4586fe7b02ff3365cc984e02073c75fed70c7712bb6 +size 384 diff --git a/margin_logs/step_0000044.npy b/margin_logs/step_0000044.npy new file mode 100644 index 0000000..35bb0ef --- /dev/null +++ b/margin_logs/step_0000044.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ffb216e05954d3d7c4f2ec1316f8bc8f5617bbceb45827eba04cdaf755eb6cf +size 384 diff --git a/margin_logs/step_0000045.npy b/margin_logs/step_0000045.npy new file mode 100644 index 0000000..f21fb1e --- /dev/null +++ b/margin_logs/step_0000045.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b0d5e69fa56ed5fa777a9d24a9758a14f0ca537b53ef238e55ab8f6b41bf5f0 +size 384 diff --git a/margin_logs/step_0000046.npy b/margin_logs/step_0000046.npy new file mode 100644 index 0000000..bb90981 --- /dev/null +++ b/margin_logs/step_0000046.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d640ecabec457fcbb111b74dffac90f6716678855d862a0ebb0ad1eb6377eb29 +size 384 diff --git a/margin_logs/step_0000047.npy b/margin_logs/step_0000047.npy new file mode 100644 index 0000000..9c4ea1f --- /dev/null +++ b/margin_logs/step_0000047.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66937a7f23c32a19fb2fc37f226b290a7307e211ad1bfdf2b8f643189332a58b +size 384 diff --git a/margin_logs/step_0000048.npy b/margin_logs/step_0000048.npy new file mode 100644 index 0000000..83e5901 --- /dev/null +++ b/margin_logs/step_0000048.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f49ee22ff27e99d6fcbf412546a9096aebf05af41eee002507600e050221c79 +size 384 diff --git a/margin_logs/step_0000049.npy b/margin_logs/step_0000049.npy new file mode 100644 index 0000000..28a2c4c --- /dev/null +++ b/margin_logs/step_0000049.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a170c96aed7cf2b37e558124df7e0c19699800f1f971cfd1b89690b29b0b9b18 +size 384 diff --git a/margin_logs/step_0000050.npy b/margin_logs/step_0000050.npy new file mode 100644 index 0000000..d123880 --- /dev/null +++ b/margin_logs/step_0000050.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e3228b700e8d479947799e5facb3e04149fe1ae3f2d01db06457600ef672f91 +size 384 diff --git a/margin_logs/step_0000051.npy b/margin_logs/step_0000051.npy new file mode 100644 index 0000000..7bbd9ce --- /dev/null +++ b/margin_logs/step_0000051.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a4589edd7326c8cd356cea002373940fb64b560efd4614d1fc4fac2cfcadaf57 +size 384 diff --git a/margin_logs/step_0000052.npy b/margin_logs/step_0000052.npy new file mode 100644 index 0000000..656b011 --- /dev/null +++ b/margin_logs/step_0000052.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:118d33acaabeea43b58a0766226f7f77cf7c941d18ca4bd843da28667e107645 +size 384 diff --git a/margin_logs/step_0000053.npy b/margin_logs/step_0000053.npy new file mode 100644 index 0000000..ed6a5c7 --- /dev/null +++ b/margin_logs/step_0000053.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d498a4f7abe0604d9b37f13d6368c208084a6382c682f7de7c2b87c9231bdf6 +size 384 diff --git a/margin_logs/step_0000054.npy b/margin_logs/step_0000054.npy new file mode 100644 index 0000000..113101e --- /dev/null +++ b/margin_logs/step_0000054.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe45d73f85dbbab4994f936878f7755bdc9deb6ecb930167ea79ecffbaccf602 +size 384 diff --git a/margin_logs/step_0000055.npy b/margin_logs/step_0000055.npy new file mode 100644 index 0000000..c24bcd0 --- /dev/null +++ b/margin_logs/step_0000055.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ddab4135563f64093c22c8bd401a184b83b983b9dbd542a95735a33438e65ca +size 384 diff --git a/margin_logs/step_0000056.npy b/margin_logs/step_0000056.npy new file mode 100644 index 0000000..ef51b1e --- /dev/null +++ b/margin_logs/step_0000056.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e4399d162a0f4edc77c2d8c806e1c8ebb9bed27be1f95a6bfbe0a3b34806296 +size 384 diff --git a/margin_logs/step_0000057.npy b/margin_logs/step_0000057.npy new file mode 100644 index 0000000..0faeb71 --- /dev/null +++ b/margin_logs/step_0000057.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f49656c9b8a2fb68ae6740278953770a01ee88554b9ce17aeb902994f26cf7f5 +size 384 diff --git a/margin_logs/step_0000058.npy b/margin_logs/step_0000058.npy new file mode 100644 index 0000000..99df76d --- /dev/null +++ b/margin_logs/step_0000058.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ec3134ec8f44c53fe57d6854464cdcc792fe77f5b8571230855cea6e5c80c18 +size 384 diff --git a/margin_logs/step_0000059.npy b/margin_logs/step_0000059.npy new file mode 100644 index 0000000..a3a6a02 --- /dev/null +++ b/margin_logs/step_0000059.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2c830470044168ea16cb36fb220e5d4def7c6102f792fe680da518f0188d3d9 +size 384 diff --git a/margin_logs/step_0000060.npy b/margin_logs/step_0000060.npy new file mode 100644 index 0000000..9b92514 --- /dev/null +++ b/margin_logs/step_0000060.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78d95175a3f69fdb5d50c72e5e909d6c8f6f84c32c445b6e163f1dbc78619151 +size 384 diff --git a/margin_logs/step_0000061.npy b/margin_logs/step_0000061.npy new file mode 100644 index 0000000..bc6b332 --- /dev/null +++ b/margin_logs/step_0000061.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ac4b3213485ca0cb27155c6da0b0e0478f8d1ead468f819fcea64d2611d52ae +size 384 diff --git a/margin_logs/step_0000062.npy b/margin_logs/step_0000062.npy new file mode 100644 index 0000000..e4504ca --- /dev/null +++ b/margin_logs/step_0000062.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d7c1bc634e89fcf7d03fc6a94cfeea615db9313db33912c333abf4547304dc3b +size 384 diff --git a/margin_logs/step_0000063.npy b/margin_logs/step_0000063.npy new file mode 100644 index 0000000..34be4b4 --- /dev/null +++ b/margin_logs/step_0000063.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97e8533f1ee7517be2789be3b281a872042ba58673af9d9192d3b10de5eff5f2 +size 384 diff --git a/margin_logs/step_0000064.npy b/margin_logs/step_0000064.npy new file mode 100644 index 0000000..9d1deb0 --- /dev/null +++ b/margin_logs/step_0000064.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df8208ead4c0525fa449c544e126b28b686aa376898e466a56de2ba4cf779850 +size 384 diff --git a/margin_logs/step_0000065.npy b/margin_logs/step_0000065.npy new file mode 100644 index 0000000..b718564 --- /dev/null +++ b/margin_logs/step_0000065.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:54abb9da54c5c75b764ca749a42c6d81dfd238c30c7db50c0400b3c82666c159 +size 384 diff --git a/margin_logs/step_0000066.npy b/margin_logs/step_0000066.npy new file mode 100644 index 0000000..8d5eb90 --- /dev/null +++ b/margin_logs/step_0000066.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8cc067fbf7e7ad1fe197790e218711413d99d6477b8111c5df6364c586937f4b +size 384 diff --git a/margin_logs/step_0000067.npy b/margin_logs/step_0000067.npy new file mode 100644 index 0000000..705cad4 --- /dev/null +++ b/margin_logs/step_0000067.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4af89a4d0594ad3560385aea5bc6bbcb02d5c9c76dd8265efceb271fbdb964f5 +size 384 diff --git a/margin_logs/step_0000068.npy b/margin_logs/step_0000068.npy new file mode 100644 index 0000000..41027f8 --- /dev/null +++ b/margin_logs/step_0000068.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:517d6060b6fb0b2c6a6c57c921d8c46c16620f06b6e5788854a52d22387f209c +size 384 diff --git a/margin_logs/step_0000069.npy b/margin_logs/step_0000069.npy new file mode 100644 index 0000000..c408081 --- /dev/null +++ b/margin_logs/step_0000069.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3c9082fb9de816b3ec2a6ffe104f350b8e760fa5f25ef4c15ae8b32b581f1a7 +size 384 diff --git a/margin_logs/step_0000070.npy b/margin_logs/step_0000070.npy new file mode 100644 index 0000000..ade6720 --- /dev/null +++ b/margin_logs/step_0000070.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b631a333f5e6f5f5c2dfca488a98e8ef80800d9ba9734bb174172f9c3f77004 +size 384 diff --git a/margin_logs/step_0000071.npy b/margin_logs/step_0000071.npy new file mode 100644 index 0000000..efc88ae --- /dev/null +++ b/margin_logs/step_0000071.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7adfe3f4fa662dcbba42aea3be986c50f3004651ab2d7e340c587fc35ef717a3 +size 384 diff --git a/margin_logs/step_0000072.npy b/margin_logs/step_0000072.npy new file mode 100644 index 0000000..277c7b1 --- /dev/null +++ b/margin_logs/step_0000072.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a689f4d13e686534f4717c643e955cbd8293c3f1b8e7e49720c30e50ec416557 +size 384 diff --git a/margin_logs/step_0000073.npy b/margin_logs/step_0000073.npy new file mode 100644 index 0000000..83f89c9 --- /dev/null +++ b/margin_logs/step_0000073.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:232d6bbed3c441e771e66e3a429a4830965448f7045d2a8fd1eee1f7b7d7aff8 +size 384 diff --git a/margin_logs/step_0000074.npy b/margin_logs/step_0000074.npy new file mode 100644 index 0000000..1b88393 --- /dev/null +++ b/margin_logs/step_0000074.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cffda3daf42456b99b235eb34eacaff35732d1dede082ac802c0e621e789c040 +size 384 diff --git a/margin_logs/step_0000075.npy b/margin_logs/step_0000075.npy new file mode 100644 index 0000000..cfa6823 --- /dev/null +++ b/margin_logs/step_0000075.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82e4e5bd6578fc1c9fbe3b0b31ab0e16518a50cc7ddf04e4ed7e78db5991f361 +size 384 diff --git a/margin_logs/step_0000076.npy b/margin_logs/step_0000076.npy new file mode 100644 index 0000000..678d0d9 --- /dev/null +++ b/margin_logs/step_0000076.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a5fbbfc0e04131d0606d8f3bb4f40de39a593cd6017c3b854373d1e361896df +size 384 diff --git a/margin_logs/step_0000077.npy b/margin_logs/step_0000077.npy new file mode 100644 index 0000000..7030e0c --- /dev/null +++ b/margin_logs/step_0000077.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a53f51d47f03dda78ad5423337fbc2a308e1c43db23df2fff437997965a909e5 +size 384 diff --git a/margin_logs/step_0000078.npy b/margin_logs/step_0000078.npy new file mode 100644 index 0000000..8f60236 --- /dev/null +++ b/margin_logs/step_0000078.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a415cba3dbf04e49bc0832068503cea508b760e18a83711ae277a4308d38668 +size 384 diff --git a/margin_logs/step_0000079.npy b/margin_logs/step_0000079.npy new file mode 100644 index 0000000..7f8da89 --- /dev/null +++ b/margin_logs/step_0000079.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d05365420d562957c53d82e352a41e3ac26c1052ae465fad4dafeb78ddc62b69 +size 384 diff --git a/margin_logs/step_0000080.npy b/margin_logs/step_0000080.npy new file mode 100644 index 0000000..6d22f02 --- /dev/null +++ b/margin_logs/step_0000080.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f6780280a5360c883b2b13a9dead00d93c084887b74abba39c2c412f2d9c043f +size 384 diff --git a/margin_logs/step_0000081.npy b/margin_logs/step_0000081.npy new file mode 100644 index 0000000..9c3f138 --- /dev/null +++ b/margin_logs/step_0000081.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e13de851827cfd683a4cecdd3770340db63f04f6b631a02841591bddd614b5e5 +size 384 diff --git a/margin_logs/step_0000082.npy b/margin_logs/step_0000082.npy new file mode 100644 index 0000000..7234a32 --- /dev/null +++ b/margin_logs/step_0000082.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5bc0a4b0fa5145f5571150a7cce56060739b4086e9203d0d8afc11b5a2624e9d +size 384 diff --git a/margin_logs/step_0000083.npy b/margin_logs/step_0000083.npy new file mode 100644 index 0000000..b644ff1 --- /dev/null +++ b/margin_logs/step_0000083.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17dc73db8e90b86b008cccffffededbc2dc78e007632d67b9e6275a47b00225b +size 384 diff --git a/margin_logs/step_0000084.npy b/margin_logs/step_0000084.npy new file mode 100644 index 0000000..a1814ea --- /dev/null +++ b/margin_logs/step_0000084.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69c4250f64902b8be24311e401cc22d5bb798d976575fe5d6f747e4b851daa3f +size 384 diff --git a/margin_logs/step_0000085.npy b/margin_logs/step_0000085.npy new file mode 100644 index 0000000..91320f5 --- /dev/null +++ b/margin_logs/step_0000085.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a63a34ae1a6e302727e8f0c7a6461d7a6f61bb84aa72d162a8d5dcdc6951ae6 +size 384 diff --git a/margin_logs/step_0000086.npy b/margin_logs/step_0000086.npy new file mode 100644 index 0000000..9cc0ff6 --- /dev/null +++ b/margin_logs/step_0000086.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0afc786ed9ea803e0718b284787a6017eddc2dfafcee3dd1ba082c872cae348 +size 384 diff --git a/margin_logs/step_0000087.npy b/margin_logs/step_0000087.npy new file mode 100644 index 0000000..1294ffd --- /dev/null +++ b/margin_logs/step_0000087.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f9e941e5850fe6f3ada27e86907f69a4715702b0f97d2b78c1b55692f6573573 +size 384 diff --git a/margin_logs/step_0000088.npy b/margin_logs/step_0000088.npy new file mode 100644 index 0000000..1d1e574 --- /dev/null +++ b/margin_logs/step_0000088.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d7ab082923b7c98bac6d17db4315bc25c539d662a25074ba6fd27e5329c61ce3 +size 384 diff --git a/margin_logs/step_0000089.npy b/margin_logs/step_0000089.npy new file mode 100644 index 0000000..20a1439 --- /dev/null +++ b/margin_logs/step_0000089.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d9ea4ecaa3cffd17fd398b5e091b64315c99ec25e81c7e73b7167d71c5a4dad +size 384 diff --git a/margin_logs/step_0000090.npy b/margin_logs/step_0000090.npy new file mode 100644 index 0000000..2ee1a89 --- /dev/null +++ b/margin_logs/step_0000090.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a499a8e241597bcc52fd661fd4e4a106af974a77088772ab8fe9c1f6a676c58f +size 384 diff --git a/margin_logs/step_0000091.npy b/margin_logs/step_0000091.npy new file mode 100644 index 0000000..af93528 --- /dev/null +++ b/margin_logs/step_0000091.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8fdf59971c4c3572854b2aa6313ed3d597c71369078bdcecb5ab8fb25fd6190f +size 384 diff --git a/margin_logs/step_0000092.npy b/margin_logs/step_0000092.npy new file mode 100644 index 0000000..65f878d --- /dev/null +++ b/margin_logs/step_0000092.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8673d2afa797fa45fca5e3a2f24028d2eec386fb8a08ff212cf9cfd2a2298097 +size 384 diff --git a/margin_logs/step_0000093.npy b/margin_logs/step_0000093.npy new file mode 100644 index 0000000..469acd3 --- /dev/null +++ b/margin_logs/step_0000093.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0fbce60344a91d670a3d83d3947a8c87b86afb3e77aaf3ba3786cbd8cfccc1cf +size 384 diff --git a/margin_logs/step_0000094.npy b/margin_logs/step_0000094.npy new file mode 100644 index 0000000..ae0110f --- /dev/null +++ b/margin_logs/step_0000094.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f57586efdf3402fe1828ab247e402d113206b699149428071fffe5f4588bea85 +size 384 diff --git a/margin_logs/step_0000095.npy b/margin_logs/step_0000095.npy new file mode 100644 index 0000000..cebf61b --- /dev/null +++ b/margin_logs/step_0000095.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5c3cd3b60b84ed6a4e3a54166a2efb9c3fa5a6c5b9798548f2816a833c071e1 +size 384 diff --git a/margin_logs/step_0000096.npy b/margin_logs/step_0000096.npy new file mode 100644 index 0000000..5b92e80 --- /dev/null +++ b/margin_logs/step_0000096.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2d60f53b14c4869a5934e42abd3e2ea21fced499d61fc9f11bbe264c6d7ea16 +size 384 diff --git a/margin_logs/step_0000097.npy b/margin_logs/step_0000097.npy new file mode 100644 index 0000000..df69c05 --- /dev/null +++ b/margin_logs/step_0000097.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32bd374274f6cb85fb42dbceb3e6d83e5e545e9843568949c685477b82cf9890 +size 384 diff --git a/margin_logs/step_0000098.npy b/margin_logs/step_0000098.npy new file mode 100644 index 0000000..cc3365a --- /dev/null +++ b/margin_logs/step_0000098.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97e2d7afc5c820447930fb367a9a395ca9e9beb215ea5720f12010fba9ec972d +size 384 diff --git a/margin_logs/step_0000099.npy b/margin_logs/step_0000099.npy new file mode 100644 index 0000000..9fa79ba --- /dev/null +++ b/margin_logs/step_0000099.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25484d55bc116581ffc641d572fff454e6d66eb0a190eca873c9bcf2adedb018 +size 384 diff --git a/margin_logs/step_0000100.npy b/margin_logs/step_0000100.npy new file mode 100644 index 0000000..d3953ca --- /dev/null +++ b/margin_logs/step_0000100.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1151c9711535f44b5a8fd5aa0a1dda77d22b705d3eb53b8d48144454db04f323 +size 384 diff --git a/margin_logs/step_0000101.npy b/margin_logs/step_0000101.npy new file mode 100644 index 0000000..cc61155 --- /dev/null +++ b/margin_logs/step_0000101.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66962e9bca07e019d73204a7bb363a2a16a90af24f3e30259489eb458be54e93 +size 384 diff --git a/margin_logs/step_0000102.npy b/margin_logs/step_0000102.npy new file mode 100644 index 0000000..ff54696 --- /dev/null +++ b/margin_logs/step_0000102.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e68abb04b1985a5c9df423d62fbf752bd256660f9ae746624c77600ec934ea23 +size 384 diff --git a/margin_logs/step_0000103.npy b/margin_logs/step_0000103.npy new file mode 100644 index 0000000..12beb18 --- /dev/null +++ b/margin_logs/step_0000103.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:099f289b330a8b74f897addd1ad33644b3604ba4f78ff41b4503af8bd30410c6 +size 384 diff --git a/margin_logs/step_0000104.npy b/margin_logs/step_0000104.npy new file mode 100644 index 0000000..990dc96 --- /dev/null +++ b/margin_logs/step_0000104.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73d32e2441b488f18eb133ac8ccf32daaad6469613497398fc3c4f1ff60ace9b +size 384 diff --git a/margin_logs/step_0000105.npy b/margin_logs/step_0000105.npy new file mode 100644 index 0000000..e8172b2 --- /dev/null +++ b/margin_logs/step_0000105.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2aa7c6900275b93fa048cf6996e927e0f3180ce90355aab42623959657ba44dd +size 384 diff --git a/margin_logs/step_0000106.npy b/margin_logs/step_0000106.npy new file mode 100644 index 0000000..871a473 --- /dev/null +++ b/margin_logs/step_0000106.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21b9d2d73d95e9e7bf2a9c65a4268e9e6b02c4a29f135a2951aeebd23787f4dd +size 384 diff --git a/margin_logs/step_0000107.npy b/margin_logs/step_0000107.npy new file mode 100644 index 0000000..f562b66 --- /dev/null +++ b/margin_logs/step_0000107.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f476a9e1521cf5ef87c992946fb74e6591414866a8c57df1488a32ded3fd9e4 +size 384 diff --git a/margin_logs/step_0000108.npy b/margin_logs/step_0000108.npy new file mode 100644 index 0000000..fe5a3ad --- /dev/null +++ b/margin_logs/step_0000108.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ed76813b4d9b77c95f264ce9b2cc3ab94dd751f6417a449bc2853b5d8084a02 +size 384 diff --git a/margin_logs/step_0000109.npy b/margin_logs/step_0000109.npy new file mode 100644 index 0000000..8e0ec7c --- /dev/null +++ b/margin_logs/step_0000109.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e646782866c7bd16f7e2073cbe6ffe75791de0a2b7041a27bd08db3bd239f46 +size 384 diff --git a/margin_logs/step_0000110.npy b/margin_logs/step_0000110.npy new file mode 100644 index 0000000..9696b57 --- /dev/null +++ b/margin_logs/step_0000110.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0535c0fd29a1a65052606bd73f75137fab7f24e725a4c8b52e3f4f913800c774 +size 384 diff --git a/margin_logs/step_0000111.npy b/margin_logs/step_0000111.npy new file mode 100644 index 0000000..1fe0f66 --- /dev/null +++ b/margin_logs/step_0000111.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b7cabc44c97181cbc33c16f7f83cfa9006b9f4604ffa94fd0df7264bbc7b80d +size 384 diff --git a/margin_logs/step_0000112.npy b/margin_logs/step_0000112.npy new file mode 100644 index 0000000..21ccf93 --- /dev/null +++ b/margin_logs/step_0000112.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc5dc14bf1f77dfcb2094c9f82faa3fb8d3c2f60567fb74b0439128efa24c69e +size 384 diff --git a/margin_logs/step_0000113.npy b/margin_logs/step_0000113.npy new file mode 100644 index 0000000..157d2fb --- /dev/null +++ b/margin_logs/step_0000113.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a93d8d7c246261610739792053356a9a872ececcc1ccf5fb74c861eada0de1f9 +size 384 diff --git a/margin_logs/step_0000114.npy b/margin_logs/step_0000114.npy new file mode 100644 index 0000000..ce690e0 --- /dev/null +++ b/margin_logs/step_0000114.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55d70a7547df0d5fa5d4b6d1b632f264e4ae71db856fd43e134d4f4072153149 +size 384 diff --git a/margin_logs/step_0000115.npy b/margin_logs/step_0000115.npy new file mode 100644 index 0000000..2e46f84 --- /dev/null +++ b/margin_logs/step_0000115.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b9d5062236bf9925de975c9fb9664d1e62a09b967885813a41f3f0d96828e60 +size 384 diff --git a/margin_logs/step_0000116.npy b/margin_logs/step_0000116.npy new file mode 100644 index 0000000..110bd92 --- /dev/null +++ b/margin_logs/step_0000116.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2225e7018e702382cee11a47f88bab786c27b9ca61eaa8b1e591064bb8e0a21 +size 384 diff --git a/margin_logs/step_0000117.npy b/margin_logs/step_0000117.npy new file mode 100644 index 0000000..ecbf727 --- /dev/null +++ b/margin_logs/step_0000117.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b2cfa0b9040534ac35b3ed2b685d6edfc979d989ab76d425d669a9adc1e5596 +size 384 diff --git a/margin_logs/step_0000118.npy b/margin_logs/step_0000118.npy new file mode 100644 index 0000000..f9dd9dd --- /dev/null +++ b/margin_logs/step_0000118.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17257b768fcc10fd60ebf92c4dddd28e87c9c219949f56bbd43d3d61e4aea1f7 +size 384 diff --git a/margin_logs/step_0000119.npy b/margin_logs/step_0000119.npy new file mode 100644 index 0000000..dbb88c0 --- /dev/null +++ b/margin_logs/step_0000119.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f90cfa5cfe67a4f09863483fe1a1e64a2531b5ac921879b7ab1491a1feae8cdc +size 384 diff --git a/margin_logs/step_0000120.npy b/margin_logs/step_0000120.npy new file mode 100644 index 0000000..c578efd --- /dev/null +++ b/margin_logs/step_0000120.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bec285addf07cc7fa31e9f1719b018de0f8e2646f17bcae11020ebe6665edbcf +size 384 diff --git a/margin_logs/step_0000121.npy b/margin_logs/step_0000121.npy new file mode 100644 index 0000000..80709b6 --- /dev/null +++ b/margin_logs/step_0000121.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e151740b504d9c0e22f8d85c33063787245ae2f5df63203dd7b498c88122975d +size 384 diff --git a/margin_logs/step_0000122.npy b/margin_logs/step_0000122.npy new file mode 100644 index 0000000..f0d2221 --- /dev/null +++ b/margin_logs/step_0000122.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33d3cacf7e76fd9908b7b2bf4b487f86bcaa25dd46438959e07d1adb738eac63 +size 384 diff --git a/margin_logs/step_0000123.npy b/margin_logs/step_0000123.npy new file mode 100644 index 0000000..5eb0750 --- /dev/null +++ b/margin_logs/step_0000123.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:424c02d354c9f1bcbad90a7018c806d7308293ef3191954f0628d026d839db9c +size 384 diff --git a/margin_logs/step_0000124.npy b/margin_logs/step_0000124.npy new file mode 100644 index 0000000..b27533d --- /dev/null +++ b/margin_logs/step_0000124.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a925e9a9c65cbc6da14a63c705a9265595516cfe7d0a7a7f27f81ebe36b0ea34 +size 384 diff --git a/margin_logs/step_0000125.npy b/margin_logs/step_0000125.npy new file mode 100644 index 0000000..077129c --- /dev/null +++ b/margin_logs/step_0000125.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a4366d2020d38be0627db8bc68e1eab17764793df23cb81f06e81bfa8524ac9e +size 384 diff --git a/margin_logs/step_0000126.npy b/margin_logs/step_0000126.npy new file mode 100644 index 0000000..32d19b6 --- /dev/null +++ b/margin_logs/step_0000126.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:541c4442b2e787c38b78312ef4fece76fc272ba391dfbe5a009c825e790ef37f +size 384 diff --git a/margin_logs/step_0000127.npy b/margin_logs/step_0000127.npy new file mode 100644 index 0000000..ddd2594 --- /dev/null +++ b/margin_logs/step_0000127.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a873b8589909be94a35fd38a47260481fece69be479395210ee4d8572fe832b +size 384 diff --git a/margin_logs/step_0000128.npy b/margin_logs/step_0000128.npy new file mode 100644 index 0000000..49e7782 --- /dev/null +++ b/margin_logs/step_0000128.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a0f7311315979dbd93874a77de430a3d457a35dbccb8b4f0e63acd5d053291b +size 384 diff --git a/margin_logs/step_0000129.npy b/margin_logs/step_0000129.npy new file mode 100644 index 0000000..cefad89 --- /dev/null +++ b/margin_logs/step_0000129.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f74245d1fc590177f8f53f0ff8b5bcda61ea2454f5d7019016d228feab838e94 +size 384 diff --git a/margin_logs/step_0000130.npy b/margin_logs/step_0000130.npy new file mode 100644 index 0000000..1614960 --- /dev/null +++ b/margin_logs/step_0000130.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:71b6066383aa39791cf4e0930cdaa1f0f20f0bb5e71eb005a2c2e37132ba1c3d +size 384 diff --git a/margin_logs/step_0000131.npy b/margin_logs/step_0000131.npy new file mode 100644 index 0000000..71c6ae8 --- /dev/null +++ b/margin_logs/step_0000131.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:903d3cc59d208bde96e2e2001c1d33d72908e61a340f52945975c8dc5ed9a3a8 +size 384 diff --git a/margin_logs/step_0000132.npy b/margin_logs/step_0000132.npy new file mode 100644 index 0000000..69c8c2f --- /dev/null +++ b/margin_logs/step_0000132.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:314d6afc9190b1fc9e5367cccb08835e02fc7740120a914ec45e2f35ecbf9142 +size 384 diff --git a/margin_logs/step_0000133.npy b/margin_logs/step_0000133.npy new file mode 100644 index 0000000..c40280a --- /dev/null +++ b/margin_logs/step_0000133.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d6904e6c4b2e5167ec739024034a97ee12911d3b4d25d270eb28cb407190c7d +size 384 diff --git a/margin_logs/step_0000134.npy b/margin_logs/step_0000134.npy new file mode 100644 index 0000000..7defcd3 --- /dev/null +++ b/margin_logs/step_0000134.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba9bb0d5354d19b4d8f81220a490a88b8f21f244962048b3e6fb2909e517215c +size 384 diff --git a/margin_logs/step_0000135.npy b/margin_logs/step_0000135.npy new file mode 100644 index 0000000..755ef5b --- /dev/null +++ b/margin_logs/step_0000135.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f2c6df1ecdb735403390a38751412619b3edc49f3b2c6bbdf5dd08aa19e11562 +size 384 diff --git a/margin_logs/step_0000136.npy b/margin_logs/step_0000136.npy new file mode 100644 index 0000000..398671a --- /dev/null +++ b/margin_logs/step_0000136.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0cf8e3f86bf219c8eadb3dcc26180dbcafb1d2dfa71c2efede537bd2bd046cf1 +size 384 diff --git a/margin_logs/step_0000137.npy b/margin_logs/step_0000137.npy new file mode 100644 index 0000000..4b7e30f --- /dev/null +++ b/margin_logs/step_0000137.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad98a9b84205c6b745eac9f5a51e40137ec8357320e6e27befcfba8970c3f513 +size 384 diff --git a/margin_logs/step_0000138.npy b/margin_logs/step_0000138.npy new file mode 100644 index 0000000..81193b9 --- /dev/null +++ b/margin_logs/step_0000138.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:54cd50e01288f179826b70ef9b638514d500c399c218b159c394457e5109b14b +size 384 diff --git a/margin_logs/step_0000139.npy b/margin_logs/step_0000139.npy new file mode 100644 index 0000000..a577a15 --- /dev/null +++ b/margin_logs/step_0000139.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:00ec902ec82d11d4a74c7954002d296d80e63dc35ba8f132a99ee72e5c7437b8 +size 384 diff --git a/margin_logs/step_0000140.npy b/margin_logs/step_0000140.npy new file mode 100644 index 0000000..e40c5bc --- /dev/null +++ b/margin_logs/step_0000140.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d2e0a87c1157b86ef8457480c9a98850e4c7161c91b70dafbdd8756f97d4c3e +size 384 diff --git a/margin_logs/step_0000141.npy b/margin_logs/step_0000141.npy new file mode 100644 index 0000000..c6d5cb4 --- /dev/null +++ b/margin_logs/step_0000141.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc7321882b0f6992575934945ac69f0ad5d2569cccbfe4a6fa4490ca4ba29e8f +size 384 diff --git a/margin_logs/step_0000142.npy b/margin_logs/step_0000142.npy new file mode 100644 index 0000000..e3604e8 --- /dev/null +++ b/margin_logs/step_0000142.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af41d933124f6bd7f72ba38fc412d3bb065c1993067f3f3eade2df9fcf2d6bb2 +size 384 diff --git a/margin_logs/step_0000143.npy b/margin_logs/step_0000143.npy new file mode 100644 index 0000000..6ad0dab --- /dev/null +++ b/margin_logs/step_0000143.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8254eeb8927b83be70e9beb8ee2a6a8bb1ea8014cdcd1bc878f4270fb51c8a45 +size 384 diff --git a/margin_logs/step_0000144.npy b/margin_logs/step_0000144.npy new file mode 100644 index 0000000..cabab51 --- /dev/null +++ b/margin_logs/step_0000144.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e9a5c20d4ff34fd62879b2918f14e3b718455b11035199b2df9c7fdd19be75f +size 384 diff --git a/margin_logs/step_0000145.npy b/margin_logs/step_0000145.npy new file mode 100644 index 0000000..9c4ad2d --- /dev/null +++ b/margin_logs/step_0000145.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51750255d87cdbc854d0907eb7642b482ad736fabd3220844310d657ad5b0be1 +size 384 diff --git a/margin_logs/step_0000146.npy b/margin_logs/step_0000146.npy new file mode 100644 index 0000000..e83c0bc --- /dev/null +++ b/margin_logs/step_0000146.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8bac51600fbb751ee9592e33307c3846580277ad0d1541ef5e38a084988c03d +size 384 diff --git a/margin_logs/step_0000147.npy b/margin_logs/step_0000147.npy new file mode 100644 index 0000000..b51a26e --- /dev/null +++ b/margin_logs/step_0000147.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8dd9f28b1da0aaddcb32164e5c0f7a8d4efa2c9fbc5f05eb3287295c33392d28 +size 384 diff --git a/margin_logs/step_0000148.npy b/margin_logs/step_0000148.npy new file mode 100644 index 0000000..8d7c23f --- /dev/null +++ b/margin_logs/step_0000148.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e54c1e411584c1e5f05ff792b47548e5fa11e21ba60ed83354cde3e9f8a9425c +size 384 diff --git a/margin_logs/step_0000149.npy b/margin_logs/step_0000149.npy new file mode 100644 index 0000000..a76b337 --- /dev/null +++ b/margin_logs/step_0000149.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b46922435925e4cefd5fc5e6f70af8a2e2be4855a98803586e3da499588d691 +size 384 diff --git a/margin_logs/step_0000150.npy b/margin_logs/step_0000150.npy new file mode 100644 index 0000000..fe6a4dd --- /dev/null +++ b/margin_logs/step_0000150.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a4708f183809670c5f91432ce2c873f18d99a86f0662e88431fd4d4de527b972 +size 384 diff --git a/margin_logs/step_0000151.npy b/margin_logs/step_0000151.npy new file mode 100644 index 0000000..a0d95d1 --- /dev/null +++ b/margin_logs/step_0000151.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93d34415aa5ef815c124f1ad31fb6449e9909036b42ec7a5b560704ca2c73e22 +size 384 diff --git a/margin_logs/step_0000152.npy b/margin_logs/step_0000152.npy new file mode 100644 index 0000000..4c6a223 --- /dev/null +++ b/margin_logs/step_0000152.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7117f4e1f8aed7a699b60f29623ec9badf3886bac534d91d4288c2d5b0dd279 +size 384 diff --git a/margin_logs/step_0000153.npy b/margin_logs/step_0000153.npy new file mode 100644 index 0000000..c7f19ce --- /dev/null +++ b/margin_logs/step_0000153.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fdd93fd1956c1c669dbb82dadd16384371ee66d0663c13325175a385aef3ec01 +size 384 diff --git a/margin_logs/step_0000154.npy b/margin_logs/step_0000154.npy new file mode 100644 index 0000000..4a19c83 --- /dev/null +++ b/margin_logs/step_0000154.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55e9ef70c41e887de92375f85f2ee30fa482ad1dc0467f9bb68e6214bd4b6463 +size 384 diff --git a/margin_logs/step_0000155.npy b/margin_logs/step_0000155.npy new file mode 100644 index 0000000..f8066ac --- /dev/null +++ b/margin_logs/step_0000155.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:77b33002eef7bf466b708c7388f74a0404b8fde525e12b3d1ab830541616850b +size 384 diff --git a/margin_logs/step_0000156.npy b/margin_logs/step_0000156.npy new file mode 100644 index 0000000..c42e7d1 --- /dev/null +++ b/margin_logs/step_0000156.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:abdaf2d621488736e8319304617b58700f605c9aaf3db3a54dfa5529ab09dc52 +size 384 diff --git a/margin_logs/step_0000157.npy b/margin_logs/step_0000157.npy new file mode 100644 index 0000000..6d7b60c --- /dev/null +++ b/margin_logs/step_0000157.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e903eb29fb370cb1313076f5bc768471e8f2812124a857e2751b857e8e5f3f1 +size 384 diff --git a/margin_logs/step_0000158.npy b/margin_logs/step_0000158.npy new file mode 100644 index 0000000..990e04e --- /dev/null +++ b/margin_logs/step_0000158.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:668de601531792b99280c8a06af07221b02da6f67c701820418cb847f49592c2 +size 384 diff --git a/margin_logs/step_0000159.npy b/margin_logs/step_0000159.npy new file mode 100644 index 0000000..832aa8b --- /dev/null +++ b/margin_logs/step_0000159.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5ae0fbb65abd5d625142adf42f337fa0b248bc42278c8e139585a83a134eaa7 +size 384 diff --git a/margin_logs/step_0000160.npy b/margin_logs/step_0000160.npy new file mode 100644 index 0000000..d0e74db --- /dev/null +++ b/margin_logs/step_0000160.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:54c81b145f8f0f0ee538f578853fead9e170e1fbb1f4c86945907784ef2d1e67 +size 384 diff --git a/margin_logs/step_0000161.npy b/margin_logs/step_0000161.npy new file mode 100644 index 0000000..907742b --- /dev/null +++ b/margin_logs/step_0000161.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff585ae8002b7be3c254d81820a234ab217f4b48a99d2ebd54c37512ad3ad1d3 +size 384 diff --git a/margin_logs/step_0000162.npy b/margin_logs/step_0000162.npy new file mode 100644 index 0000000..9c472ea --- /dev/null +++ b/margin_logs/step_0000162.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c25b33dd670fe104d760e615ee5b54d1025abff217aaef56da27bc78a065ee7 +size 384 diff --git a/margin_logs/step_0000163.npy b/margin_logs/step_0000163.npy new file mode 100644 index 0000000..115c0ab --- /dev/null +++ b/margin_logs/step_0000163.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec3e2a4123791490f88ee88e869fd7a7d0a3137e1da56a2e7b86ea66e87838b3 +size 384 diff --git a/margin_logs/step_0000164.npy b/margin_logs/step_0000164.npy new file mode 100644 index 0000000..bc06842 --- /dev/null +++ b/margin_logs/step_0000164.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3f3724cc877892985ae7f8f0a456ecdadf225d767e4e7ef68eda02cf442dd5d +size 384 diff --git a/margin_logs/step_0000165.npy b/margin_logs/step_0000165.npy new file mode 100644 index 0000000..91194e5 --- /dev/null +++ b/margin_logs/step_0000165.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dcd591797ff60d3262b931eecbce12b6d76871d8415abbc0c69c4aec91f0d29f +size 384 diff --git a/margin_logs/step_0000166.npy b/margin_logs/step_0000166.npy new file mode 100644 index 0000000..48cd100 --- /dev/null +++ b/margin_logs/step_0000166.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ce78648839dd74ffd7b3524323172729de5049b13db4b87b8705a4a8d04b5c8 +size 384 diff --git a/margin_logs/step_0000167.npy b/margin_logs/step_0000167.npy new file mode 100644 index 0000000..dd59e98 --- /dev/null +++ b/margin_logs/step_0000167.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:acc644b3503ed3df18d88ad3df295157c3295863d8cbc9b881776d306f639200 +size 384 diff --git a/margin_logs/step_0000168.npy b/margin_logs/step_0000168.npy new file mode 100644 index 0000000..008b9c9 --- /dev/null +++ b/margin_logs/step_0000168.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23ee46393c216223b348cc554702db9d24403f960791e791742db8385229fd67 +size 384 diff --git a/margin_logs/step_0000169.npy b/margin_logs/step_0000169.npy new file mode 100644 index 0000000..a91f8e6 --- /dev/null +++ b/margin_logs/step_0000169.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4949422b9fc5014f6eeb0377558ffbb0f1e1aab9e97275bd09ab6cd231df6753 +size 384 diff --git a/margin_logs/step_0000170.npy b/margin_logs/step_0000170.npy new file mode 100644 index 0000000..7a2ec11 --- /dev/null +++ b/margin_logs/step_0000170.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70c8541c2e26d164b67cbbaf42ecbe9b64f224575783ab0686110b7ad5ebb9ba +size 384 diff --git a/margin_logs/step_0000171.npy b/margin_logs/step_0000171.npy new file mode 100644 index 0000000..b8fd3db --- /dev/null +++ b/margin_logs/step_0000171.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d42833043433476ca11efed4277619e91f1e0fbde6871f35dd90ba5ce291ae9 +size 384 diff --git a/margin_logs/step_0000172.npy b/margin_logs/step_0000172.npy new file mode 100644 index 0000000..a1a2a31 --- /dev/null +++ b/margin_logs/step_0000172.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0213459af199f984db444c991495decef9b0e98092a2e85b0d9aeeeff713fffc +size 384 diff --git a/margin_logs/step_0000173.npy b/margin_logs/step_0000173.npy new file mode 100644 index 0000000..9b4765e --- /dev/null +++ b/margin_logs/step_0000173.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:540e85525bd3be60827cbe57a32c6bf1cb970e67f942f3622c581a446e93513f +size 384 diff --git a/margin_logs/step_0000174.npy b/margin_logs/step_0000174.npy new file mode 100644 index 0000000..bde1415 --- /dev/null +++ b/margin_logs/step_0000174.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:719e4fc047e6815e91201cfd76097c249c11209edce9fe9f2dc554c7e5b6a20f +size 384 diff --git a/margin_logs/step_0000175.npy b/margin_logs/step_0000175.npy new file mode 100644 index 0000000..51bcdbf --- /dev/null +++ b/margin_logs/step_0000175.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6abf3d89bd8be31a9b2d0375163630b34e3aedbef5b0926ba2aba796c3b5f7ce +size 384 diff --git a/margin_logs/step_0000176.npy b/margin_logs/step_0000176.npy new file mode 100644 index 0000000..bdc81c9 --- /dev/null +++ b/margin_logs/step_0000176.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08864d15aa4af8e929b2e17a497574e13b4d51da579433bdaa305477f1d5a00c +size 384 diff --git a/margin_logs/step_0000177.npy b/margin_logs/step_0000177.npy new file mode 100644 index 0000000..7b69048 --- /dev/null +++ b/margin_logs/step_0000177.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f90a4c8a067093421a0db4d5c9f5c3561881b6f7e6f528b1bf33fcf3e9c7a96 +size 384 diff --git a/margin_logs/step_0000178.npy b/margin_logs/step_0000178.npy new file mode 100644 index 0000000..c75c065 --- /dev/null +++ b/margin_logs/step_0000178.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:81a75aceeaca275eb90b9430dcf94b7fd5f9d327fdafbd9573b4390e2926bb95 +size 384 diff --git a/margin_logs/step_0000179.npy b/margin_logs/step_0000179.npy new file mode 100644 index 0000000..80631c9 --- /dev/null +++ b/margin_logs/step_0000179.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f6aea4be3ffa7684fc6fcff241fc1b8d0eec909dd27d30d3a2b6a1e21011b669 +size 384 diff --git a/margin_logs/step_0000180.npy b/margin_logs/step_0000180.npy new file mode 100644 index 0000000..b782389 --- /dev/null +++ b/margin_logs/step_0000180.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:96b310ff5092aa4e70e2de496f419cf722f0c6439668cbeb9259c07bf920c56c +size 384 diff --git a/margin_logs/step_0000181.npy b/margin_logs/step_0000181.npy new file mode 100644 index 0000000..5a7eb43 --- /dev/null +++ b/margin_logs/step_0000181.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e776f733671624a04e0813280823c58b99bfbfd8889d9a3948a2ceefc790b8df +size 384 diff --git a/margin_logs/step_0000182.npy b/margin_logs/step_0000182.npy new file mode 100644 index 0000000..e8e794b --- /dev/null +++ b/margin_logs/step_0000182.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f10e3d8c59a9e1bb31257e459a56f5e080183bd774b95c0c9fc573d325757162 +size 384 diff --git a/margin_logs/step_0000183.npy b/margin_logs/step_0000183.npy new file mode 100644 index 0000000..d99f650 --- /dev/null +++ b/margin_logs/step_0000183.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:367dd930efe2bee935c47024aaaa5dc158d9b1cea12a6213b6189cba17a24e93 +size 384 diff --git a/margin_logs/step_0000184.npy b/margin_logs/step_0000184.npy new file mode 100644 index 0000000..036c7e7 --- /dev/null +++ b/margin_logs/step_0000184.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3662fbd78c1dc52fe8091aa397de831f75825e71ba519a8d1953575a44ecf91a +size 384 diff --git a/margin_logs/step_0000185.npy b/margin_logs/step_0000185.npy new file mode 100644 index 0000000..1127f51 --- /dev/null +++ b/margin_logs/step_0000185.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:686b85c949a391c5bc6f2ede04fba99af9e81bf88d175ff9696313b0eb3aa9fb +size 384 diff --git a/margin_logs/step_0000186.npy b/margin_logs/step_0000186.npy new file mode 100644 index 0000000..7358bfb --- /dev/null +++ b/margin_logs/step_0000186.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1eb9456bf11629bda223780cb731ebac340d17a974e0b7e4ec1c33f0ff18cee1 +size 384 diff --git a/margin_logs/step_0000187.npy b/margin_logs/step_0000187.npy new file mode 100644 index 0000000..e7974c0 --- /dev/null +++ b/margin_logs/step_0000187.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59fb7fdec4dae42791c0e0bf4ba42bd4d36ec231cfe61a8fe9a7c09e07cd8915 +size 384 diff --git a/margin_logs/step_0000188.npy b/margin_logs/step_0000188.npy new file mode 100644 index 0000000..a373a51 --- /dev/null +++ b/margin_logs/step_0000188.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d85e3724ea2e9af7ea09d73b1a6f132531e48234ee2498019a27e380e23466fc +size 384 diff --git a/margin_logs/step_0000189.npy b/margin_logs/step_0000189.npy new file mode 100644 index 0000000..e0b5821 --- /dev/null +++ b/margin_logs/step_0000189.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:707cfbc3412e782ea1839368ae31452f886c4d3acee8b2192a113da0a83cb642 +size 384 diff --git a/margin_logs/step_0000190.npy b/margin_logs/step_0000190.npy new file mode 100644 index 0000000..3a7d6ee --- /dev/null +++ b/margin_logs/step_0000190.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc0284a13a993853b816d315fae8ae2dcc5f364ad2eb4bddd10eeb7813d45056 +size 384 diff --git a/margin_logs/step_0000191.npy b/margin_logs/step_0000191.npy new file mode 100644 index 0000000..cf88be0 --- /dev/null +++ b/margin_logs/step_0000191.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb6038d68f82b92f60f1f25e76db75eacafc787ede183e220353f71c8423882b +size 384 diff --git a/margin_logs/step_0000192.npy b/margin_logs/step_0000192.npy new file mode 100644 index 0000000..be3eca6 --- /dev/null +++ b/margin_logs/step_0000192.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6aa43bc0156c4e876b52f8203382be4badc711ede2bbdb2ac71efcb143e0155 +size 384 diff --git a/margin_logs/step_0000193.npy b/margin_logs/step_0000193.npy new file mode 100644 index 0000000..e1c305f --- /dev/null +++ b/margin_logs/step_0000193.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff3bb19000f8660fee764a7e4f7b1838961d4c143040a577736a812dfab39a9a +size 384 diff --git a/margin_logs/step_0000194.npy b/margin_logs/step_0000194.npy new file mode 100644 index 0000000..75c2a33 --- /dev/null +++ b/margin_logs/step_0000194.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:932c2ddbb7d68a8ce125897c108411d7d9178bf90d555d053d23e42d31527939 +size 384 diff --git a/margin_logs/step_0000195.npy b/margin_logs/step_0000195.npy new file mode 100644 index 0000000..ac10c3c --- /dev/null +++ b/margin_logs/step_0000195.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a92f68821740e14ba5d97febd500e96ecb219baec138103d6c538b7195df06fb +size 384 diff --git a/margin_logs/step_0000196.npy b/margin_logs/step_0000196.npy new file mode 100644 index 0000000..dd2df5b --- /dev/null +++ b/margin_logs/step_0000196.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07aae48230770141519c0cf055d406f5c0bb9116e64f063e65dabaa2b4cc792b +size 384 diff --git a/margin_logs/step_0000197.npy b/margin_logs/step_0000197.npy new file mode 100644 index 0000000..d113d5b --- /dev/null +++ b/margin_logs/step_0000197.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1441dbd60a3d0fca2861aa1fb2a4124f1f2207180d0cab88d1181a622d393f05 +size 384 diff --git a/margin_logs/step_0000198.npy b/margin_logs/step_0000198.npy new file mode 100644 index 0000000..b7949f5 --- /dev/null +++ b/margin_logs/step_0000198.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f41a93d67e755f5d82170f3c628c16b50dc73d7b6d010bfb8e7874230992b2e7 +size 384 diff --git a/margin_logs/step_0000199.npy b/margin_logs/step_0000199.npy new file mode 100644 index 0000000..a0ed538 --- /dev/null +++ b/margin_logs/step_0000199.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e22d8a66a49561fa7d2a9964eba8ec13b155045b89a44cf2ba345e1b2ef8e902 +size 384 diff --git a/margin_logs/step_0000200.npy b/margin_logs/step_0000200.npy new file mode 100644 index 0000000..515d6f3 --- /dev/null +++ b/margin_logs/step_0000200.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:095c4db7e6b28bd872d05273c1a6c36257ab8e3c575bb15e37248bea8275c2f9 +size 384 diff --git a/margin_logs/step_0000201.npy b/margin_logs/step_0000201.npy new file mode 100644 index 0000000..a45c076 --- /dev/null +++ b/margin_logs/step_0000201.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff918b65d1fd89cd428b3878c0f29ece98bfc88065dece50f4135d526c242c97 +size 384 diff --git a/margin_logs/step_0000202.npy b/margin_logs/step_0000202.npy new file mode 100644 index 0000000..debea9f --- /dev/null +++ b/margin_logs/step_0000202.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:965f22699e34cbd61f9d4f437a7589fe0bf53e98697da461b842291ce5abeaf8 +size 384 diff --git a/margin_logs/step_0000203.npy b/margin_logs/step_0000203.npy new file mode 100644 index 0000000..f491e76 --- /dev/null +++ b/margin_logs/step_0000203.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10a10b479ba91801d8576dbc0ccc7eb25c93c96504ef4edceee24cc6819fa2a0 +size 384 diff --git a/margin_logs/step_0000204.npy b/margin_logs/step_0000204.npy new file mode 100644 index 0000000..f64c38a --- /dev/null +++ b/margin_logs/step_0000204.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f6e86986d826e70d703e5cbeef788f861b22b9beda2016e80dce326582bbe812 +size 384 diff --git a/margin_logs/step_0000205.npy b/margin_logs/step_0000205.npy new file mode 100644 index 0000000..93b3de3 --- /dev/null +++ b/margin_logs/step_0000205.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1858c1fbfd74b67d5ebf6c4d3d04097085234670788cf6cd65914b05090581e8 +size 384 diff --git a/margin_logs/step_0000206.npy b/margin_logs/step_0000206.npy new file mode 100644 index 0000000..6e7df62 --- /dev/null +++ b/margin_logs/step_0000206.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a2d29ba9b9429fc1d41bd5d232d8538769dc32a6b6cf02df729b0c327ed00f4 +size 384 diff --git a/margin_logs/step_0000207.npy b/margin_logs/step_0000207.npy new file mode 100644 index 0000000..566e77c --- /dev/null +++ b/margin_logs/step_0000207.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:37592c6d16dc95ef294f0f9b17943634242da958f8a2fac485385f4b054a5ea0 +size 384 diff --git a/margin_logs/step_0000208.npy b/margin_logs/step_0000208.npy new file mode 100644 index 0000000..f4a349a --- /dev/null +++ b/margin_logs/step_0000208.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e957dd113dee9f624c0617a20d3c8c031a36d66e90a25129567e870494b7f4ea +size 384 diff --git a/margin_logs/step_0000209.npy b/margin_logs/step_0000209.npy new file mode 100644 index 0000000..5f9956d --- /dev/null +++ b/margin_logs/step_0000209.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74a47fa170645a630c0d30a6d9ce83479c791c3dc5d5bb392360aa080da62469 +size 384 diff --git a/margin_logs/step_0000210.npy b/margin_logs/step_0000210.npy new file mode 100644 index 0000000..bb095ea --- /dev/null +++ b/margin_logs/step_0000210.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08bde34f4f25b6579d129cfd471b3b9ce947a92df308ec2462d52c15bbed80af +size 384 diff --git a/margin_logs/step_0000211.npy b/margin_logs/step_0000211.npy new file mode 100644 index 0000000..e906040 --- /dev/null +++ b/margin_logs/step_0000211.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf3f7b8486537e90c2ca9cb65b614769c5b317238827bcfb63df3e23c0b49004 +size 384 diff --git a/margin_logs/step_0000212.npy b/margin_logs/step_0000212.npy new file mode 100644 index 0000000..2735376 --- /dev/null +++ b/margin_logs/step_0000212.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a68cc16f07f4a04dcae6da3a8dd634f8e3f49bc5d88de0570dc3b3ee0d3287c +size 384 diff --git a/margin_logs/step_0000213.npy b/margin_logs/step_0000213.npy new file mode 100644 index 0000000..4924253 --- /dev/null +++ b/margin_logs/step_0000213.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ea73001881488bbcda7ddb464c890c1e371d50c5fe2eb6a85bd3f6153838bc2 +size 384 diff --git a/margin_logs/step_0000214.npy b/margin_logs/step_0000214.npy new file mode 100644 index 0000000..82c9c8a --- /dev/null +++ b/margin_logs/step_0000214.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:283855eeeabd6c7ad3e11e6c343b4911a7eede6926584e70f9fac3fbb32b8ad7 +size 384 diff --git a/margin_logs/step_0000215.npy b/margin_logs/step_0000215.npy new file mode 100644 index 0000000..5b6ceec --- /dev/null +++ b/margin_logs/step_0000215.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:140b19bd48898eca0b7d567c86e69f093e73bd562011abdce32a3b617fefcfab +size 384 diff --git a/margin_logs/step_0000216.npy b/margin_logs/step_0000216.npy new file mode 100644 index 0000000..6cacc46 --- /dev/null +++ b/margin_logs/step_0000216.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8591e7b7eaa902d6452fc2c42cd6d5560bd8c0a20b21666b4c222312e3aaaf95 +size 384 diff --git a/margin_logs/step_0000217.npy b/margin_logs/step_0000217.npy new file mode 100644 index 0000000..898adb4 --- /dev/null +++ b/margin_logs/step_0000217.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27ea9112bf8cd3307e35eaeaf00a8e2159664b6b2b9f46909cd931da91d43769 +size 384 diff --git a/margin_logs/step_0000218.npy b/margin_logs/step_0000218.npy new file mode 100644 index 0000000..398e907 --- /dev/null +++ b/margin_logs/step_0000218.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe3b084e4a54f4757d8fcdb71041bfef0e9bb155eec44a631bc1a982cf53b991 +size 384 diff --git a/margin_logs/step_0000219.npy b/margin_logs/step_0000219.npy new file mode 100644 index 0000000..cdb477a --- /dev/null +++ b/margin_logs/step_0000219.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d9c7fb4d795d56036f4fa7618760a077f298fae7e95d6a094226f2d6b46768e3 +size 384 diff --git a/margin_logs/step_0000220.npy b/margin_logs/step_0000220.npy new file mode 100644 index 0000000..6f29847 --- /dev/null +++ b/margin_logs/step_0000220.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8c5b3942677dd76ca070d03c86b12d5679ee53ef1fa4155aae4bf25dc04bd06 +size 384 diff --git a/margin_logs/step_0000221.npy b/margin_logs/step_0000221.npy new file mode 100644 index 0000000..9c68592 --- /dev/null +++ b/margin_logs/step_0000221.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c10c13b6648227a3e3301992ca39859b7f1374ccb1f8097b3c33a5b737abc09 +size 384 diff --git a/margin_logs/step_0000222.npy b/margin_logs/step_0000222.npy new file mode 100644 index 0000000..79d35ba --- /dev/null +++ b/margin_logs/step_0000222.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2cea3d342b285090a4b0326eace2345840e37d5be76d0f6778bcb8008ba3b7b +size 384 diff --git a/margin_logs/step_0000223.npy b/margin_logs/step_0000223.npy new file mode 100644 index 0000000..b3ad083 --- /dev/null +++ b/margin_logs/step_0000223.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f155465e6d17ab3c0ff9c206f247a4ffefefc3c6439ec0bd1045d836c34fb526 +size 384 diff --git a/margin_logs/step_0000224.npy b/margin_logs/step_0000224.npy new file mode 100644 index 0000000..818518f --- /dev/null +++ b/margin_logs/step_0000224.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2d45246d41b79ab58daafcec24add7ec72fdac454e88df633d38b4381705659 +size 384 diff --git a/margin_logs/step_0000225.npy b/margin_logs/step_0000225.npy new file mode 100644 index 0000000..bbda1d8 --- /dev/null +++ b/margin_logs/step_0000225.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:849b777334d7c97d7d0d3d848e54dd9f3681a4f2a1b5d732d132efffc4594cf9 +size 384 diff --git a/margin_logs/step_0000226.npy b/margin_logs/step_0000226.npy new file mode 100644 index 0000000..ffa8e3f --- /dev/null +++ b/margin_logs/step_0000226.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:05235ee8b57fc4844d4901a2b7c69bf962f35cf6c03b40f6e1a2dbe2958bc4bf +size 384 diff --git a/margin_logs/step_0000227.npy b/margin_logs/step_0000227.npy new file mode 100644 index 0000000..0df45b6 --- /dev/null +++ b/margin_logs/step_0000227.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2957658de99f03e5911ce373898486fb9fb92286c5a0567aa100bbb334177cc +size 384 diff --git a/margin_logs/step_0000228.npy b/margin_logs/step_0000228.npy new file mode 100644 index 0000000..7dbc5b6 --- /dev/null +++ b/margin_logs/step_0000228.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1755e10d0e4814c20daec83713f3579be6b83e2d84a9f97386e29b76e186aeef +size 384 diff --git a/margin_logs/step_0000229.npy b/margin_logs/step_0000229.npy new file mode 100644 index 0000000..e6797f1 --- /dev/null +++ b/margin_logs/step_0000229.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56376355a5cb17badac606d19457ed401dbe2723fc6ee4d698ba3ebf29965547 +size 384 diff --git a/margin_logs/step_0000230.npy b/margin_logs/step_0000230.npy new file mode 100644 index 0000000..9478832 --- /dev/null +++ b/margin_logs/step_0000230.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89e6b55e7837111436d7e79f3e5bbf93616cd7684202aebf71ceb87617bda06e +size 384 diff --git a/margin_logs/step_0000231.npy b/margin_logs/step_0000231.npy new file mode 100644 index 0000000..bf7c879 --- /dev/null +++ b/margin_logs/step_0000231.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aec93e0683ab8f48fc39fc534096056318ecf9363530afcb1ce10e9af60c1720 +size 384 diff --git a/margin_logs/step_0000232.npy b/margin_logs/step_0000232.npy new file mode 100644 index 0000000..c076b59 --- /dev/null +++ b/margin_logs/step_0000232.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d62fa80bdc92b60f2b9a4280d0b0f76a97634278acbeca9837acbd9fba2fbb97 +size 384 diff --git a/margin_logs/step_0000233.npy b/margin_logs/step_0000233.npy new file mode 100644 index 0000000..64a14e9 --- /dev/null +++ b/margin_logs/step_0000233.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7849933c43d5e47a3e2b3fef973e8b5633eab07a8a859d134b27d7c7affe286 +size 384 diff --git a/margin_logs/step_0000234.npy b/margin_logs/step_0000234.npy new file mode 100644 index 0000000..8995545 --- /dev/null +++ b/margin_logs/step_0000234.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dfd4837ecf4ac0a8d3645d220d80b9a7628b6e0a87e68373add0b6d7135f55c3 +size 384 diff --git a/margin_logs/step_0000235.npy b/margin_logs/step_0000235.npy new file mode 100644 index 0000000..40e09e2 --- /dev/null +++ b/margin_logs/step_0000235.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f10753e545bae485876971ea570f9c5078b7d1cd0a23b43c5acf94a24450bd5d +size 384 diff --git a/margin_logs/step_0000236.npy b/margin_logs/step_0000236.npy new file mode 100644 index 0000000..ea296c0 --- /dev/null +++ b/margin_logs/step_0000236.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac694421a5e75835e8aeef1a94a53319e1436f1a9c06851e6037c98a50158a43 +size 384 diff --git a/margin_logs/step_0000237.npy b/margin_logs/step_0000237.npy new file mode 100644 index 0000000..104b644 --- /dev/null +++ b/margin_logs/step_0000237.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9fe037c7f28070b5c6eddadcd8d6203b22788d43eaa7035b0edfa6690230e8d5 +size 384 diff --git a/margin_logs/step_0000238.npy b/margin_logs/step_0000238.npy new file mode 100644 index 0000000..f62d2a5 --- /dev/null +++ b/margin_logs/step_0000238.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0851b1740dbab523f8fabe21d33357d7698d7f984ebd2868b8a74372193aea3 +size 384 diff --git a/margin_logs/step_0000239.npy b/margin_logs/step_0000239.npy new file mode 100644 index 0000000..abc2c6c --- /dev/null +++ b/margin_logs/step_0000239.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:71d0226cf137ff057831ba6cda3b75308cf3a7cebd9916c5a5e636bc8f65d63f +size 384 diff --git a/margin_logs/step_0000240.npy b/margin_logs/step_0000240.npy new file mode 100644 index 0000000..440dd13 --- /dev/null +++ b/margin_logs/step_0000240.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c2cebe9852bad042b17cc507e100b9d63a2a90c5f9b83f6d232bb44452fa326 +size 384 diff --git a/margin_logs/step_0000241.npy b/margin_logs/step_0000241.npy new file mode 100644 index 0000000..dc9e605 --- /dev/null +++ b/margin_logs/step_0000241.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d7e9e33353894d75db8192978c081b89c534eeb838af079c2c928ffde71a2f0 +size 384 diff --git a/margin_logs/step_0000242.npy b/margin_logs/step_0000242.npy new file mode 100644 index 0000000..bd5478c --- /dev/null +++ b/margin_logs/step_0000242.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:86aeb96866cb7dacb0b87247f916dbebf662743c6569a231f3aef8c95bf1e0ce +size 384 diff --git a/margin_logs/step_0000243.npy b/margin_logs/step_0000243.npy new file mode 100644 index 0000000..14ca0b5 --- /dev/null +++ b/margin_logs/step_0000243.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8fb594b95081c43296eba8ca93e4dfac6aeb94ac598c94a7aae2da98d783102 +size 384 diff --git a/margin_logs/step_0000244.npy b/margin_logs/step_0000244.npy new file mode 100644 index 0000000..8a603c8 --- /dev/null +++ b/margin_logs/step_0000244.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10ecd176b84664ef79aca35202aa2c19a91926b043893e850f27366ff8a79c92 +size 384 diff --git a/margin_logs/step_0000245.npy b/margin_logs/step_0000245.npy new file mode 100644 index 0000000..7936cdb --- /dev/null +++ b/margin_logs/step_0000245.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b305c971d16a7e6a65fcbbbac7384daca96affd016cb6fa4a1a9e525e184c56a +size 384 diff --git a/margin_logs/step_0000246.npy b/margin_logs/step_0000246.npy new file mode 100644 index 0000000..61b2a1e --- /dev/null +++ b/margin_logs/step_0000246.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eedb36c8c742a4688d6adf1593e31626d8877f1343b6bf0e7820e7fe877b0427 +size 384 diff --git a/margin_logs/step_0000247.npy b/margin_logs/step_0000247.npy new file mode 100644 index 0000000..2c66290 --- /dev/null +++ b/margin_logs/step_0000247.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fae9493c52ba7840578ee0731dd6b9a62c5feffd6ce0d3271fbf94897bf20fe0 +size 384 diff --git a/margin_logs/step_0000248.npy b/margin_logs/step_0000248.npy new file mode 100644 index 0000000..541fcfd --- /dev/null +++ b/margin_logs/step_0000248.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49dc7d577e8b3bce7c1d639c1a2edffcd11f38c0ac8027c67e76224756dfef46 +size 384 diff --git a/margin_logs/step_0000249.npy b/margin_logs/step_0000249.npy new file mode 100644 index 0000000..606aee7 --- /dev/null +++ b/margin_logs/step_0000249.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1cd2f1ead27e9577e1cbf31849881761e44f6b93739e444f366123b06d410230 +size 384 diff --git a/margin_logs/step_0000250.npy b/margin_logs/step_0000250.npy new file mode 100644 index 0000000..4f0cb72 --- /dev/null +++ b/margin_logs/step_0000250.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a070fe6b750730a9cb2d9480a22706b01f4370a9e53c108354f7cce1a143e7f7 +size 384 diff --git a/margin_logs/step_0000251.npy b/margin_logs/step_0000251.npy new file mode 100644 index 0000000..a845d8e --- /dev/null +++ b/margin_logs/step_0000251.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb1bd5b8cba9f1a444d9480ddbbe488e5876c6ef711a684fab0e789626ac95dc +size 384 diff --git a/margin_logs/step_0000252.npy b/margin_logs/step_0000252.npy new file mode 100644 index 0000000..89785cf --- /dev/null +++ b/margin_logs/step_0000252.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b4e09cacfdf7cedef83c781003f66b0f593a1da8ab7d9b74d2b62deea84efe7 +size 384 diff --git a/margin_logs/step_0000253.npy b/margin_logs/step_0000253.npy new file mode 100644 index 0000000..f5b23d1 --- /dev/null +++ b/margin_logs/step_0000253.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ccc0352bae16d706e6c7ea3570c73988765dff0914e0740e64f51de5f3c4c3d +size 384 diff --git a/margin_logs/step_0000254.npy b/margin_logs/step_0000254.npy new file mode 100644 index 0000000..637d192 --- /dev/null +++ b/margin_logs/step_0000254.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4289fea0c69e39f75532c12c3beea9af8f1f6717e85879b7b75476457ed59178 +size 384 diff --git a/margin_logs/step_0000255.npy b/margin_logs/step_0000255.npy new file mode 100644 index 0000000..a6e40c0 --- /dev/null +++ b/margin_logs/step_0000255.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:212846f0d65b5ead023f765627ccefb6de869fd2cd089c6e01fb5ed84e41e8b6 +size 384 diff --git a/margin_logs/step_0000256.npy b/margin_logs/step_0000256.npy new file mode 100644 index 0000000..405b430 --- /dev/null +++ b/margin_logs/step_0000256.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:906488c0952f885db868725d477ef20b6e201736f00b77bce03339eeffac2c1b +size 384 diff --git a/margin_logs/step_0000257.npy b/margin_logs/step_0000257.npy new file mode 100644 index 0000000..38cae08 --- /dev/null +++ b/margin_logs/step_0000257.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:acee940e03b3c17c49f01426485391e16c1d317c39d9bdc54c55c702aa6ca61b +size 384 diff --git a/margin_logs/step_0000258.npy b/margin_logs/step_0000258.npy new file mode 100644 index 0000000..bcd1495 --- /dev/null +++ b/margin_logs/step_0000258.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b809d4c52941b63257b6cd0059ac35f42fe21d925731066499e2d0d4b4bc2bfd +size 384 diff --git a/margin_logs/step_0000259.npy b/margin_logs/step_0000259.npy new file mode 100644 index 0000000..96a3d74 --- /dev/null +++ b/margin_logs/step_0000259.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f23dbc35cf9d7c8e86a866b1b65399ffa03e1770d391b53a07a7ec8677df7e5e +size 384 diff --git a/margin_logs/step_0000260.npy b/margin_logs/step_0000260.npy new file mode 100644 index 0000000..06190cc --- /dev/null +++ b/margin_logs/step_0000260.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78e2c251c942da5a2d0f8858f9c9f31e1204d4964e9cc32da01930d946b40854 +size 384 diff --git a/margin_logs/step_0000261.npy b/margin_logs/step_0000261.npy new file mode 100644 index 0000000..3fb103c --- /dev/null +++ b/margin_logs/step_0000261.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f834bb38e7bcabb01e9fc766932c5e162229ea6d26c18cda05f60a6ce54d7e4c +size 384 diff --git a/margin_logs/step_0000262.npy b/margin_logs/step_0000262.npy new file mode 100644 index 0000000..3692012 --- /dev/null +++ b/margin_logs/step_0000262.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:406b6d9213a4dffb380d7f8d7333274eba91385aaa75e7c9441f89b1c9d53377 +size 384 diff --git a/margin_logs/step_0000263.npy b/margin_logs/step_0000263.npy new file mode 100644 index 0000000..afb9c31 --- /dev/null +++ b/margin_logs/step_0000263.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49b5df12bc69a00e355e459011f4991357ce63c51ea16b8c08a1637b9b94723f +size 384 diff --git a/margin_logs/step_0000264.npy b/margin_logs/step_0000264.npy new file mode 100644 index 0000000..00f91a4 --- /dev/null +++ b/margin_logs/step_0000264.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cfcd959b9b4557f7c07a899be55255e56d2b0d678d362d2d158372f75c22eabb +size 384 diff --git a/margin_logs/step_0000265.npy b/margin_logs/step_0000265.npy new file mode 100644 index 0000000..686c818 --- /dev/null +++ b/margin_logs/step_0000265.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7fd48a343cd605af41e32a10db027a505fa4a9ed628e6a0e6afa7ca1fce659b +size 384 diff --git a/margin_logs/step_0000266.npy b/margin_logs/step_0000266.npy new file mode 100644 index 0000000..f87296f --- /dev/null +++ b/margin_logs/step_0000266.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b34e880151831b854e1a833940408fd8dea4f43ea7a75846683e38771fabecc4 +size 384 diff --git a/margin_logs/step_0000267.npy b/margin_logs/step_0000267.npy new file mode 100644 index 0000000..8e055ae --- /dev/null +++ b/margin_logs/step_0000267.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c0b649b7e6c5d03ed2417bee6e3a49bb0610587c330d31ddabc84c74abd8ddb2 +size 384 diff --git a/margin_logs/step_0000268.npy b/margin_logs/step_0000268.npy new file mode 100644 index 0000000..980f2b3 --- /dev/null +++ b/margin_logs/step_0000268.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b96119f74d3db93806e52d4ceaf8415951a2382926a852e3a70e86189a23a23 +size 384 diff --git a/margin_logs/step_0000269.npy b/margin_logs/step_0000269.npy new file mode 100644 index 0000000..c29acdb --- /dev/null +++ b/margin_logs/step_0000269.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b1e3e124b13286c23c4d9679f55cabeb61d9de282e13b3d9459ae46a7d2c0a94 +size 384 diff --git a/margin_logs/step_0000270.npy b/margin_logs/step_0000270.npy new file mode 100644 index 0000000..c150025 --- /dev/null +++ b/margin_logs/step_0000270.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:52eb111e79e516a222a3ce6bf1623d8ea9c8420ef29e29127543ce05766d2a5e +size 384 diff --git a/margin_logs/step_0000271.npy b/margin_logs/step_0000271.npy new file mode 100644 index 0000000..cb91701 --- /dev/null +++ b/margin_logs/step_0000271.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eae1051c2471274658f9abeb68b7270a39a858171d1859d7b4a1bec0eb4fdcaa +size 384 diff --git a/margin_logs/step_0000272.npy b/margin_logs/step_0000272.npy new file mode 100644 index 0000000..cdc2f48 --- /dev/null +++ b/margin_logs/step_0000272.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ebfb7649dd2255eff7c6b5338dacdeccd90e44ac059c1fa8a6b0c20652d86098 +size 384 diff --git a/margin_logs/step_0000273.npy b/margin_logs/step_0000273.npy new file mode 100644 index 0000000..3df0905 --- /dev/null +++ b/margin_logs/step_0000273.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a6d7969d50f89bcd34044642509c266f2efa007e25c7af61b9b25caaa483a88 +size 384 diff --git a/margin_logs/step_0000274.npy b/margin_logs/step_0000274.npy new file mode 100644 index 0000000..812a0d2 --- /dev/null +++ b/margin_logs/step_0000274.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4bea6fcd41a9a7fa94e622516f08c2dac32b0f96985c7cabee824bcf9aee1479 +size 384 diff --git a/margin_logs/step_0000275.npy b/margin_logs/step_0000275.npy new file mode 100644 index 0000000..5d945d9 --- /dev/null +++ b/margin_logs/step_0000275.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a251a1bbaa8ae54975e1bbd852f55b493f80ab88474df56dcb9c32950a155cd8 +size 384 diff --git a/margin_logs/step_0000276.npy b/margin_logs/step_0000276.npy new file mode 100644 index 0000000..ecbb275 --- /dev/null +++ b/margin_logs/step_0000276.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:532371ec0dbfa9b54511703247064ccd996afdea63c6ad7c15bc3692ae4fc70c +size 384 diff --git a/margin_logs/step_0000277.npy b/margin_logs/step_0000277.npy new file mode 100644 index 0000000..b5993a0 --- /dev/null +++ b/margin_logs/step_0000277.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d0471c65eec213dbf08c88866ddba4ce5c52dca750159de9124c7f8a48bdbc71 +size 384 diff --git a/margin_logs/step_0000278.npy b/margin_logs/step_0000278.npy new file mode 100644 index 0000000..8af7900 --- /dev/null +++ b/margin_logs/step_0000278.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b39b8ad6e007628db966ed418d3a93d1ab074d2261ae02beb56a5ab508c1107b +size 384 diff --git a/margin_logs/step_0000279.npy b/margin_logs/step_0000279.npy new file mode 100644 index 0000000..e2841b5 --- /dev/null +++ b/margin_logs/step_0000279.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8042a683119dd7766935ee62fd7a42d58183f49d8700a577670fab302f750ba8 +size 384 diff --git a/margin_logs/step_0000280.npy b/margin_logs/step_0000280.npy new file mode 100644 index 0000000..68217c3 --- /dev/null +++ b/margin_logs/step_0000280.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e1f9863cdc22d1554a695fd374a73fcea083a761e51972c976362f6b648060c +size 384 diff --git a/margin_logs/step_0000281.npy b/margin_logs/step_0000281.npy new file mode 100644 index 0000000..bc35bc3 --- /dev/null +++ b/margin_logs/step_0000281.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b9f5657ad9c81c4d9dbd45b89b586e95297692b04d864d5b1008020e6fa9b7c6 +size 384 diff --git a/margin_logs/step_0000282.npy b/margin_logs/step_0000282.npy new file mode 100644 index 0000000..e5565b8 --- /dev/null +++ b/margin_logs/step_0000282.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3047785170251006e2232fa0d3fca4b28226b3a54c62803693672ff638c957f9 +size 384 diff --git a/margin_logs/step_0000283.npy b/margin_logs/step_0000283.npy new file mode 100644 index 0000000..dbc9331 --- /dev/null +++ b/margin_logs/step_0000283.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1bd9985b1688646b106cae4bbce03e8b8bf948b080051e98d61d83123dd051e +size 384 diff --git a/margin_logs/step_0000284.npy b/margin_logs/step_0000284.npy new file mode 100644 index 0000000..00626c4 --- /dev/null +++ b/margin_logs/step_0000284.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e53b3f38f71db7b2a256eedf37601d424d67871255b4b39a63d2ac6a532e87a +size 384 diff --git a/margin_logs/step_0000285.npy b/margin_logs/step_0000285.npy new file mode 100644 index 0000000..ef2874c --- /dev/null +++ b/margin_logs/step_0000285.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93edb4c0a4d1e8fd85d1446eaf0c0ed8239cc01c6f0a1735929b2903f7a8dc8b +size 384 diff --git a/margin_logs/step_0000286.npy b/margin_logs/step_0000286.npy new file mode 100644 index 0000000..05ac153 --- /dev/null +++ b/margin_logs/step_0000286.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bbb811b41fb8845cc63b0371106b074cd5a89ddda9c66b576f2854f5da8afbed +size 384 diff --git a/margin_logs/step_0000287.npy b/margin_logs/step_0000287.npy new file mode 100644 index 0000000..637c4a8 --- /dev/null +++ b/margin_logs/step_0000287.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ce5fc539d79b546d1ba8ada707ce2d80405e3cfb2e85b897ca29083e0718a48 +size 384 diff --git a/margin_logs/step_0000288.npy b/margin_logs/step_0000288.npy new file mode 100644 index 0000000..dfa8ddb --- /dev/null +++ b/margin_logs/step_0000288.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba15059f549c3a70d4e53385e4d3e86611208e4067231ba57b6ac09a8642ec89 +size 384 diff --git a/margin_logs/step_0000289.npy b/margin_logs/step_0000289.npy new file mode 100644 index 0000000..041abc0 --- /dev/null +++ b/margin_logs/step_0000289.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1ed3d782c118770bf4473bc014d6649cbb5d4b9005d15cf7cab942cf0f5e32a +size 384 diff --git a/margin_logs/step_0000290.npy b/margin_logs/step_0000290.npy new file mode 100644 index 0000000..b8c551e --- /dev/null +++ b/margin_logs/step_0000290.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:668ac0081a4c24fdee63301d65b9552648cd38beb86b5353f54527152eec79ea +size 384 diff --git a/margin_logs/step_0000291.npy b/margin_logs/step_0000291.npy new file mode 100644 index 0000000..8fa85b6 --- /dev/null +++ b/margin_logs/step_0000291.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72cba6a7868ba0f29258dbee7698a15b9a46dcd20867e267094c9f60d7b5ff2c +size 384 diff --git a/margin_logs/step_0000292.npy b/margin_logs/step_0000292.npy new file mode 100644 index 0000000..101b9fc --- /dev/null +++ b/margin_logs/step_0000292.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51ee05f9750cec0b5b361e36828cb20a01fa8909ebb6931fc45de74ad208c86c +size 384 diff --git a/margin_logs/step_0000293.npy b/margin_logs/step_0000293.npy new file mode 100644 index 0000000..a872791 --- /dev/null +++ b/margin_logs/step_0000293.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de98eb39dc9b31de12c3e415a0cc27f39fafd61acf63987d108524fa424d22f1 +size 384 diff --git a/margin_logs/step_0000294.npy b/margin_logs/step_0000294.npy new file mode 100644 index 0000000..9bad74d --- /dev/null +++ b/margin_logs/step_0000294.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab15d4c142daa8b04c234489d7b7b25eff185c4c863182dcbf716f6c5f4df5d4 +size 384 diff --git a/margin_logs/step_0000295.npy b/margin_logs/step_0000295.npy new file mode 100644 index 0000000..346e8c2 --- /dev/null +++ b/margin_logs/step_0000295.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0259989c76b85d58ccb0702edb962dd736aad78e3de65f16a218053444c7908 +size 384 diff --git a/margin_logs/step_0000296.npy b/margin_logs/step_0000296.npy new file mode 100644 index 0000000..032c709 --- /dev/null +++ b/margin_logs/step_0000296.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1db5a8b7d3278e5b0761d09fcf8f23d5952f4ff91e35a6fdb08b372eea8544ad +size 384 diff --git a/margin_logs/step_0000297.npy b/margin_logs/step_0000297.npy new file mode 100644 index 0000000..4abe0cc --- /dev/null +++ b/margin_logs/step_0000297.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d8840ba98626797c32259c104bb491bddcba2eceff1802b1b58f5553cedc905 +size 384 diff --git a/margin_logs/step_0000298.npy b/margin_logs/step_0000298.npy new file mode 100644 index 0000000..08fb6cd --- /dev/null +++ b/margin_logs/step_0000298.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1407c0ed04e1a7df0ecb112a55325cf7fd1fd088ed32a46fdd216875bfaa9f6d +size 384 diff --git a/margin_logs/step_0000299.npy b/margin_logs/step_0000299.npy new file mode 100644 index 0000000..58736d6 --- /dev/null +++ b/margin_logs/step_0000299.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:759982b65528137abde441d127c3c48278d371ad94080c6b4a395c6c5213266f +size 384 diff --git a/margin_logs/step_0000300.npy b/margin_logs/step_0000300.npy new file mode 100644 index 0000000..144d4e8 --- /dev/null +++ b/margin_logs/step_0000300.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7ff865fcc8903238df6638b35dc44d30cf9dd4164b12b83e1c806684f54e8c5 +size 384 diff --git a/margin_logs/step_0000301.npy b/margin_logs/step_0000301.npy new file mode 100644 index 0000000..c2e5505 --- /dev/null +++ b/margin_logs/step_0000301.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26ec5ccee40a4407572766d1a74f715789ee73431ddf5db39aa806c279b99b57 +size 384 diff --git a/margin_logs/step_0000302.npy b/margin_logs/step_0000302.npy new file mode 100644 index 0000000..2200c27 --- /dev/null +++ b/margin_logs/step_0000302.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e4b3aed39c6713a2a1718573b2cf83618325ef80d9fc607f04033689ff89754 +size 384 diff --git a/margin_logs/step_0000303.npy b/margin_logs/step_0000303.npy new file mode 100644 index 0000000..cb1419c --- /dev/null +++ b/margin_logs/step_0000303.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0582f1e45ab6ad00ac111509e18381e3ccf426be0ecdb767b80783961a5fa7ac +size 384 diff --git a/margin_logs/step_0000304.npy b/margin_logs/step_0000304.npy new file mode 100644 index 0000000..6913407 --- /dev/null +++ b/margin_logs/step_0000304.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2cacdd18c556fdab264e28c8ba283a3de5254e1de21e50f2b02f10b055d5ed16 +size 384 diff --git a/margin_logs/step_0000305.npy b/margin_logs/step_0000305.npy new file mode 100644 index 0000000..2366c2f --- /dev/null +++ b/margin_logs/step_0000305.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0278c8eefe282bdc57660e8da974542a79a1f6b68c72c179be88ccd4b194080d +size 384 diff --git a/margin_logs/step_0000306.npy b/margin_logs/step_0000306.npy new file mode 100644 index 0000000..cc65987 --- /dev/null +++ b/margin_logs/step_0000306.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4145931dcbe4667d94e725d272426d40337a7ca9f9635d3d411a995bdf21de56 +size 384 diff --git a/margin_logs/step_0000307.npy b/margin_logs/step_0000307.npy new file mode 100644 index 0000000..55efe46 --- /dev/null +++ b/margin_logs/step_0000307.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b88ab82a0aa9fd4a3dc13d416b851f2d528fd5a2c5ea3a3f064be66e832572fd +size 384 diff --git a/margin_logs/step_0000308.npy b/margin_logs/step_0000308.npy new file mode 100644 index 0000000..3f0309f --- /dev/null +++ b/margin_logs/step_0000308.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce5bead85d41c9ff20cd9ea88eac52d3bff55d942d9c23e390b5d0c9d7534cc5 +size 384 diff --git a/margin_logs/step_0000309.npy b/margin_logs/step_0000309.npy new file mode 100644 index 0000000..175caee --- /dev/null +++ b/margin_logs/step_0000309.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2cd55a7ae1eddab35eb43112b181f466cc15921c8b36e69162f5aefa0b85aa92 +size 384 diff --git a/margin_logs/step_0000310.npy b/margin_logs/step_0000310.npy new file mode 100644 index 0000000..2f3976c --- /dev/null +++ b/margin_logs/step_0000310.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c5f6c161cb4cad64854b4f05cd33a69f0a695b43232470e1b379d91e41adaf64 +size 384 diff --git a/margin_logs/step_0000311.npy b/margin_logs/step_0000311.npy new file mode 100644 index 0000000..4a1175d --- /dev/null +++ b/margin_logs/step_0000311.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:00ceca9c91dd57e2dcc26e4322b389997f4c9355a6ad0aaf679687cd11f7aaa6 +size 384 diff --git a/margin_logs/step_0000312.npy b/margin_logs/step_0000312.npy new file mode 100644 index 0000000..ae59c84 --- /dev/null +++ b/margin_logs/step_0000312.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:570db83fb3e32f72ce2aafd4e62c71fcc009eebb2610c57b0289ee26e7a41393 +size 384 diff --git a/margin_logs/step_0000313.npy b/margin_logs/step_0000313.npy new file mode 100644 index 0000000..6467371 --- /dev/null +++ b/margin_logs/step_0000313.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f6c44fed012201877c5efef16ba60f0a6a98c5dd3c5f66ce35feb74bdcf416e0 +size 384 diff --git a/margin_logs/step_0000314.npy b/margin_logs/step_0000314.npy new file mode 100644 index 0000000..6443ca7 --- /dev/null +++ b/margin_logs/step_0000314.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2d3a3a11200e2a0a5382ac70a09d64881dd92561ecf7568f849940018e8f536 +size 384 diff --git a/margin_logs/step_0000315.npy b/margin_logs/step_0000315.npy new file mode 100644 index 0000000..287bccd --- /dev/null +++ b/margin_logs/step_0000315.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7753c1220da65c75225e28c023a337cb963b0c4913db7b8e2e8ad878cbd9a829 +size 384 diff --git a/margin_logs/step_0000316.npy b/margin_logs/step_0000316.npy new file mode 100644 index 0000000..9a10c8f --- /dev/null +++ b/margin_logs/step_0000316.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3091a3364f0337c52b299a7af9e3673e3bfec83df8156691d6e03666e5628586 +size 384 diff --git a/margin_logs/step_0000317.npy b/margin_logs/step_0000317.npy new file mode 100644 index 0000000..ae7163c --- /dev/null +++ b/margin_logs/step_0000317.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4f15426c233f20047e1023a8cce77386f1f2bf763ab39aa20d9eabd7dbc3dda +size 384 diff --git a/margin_logs/step_0000318.npy b/margin_logs/step_0000318.npy new file mode 100644 index 0000000..fd7e3ea --- /dev/null +++ b/margin_logs/step_0000318.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:311bb074dc9431b3bf5a3b1d48469d5ba6c023c36dd03d720ac6fc3a0b69d1b9 +size 384 diff --git a/margin_logs/step_0000319.npy b/margin_logs/step_0000319.npy new file mode 100644 index 0000000..3b1427a --- /dev/null +++ b/margin_logs/step_0000319.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ad3b65ff6643b7f0aa5e9b265d303a6f286c56f438c6d607e92c1b0e4f62692 +size 384 diff --git a/margin_logs/step_0000320.npy b/margin_logs/step_0000320.npy new file mode 100644 index 0000000..d99a66b --- /dev/null +++ b/margin_logs/step_0000320.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d5c4c20cb20ce254c820e2ff3033f219f0bd0e681c84829f167814e8997ce87 +size 384 diff --git a/margin_logs/step_0000321.npy b/margin_logs/step_0000321.npy new file mode 100644 index 0000000..e0c9abe --- /dev/null +++ b/margin_logs/step_0000321.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2611bbc887348af1d69920be2ff40e2d230cc6f72e22ed814365f2750a75931 +size 384 diff --git a/margin_logs/step_0000322.npy b/margin_logs/step_0000322.npy new file mode 100644 index 0000000..8995afb --- /dev/null +++ b/margin_logs/step_0000322.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29262c40988d7a753c2832d9e5bc7347e001a3f339e278d81142092272a28ff3 +size 384 diff --git a/margin_logs/step_0000323.npy b/margin_logs/step_0000323.npy new file mode 100644 index 0000000..559f8f0 --- /dev/null +++ b/margin_logs/step_0000323.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ccaa757c09254d834711c5a44437757a4b074bfcf510e99eda9e0ca4ee346e93 +size 384 diff --git a/margin_logs/step_0000324.npy b/margin_logs/step_0000324.npy new file mode 100644 index 0000000..ac636e1 --- /dev/null +++ b/margin_logs/step_0000324.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab963d63f87398fd5063bc0896d9aaa84ec333ec586e1c2bcbbf888d36895f61 +size 384 diff --git a/margin_logs/step_0000325.npy b/margin_logs/step_0000325.npy new file mode 100644 index 0000000..6cb3d5a --- /dev/null +++ b/margin_logs/step_0000325.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b3d87774da7480f40fd68fbe27bdb1ff33527ad5ce2bb8203ee72a386be7b7f +size 384 diff --git a/margin_logs/step_0000326.npy b/margin_logs/step_0000326.npy new file mode 100644 index 0000000..1693e88 --- /dev/null +++ b/margin_logs/step_0000326.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0978258a17f48b4ef8ced2cac82bdf7670ea27309221a5ee11e21006a5e04409 +size 384 diff --git a/margin_logs/step_0000327.npy b/margin_logs/step_0000327.npy new file mode 100644 index 0000000..0a8ca12 --- /dev/null +++ b/margin_logs/step_0000327.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17f8b9b216b31eb44e67fe144daf69936d30dbf03b62bdcd3def411252e8a8b3 +size 384 diff --git a/margin_logs/step_0000328.npy b/margin_logs/step_0000328.npy new file mode 100644 index 0000000..242f86a --- /dev/null +++ b/margin_logs/step_0000328.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e6ad477bef375bee187934f6cef4e53d71890118f599d78f3e6758762412fc5 +size 384 diff --git a/margin_logs/step_0000329.npy b/margin_logs/step_0000329.npy new file mode 100644 index 0000000..a7f95ec --- /dev/null +++ b/margin_logs/step_0000329.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d73f8cdc05cf93e554e8ddc59542e12f21fd123861b8aae259345cbb53d01265 +size 384 diff --git a/margin_logs/step_0000330.npy b/margin_logs/step_0000330.npy new file mode 100644 index 0000000..e0ae654 --- /dev/null +++ b/margin_logs/step_0000330.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e0a43d19c0a4d6b7cd7b72b7f1b35bfd004b0406883c205eae15d977415bcdbd +size 384 diff --git a/margin_logs/step_0000331.npy b/margin_logs/step_0000331.npy new file mode 100644 index 0000000..26d604c --- /dev/null +++ b/margin_logs/step_0000331.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:52f39b816a349b4faa8c42a68a018a1433a84e576c41f4739a8074210fbb5b1a +size 384 diff --git a/margin_logs/step_0000332.npy b/margin_logs/step_0000332.npy new file mode 100644 index 0000000..0791db5 --- /dev/null +++ b/margin_logs/step_0000332.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6fdc6812b9d3f188acfe8fa30cbdc6e6cbc110a6d3108026480ae4e289c187bd +size 384 diff --git a/margin_logs/step_0000333.npy b/margin_logs/step_0000333.npy new file mode 100644 index 0000000..a36a339 --- /dev/null +++ b/margin_logs/step_0000333.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6857fdecf6ab91b2d2e68e155e3c7208b2958f26cd0bf5abd58263122c2efe18 +size 384 diff --git a/margin_logs/step_0000334.npy b/margin_logs/step_0000334.npy new file mode 100644 index 0000000..0b70aff --- /dev/null +++ b/margin_logs/step_0000334.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84311d996b10e43122f168d7d48b65580fde239fdbcfc03b8c148abac11d4037 +size 384 diff --git a/margin_logs/step_0000335.npy b/margin_logs/step_0000335.npy new file mode 100644 index 0000000..3ce971e --- /dev/null +++ b/margin_logs/step_0000335.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cca608fe1f802ef94edc7ea9b1409722d802e12beb564bed8f1c02de161e0488 +size 384 diff --git a/margin_logs/step_0000336.npy b/margin_logs/step_0000336.npy new file mode 100644 index 0000000..a93104f --- /dev/null +++ b/margin_logs/step_0000336.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a30d0a4a5b435e1109b293381cb7764dd414eb69bc19583736e38503a117163 +size 384 diff --git a/margin_logs/step_0000337.npy b/margin_logs/step_0000337.npy new file mode 100644 index 0000000..2beb41d --- /dev/null +++ b/margin_logs/step_0000337.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d89e1817545800846171d1a32e6cf8cc482ff07b2eb2f07b1e676424cada69c +size 384 diff --git a/margin_logs/step_0000338.npy b/margin_logs/step_0000338.npy new file mode 100644 index 0000000..ebc4d23 --- /dev/null +++ b/margin_logs/step_0000338.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6957dde0b613d3feaeb9991f8d25e8cae976733735480a108ccf419fe6cdbbfb +size 384 diff --git a/margin_logs/step_0000339.npy b/margin_logs/step_0000339.npy new file mode 100644 index 0000000..d4e1897 --- /dev/null +++ b/margin_logs/step_0000339.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4021237244064a30056c46b3aca11b0ce129d80b90cf3b0eff3f533350e0383b +size 384 diff --git a/margin_logs/step_0000340.npy b/margin_logs/step_0000340.npy new file mode 100644 index 0000000..b3b52eb --- /dev/null +++ b/margin_logs/step_0000340.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b66ef18f7e539c16b1776b71f9bf868946deff9b97affa4539578ab6ac7a112a +size 384 diff --git a/margin_logs/step_0000341.npy b/margin_logs/step_0000341.npy new file mode 100644 index 0000000..24b518f --- /dev/null +++ b/margin_logs/step_0000341.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5d41b15a61d257b04fa826f6c83a56a154ca15629415d521df2f53c6df463f4 +size 384 diff --git a/margin_logs/step_0000342.npy b/margin_logs/step_0000342.npy new file mode 100644 index 0000000..bf30eb9 --- /dev/null +++ b/margin_logs/step_0000342.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40660d6d857292fa3d1bec2e1667edf665c135cafb6c7f27493017738ab106c9 +size 384 diff --git a/margin_logs/step_0000343.npy b/margin_logs/step_0000343.npy new file mode 100644 index 0000000..949bd41 --- /dev/null +++ b/margin_logs/step_0000343.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f8778015a489915f3cb29973eeb2659d42365a8fc940cb14f860f5abfd70355d +size 384 diff --git a/margin_logs/step_0000344.npy b/margin_logs/step_0000344.npy new file mode 100644 index 0000000..a78bbe5 --- /dev/null +++ b/margin_logs/step_0000344.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0681bb5e6d8482e6437d2444f478a3f39d1e1f4652f17641532370477fdc7fa7 +size 384 diff --git a/margin_logs/step_0000345.npy b/margin_logs/step_0000345.npy new file mode 100644 index 0000000..07822bc --- /dev/null +++ b/margin_logs/step_0000345.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:044135a41d04ee8997b40757a4719ea5b9d3a73e1e3e16e713383424448a692d +size 384 diff --git a/margin_logs/step_0000346.npy b/margin_logs/step_0000346.npy new file mode 100644 index 0000000..a3891dd --- /dev/null +++ b/margin_logs/step_0000346.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e499075ac62ba6ec52262492f61b23fd97d22ada85298aaef5d680ad5ca4f360 +size 384 diff --git a/margin_logs/step_0000347.npy b/margin_logs/step_0000347.npy new file mode 100644 index 0000000..88e41ff --- /dev/null +++ b/margin_logs/step_0000347.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb04fa1c2372b338067ebb60f7552b735bf8cec4973bebcdeca1c30fab5cc160 +size 384 diff --git a/margin_logs/step_0000348.npy b/margin_logs/step_0000348.npy new file mode 100644 index 0000000..df950e0 --- /dev/null +++ b/margin_logs/step_0000348.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9bc57234b3a86622a059af057d63941096fa277bb3f77e816956211ce011888d +size 384 diff --git a/margin_logs/step_0000349.npy b/margin_logs/step_0000349.npy new file mode 100644 index 0000000..060627e --- /dev/null +++ b/margin_logs/step_0000349.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b13a7abd0d94fb4eaee4ce6e0c806d5e1e1d155964d9f5c146ebe88a5256c819 +size 384 diff --git a/margin_logs/step_0000350.npy b/margin_logs/step_0000350.npy new file mode 100644 index 0000000..eb2c6b4 --- /dev/null +++ b/margin_logs/step_0000350.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32b3a77fad3e7e23566cded87ef93711963d82a0066d5cb9f83c0d2d97f412b1 +size 384 diff --git a/margin_logs/step_0000351.npy b/margin_logs/step_0000351.npy new file mode 100644 index 0000000..71c414d --- /dev/null +++ b/margin_logs/step_0000351.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:30e18f2be6dbb64469a173846b0ea8fd8f11a9322f8a93ea13ad80703a7a7a37 +size 384 diff --git a/margin_logs/step_0000352.npy b/margin_logs/step_0000352.npy new file mode 100644 index 0000000..a48c36d --- /dev/null +++ b/margin_logs/step_0000352.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f13f199b33f6c9231e95bb72157de2e9d22e3c26d45b9efba143fe16df1114a7 +size 384 diff --git a/margin_logs/step_0000353.npy b/margin_logs/step_0000353.npy new file mode 100644 index 0000000..849c228 --- /dev/null +++ b/margin_logs/step_0000353.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83a3bd7dce59f3697dfbf0175fa8e332ad40e53ccc9a4d0915397f56d3d49c62 +size 384 diff --git a/margin_logs/step_0000354.npy b/margin_logs/step_0000354.npy new file mode 100644 index 0000000..73c30f0 --- /dev/null +++ b/margin_logs/step_0000354.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a6c25822173711a48c625b7a7c93b612de9872414a9a9a05b5957f442c4615f +size 384 diff --git a/margin_logs/step_0000355.npy b/margin_logs/step_0000355.npy new file mode 100644 index 0000000..5fdead2 --- /dev/null +++ b/margin_logs/step_0000355.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b329cfe778c14bebeffe321c1fcf64db9dc163556b07eb98b1f2f7c72653c3b0 +size 384 diff --git a/margin_logs/step_0000356.npy b/margin_logs/step_0000356.npy new file mode 100644 index 0000000..c2361b1 --- /dev/null +++ b/margin_logs/step_0000356.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0cc93cbd164d18bb407936648de6ddfa9cec305edd7ceef47119b612a5f3825 +size 384 diff --git a/margin_logs/step_0000357.npy b/margin_logs/step_0000357.npy new file mode 100644 index 0000000..9f3a0f7 --- /dev/null +++ b/margin_logs/step_0000357.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:521602f402208ec4c6e0094fcc18ca812e68ff046907fe3016712bcd4cfd840f +size 384 diff --git a/margin_logs/step_0000358.npy b/margin_logs/step_0000358.npy new file mode 100644 index 0000000..1b1dc93 --- /dev/null +++ b/margin_logs/step_0000358.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:acc6926256f2ec3840431e7c21249db408cc06184c36057c43c27539da62cd5a +size 384 diff --git a/margin_logs/step_0000359.npy b/margin_logs/step_0000359.npy new file mode 100644 index 0000000..8da7f7c --- /dev/null +++ b/margin_logs/step_0000359.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be8499457210ef9c039d54da78bb49c5abb8ecc66740d0526100520723980ebb +size 384 diff --git a/margin_logs/step_0000360.npy b/margin_logs/step_0000360.npy new file mode 100644 index 0000000..7b2b53a --- /dev/null +++ b/margin_logs/step_0000360.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df5c6b6ec218b1677bbeb611b22f897fa9a4958352b2b403f6b469cf9217c1bb +size 384 diff --git a/margin_logs/step_0000361.npy b/margin_logs/step_0000361.npy new file mode 100644 index 0000000..953d76c --- /dev/null +++ b/margin_logs/step_0000361.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d7f6f1ca6fb203f2aba90af00aa70018854ec4f92d7621107e82476c6552d189 +size 384 diff --git a/margin_logs/step_0000362.npy b/margin_logs/step_0000362.npy new file mode 100644 index 0000000..ba155ae --- /dev/null +++ b/margin_logs/step_0000362.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62e7646c8d17b8ef030ff162a8aa2e55ebcd9d619fb9d33fa224d8353289d566 +size 384 diff --git a/margin_logs/step_0000363.npy b/margin_logs/step_0000363.npy new file mode 100644 index 0000000..d79f03a --- /dev/null +++ b/margin_logs/step_0000363.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e2cc8e021f0f618eab0a43e25b754e21c3a03bef9293db47fe944f9041b3ba8 +size 384 diff --git a/margin_logs/step_0000364.npy b/margin_logs/step_0000364.npy new file mode 100644 index 0000000..2278cff --- /dev/null +++ b/margin_logs/step_0000364.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c0aeeee11d00bd5dd2d46a41de0e162597cdbf8ef42ec9c6cc05fde7adda3ef4 +size 384 diff --git a/margin_logs/step_0000365.npy b/margin_logs/step_0000365.npy new file mode 100644 index 0000000..268efcb --- /dev/null +++ b/margin_logs/step_0000365.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a078bd546d40892f5214c351591652c02347defed1aa99364e24e7f2cc333cdc +size 384 diff --git a/margin_logs/step_0000366.npy b/margin_logs/step_0000366.npy new file mode 100644 index 0000000..1d0a9f3 --- /dev/null +++ b/margin_logs/step_0000366.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67537bc1af0e7bb059fa0fdc37b8aaba7ee1064d58aabd024e37614b44012397 +size 384 diff --git a/margin_logs/step_0000367.npy b/margin_logs/step_0000367.npy new file mode 100644 index 0000000..abbc747 --- /dev/null +++ b/margin_logs/step_0000367.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:19af598182c70d33339739c6c7606279fdbb3ab98dde1528e363ce053ec5cf50 +size 384 diff --git a/margin_logs/step_0000368.npy b/margin_logs/step_0000368.npy new file mode 100644 index 0000000..daa4245 --- /dev/null +++ b/margin_logs/step_0000368.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4da8bfdaa8751c1ec7fd4f34443da5bd8edded86e889f079201b470e06e86c0 +size 384 diff --git a/margin_logs/step_0000369.npy b/margin_logs/step_0000369.npy new file mode 100644 index 0000000..885db56 --- /dev/null +++ b/margin_logs/step_0000369.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8dd8973f4ba8afaf038253e34e3869b92a15f839064b695169add2aa7a6406a1 +size 384 diff --git a/margin_logs/step_0000370.npy b/margin_logs/step_0000370.npy new file mode 100644 index 0000000..be661c5 --- /dev/null +++ b/margin_logs/step_0000370.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b4eeae5075b51c122fb60873dde69134445e974c813d9182cdd8b52753f24a9 +size 384 diff --git a/margin_logs/step_0000371.npy b/margin_logs/step_0000371.npy new file mode 100644 index 0000000..54255d7 --- /dev/null +++ b/margin_logs/step_0000371.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15e9a276fad24959077729dbb3dde65cfd4a521230f3ef1bc5de7abcffa840e1 +size 384 diff --git a/margin_logs/step_0000372.npy b/margin_logs/step_0000372.npy new file mode 100644 index 0000000..947d3f0 --- /dev/null +++ b/margin_logs/step_0000372.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:428d4d5456977b36a6eea4c2d3ab5fa40633c539df27defcdda326d0fe000522 +size 384 diff --git a/margin_logs/step_0000373.npy b/margin_logs/step_0000373.npy new file mode 100644 index 0000000..7aa7e68 --- /dev/null +++ b/margin_logs/step_0000373.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:242dc3299a51b68da7e8f9682e24472f0de532c393817adff43e15dcd816688e +size 384 diff --git a/margin_logs/step_0000374.npy b/margin_logs/step_0000374.npy new file mode 100644 index 0000000..b8ef11e --- /dev/null +++ b/margin_logs/step_0000374.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c19830e0af82257a7e148cbf52b79e50aa1894830f55c40a874950b5515c812c +size 384 diff --git a/margin_logs/step_0000375.npy b/margin_logs/step_0000375.npy new file mode 100644 index 0000000..8331976 --- /dev/null +++ b/margin_logs/step_0000375.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:195d562eb80912a5fadb5134b0851b11fc26bd94ff5cabf28a151ea9f9d0d220 +size 384 diff --git a/margin_logs/step_0000376.npy b/margin_logs/step_0000376.npy new file mode 100644 index 0000000..1db6754 --- /dev/null +++ b/margin_logs/step_0000376.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8cb9a40c5b7cb43f79c187b162393bdf4666fdc27d873094f1e39b68ef9339a0 +size 384 diff --git a/margin_logs/step_0000377.npy b/margin_logs/step_0000377.npy new file mode 100644 index 0000000..a4e9862 --- /dev/null +++ b/margin_logs/step_0000377.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db7b68fd81e4554cd572f9728a0b2b290c3f14ccb3443c27246b25456bae2b55 +size 384 diff --git a/margin_logs/step_0000378.npy b/margin_logs/step_0000378.npy new file mode 100644 index 0000000..588f136 --- /dev/null +++ b/margin_logs/step_0000378.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c9f5998d4485134a3ed8c5a6c7c7e4733cb434afeda7efbed4655922e5fa247e +size 384 diff --git a/margin_logs/step_0000379.npy b/margin_logs/step_0000379.npy new file mode 100644 index 0000000..e9fa22a --- /dev/null +++ b/margin_logs/step_0000379.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:edfd4d8baa90cf52c6e8ccd95956f248e9de7bb6e47462b04d038be390d52f3d +size 384 diff --git a/margin_logs/step_0000380.npy b/margin_logs/step_0000380.npy new file mode 100644 index 0000000..9c1cc5a --- /dev/null +++ b/margin_logs/step_0000380.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84f2f8386c466d48f1b318489283cd864ab7f24e15647994e12f5b29b0dd87cf +size 384 diff --git a/margin_logs/step_0000381.npy b/margin_logs/step_0000381.npy new file mode 100644 index 0000000..31aaf4c --- /dev/null +++ b/margin_logs/step_0000381.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a388ef0fcedecd47d7eb074d8235d1a9693576f4d3082f154d38562b4e6bc26 +size 384 diff --git a/margin_logs/step_0000382.npy b/margin_logs/step_0000382.npy new file mode 100644 index 0000000..b27988a --- /dev/null +++ b/margin_logs/step_0000382.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b3dae1c5614ad2dccef109419adaa55a816c95cebac729b6794884eafbfbba8 +size 384 diff --git a/margin_logs/step_0000383.npy b/margin_logs/step_0000383.npy new file mode 100644 index 0000000..a7dad5f --- /dev/null +++ b/margin_logs/step_0000383.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:675f054950c34d1e5df42edb5aee423436c28403a3fc5c6da501098b4eb3e8ef +size 384 diff --git a/margin_logs/step_0000384.npy b/margin_logs/step_0000384.npy new file mode 100644 index 0000000..d157bd5 --- /dev/null +++ b/margin_logs/step_0000384.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c2975043c253f0d7a104c905cf77a38d63360108e977bf19901330b44884950 +size 384 diff --git a/margin_logs/step_0000385.npy b/margin_logs/step_0000385.npy new file mode 100644 index 0000000..9834413 --- /dev/null +++ b/margin_logs/step_0000385.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:650e297635faff5b20e9a563fe6c9eff5a0dc0f4dccc06a0470038a07be427ba +size 384 diff --git a/margin_logs/step_0000386.npy b/margin_logs/step_0000386.npy new file mode 100644 index 0000000..e6bd7f4 --- /dev/null +++ b/margin_logs/step_0000386.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf4f93a65d9636205460b2e4979276c0838392d6915c600d1bad88403f9b4db9 +size 384 diff --git a/margin_logs/step_0000387.npy b/margin_logs/step_0000387.npy new file mode 100644 index 0000000..fca1d99 --- /dev/null +++ b/margin_logs/step_0000387.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1051bfa1dccb2fcc788783697cce1cb4eef98d456453c1602b81a8830a142cd6 +size 384 diff --git a/margin_logs/step_0000388.npy b/margin_logs/step_0000388.npy new file mode 100644 index 0000000..8ea201b --- /dev/null +++ b/margin_logs/step_0000388.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b001c31042013e915e053e54fac5e99f714270f7d0b97a9976cfb096abf292c3 +size 384 diff --git a/margin_logs/step_0000389.npy b/margin_logs/step_0000389.npy new file mode 100644 index 0000000..3438013 --- /dev/null +++ b/margin_logs/step_0000389.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4daea42de68c7ff614487e0a1ffe67ec6a8a17a60d0eafa7350e59025deb3cbb +size 384 diff --git a/margin_logs/step_0000390.npy b/margin_logs/step_0000390.npy new file mode 100644 index 0000000..b1f11c9 --- /dev/null +++ b/margin_logs/step_0000390.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e965ba754e3c6a0e6300e01bea4c9fcdb1253a6f4a06c124eabfe76644607f7 +size 384 diff --git a/margin_logs/step_0000391.npy b/margin_logs/step_0000391.npy new file mode 100644 index 0000000..d7bee31 --- /dev/null +++ b/margin_logs/step_0000391.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c0f11a469c9a6326cfd4967c613def68e224c33fe4a580e24f3ae39dac32edd +size 384 diff --git a/margin_logs/step_0000392.npy b/margin_logs/step_0000392.npy new file mode 100644 index 0000000..adf34c5 --- /dev/null +++ b/margin_logs/step_0000392.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:53b4359a4f6e15ddb36c5d73f0760001758c02af374bf7a382aaa53809b7bc4c +size 384 diff --git a/margin_logs/step_0000393.npy b/margin_logs/step_0000393.npy new file mode 100644 index 0000000..d4c3896 --- /dev/null +++ b/margin_logs/step_0000393.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a3c5557f61c9d5f6fb846e07ab6f67d82d6002e4df95af2e955f0dc62aef8c0 +size 384 diff --git a/margin_logs/step_0000394.npy b/margin_logs/step_0000394.npy new file mode 100644 index 0000000..f4b8386 --- /dev/null +++ b/margin_logs/step_0000394.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95df12ec55c8f0876b550b423549a647fb55f0436154afb462d5ca96808a84d4 +size 384 diff --git a/margin_logs/step_0000395.npy b/margin_logs/step_0000395.npy new file mode 100644 index 0000000..6871ae6 --- /dev/null +++ b/margin_logs/step_0000395.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b287252a0b4eb0f72deef24aa1c919ab1b6d27618331f3d2810fbcfc19140c93 +size 384 diff --git a/margin_logs/step_0000396.npy b/margin_logs/step_0000396.npy new file mode 100644 index 0000000..fd6fc6c --- /dev/null +++ b/margin_logs/step_0000396.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff9a8deec878668a0602503961a1caf7fff856d7be3212497ecf9797dab85d4a +size 384 diff --git a/margin_logs/step_0000397.npy b/margin_logs/step_0000397.npy new file mode 100644 index 0000000..f805ab9 --- /dev/null +++ b/margin_logs/step_0000397.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7397a4ce3a42b29bca8ad5ffe325987ea0f058af97ac28a7b190d5cb83c789f8 +size 384 diff --git a/margin_logs/step_0000398.npy b/margin_logs/step_0000398.npy new file mode 100644 index 0000000..e04bb64 --- /dev/null +++ b/margin_logs/step_0000398.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8c35fe5cd63cf6dba969aa53c2f5a71d57e9d630b64c19f45e1048470b5f97e +size 384 diff --git a/margin_logs/step_0000399.npy b/margin_logs/step_0000399.npy new file mode 100644 index 0000000..2b187bc --- /dev/null +++ b/margin_logs/step_0000399.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e0ee1d69579c271c0ef0f86af8ea97fca90db80b24a64c707b056f9646a69cb +size 384 diff --git a/margin_logs/step_0000400.npy b/margin_logs/step_0000400.npy new file mode 100644 index 0000000..b75aa03 --- /dev/null +++ b/margin_logs/step_0000400.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61bdd316d6cc2016ec12d8968d3669fc7e7bc64d5264769c8bb7e92dbbd51885 +size 384 diff --git a/margin_logs/step_0000401.npy b/margin_logs/step_0000401.npy new file mode 100644 index 0000000..723a25d --- /dev/null +++ b/margin_logs/step_0000401.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f2162f355d09a1bed204fe1991fce2f7b3167fea1bea357d91bef8783c5de5e +size 384 diff --git a/margin_logs/step_0000402.npy b/margin_logs/step_0000402.npy new file mode 100644 index 0000000..d85acc7 --- /dev/null +++ b/margin_logs/step_0000402.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5cfcbd46b10dd6e651f5d69c3255f265fd1d72363a7cb06a0817a6baa57d040d +size 384 diff --git a/margin_logs/step_0000403.npy b/margin_logs/step_0000403.npy new file mode 100644 index 0000000..27ae397 --- /dev/null +++ b/margin_logs/step_0000403.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8a07772ed96a60160352062aba64a126606af43c1b17a489ffebe38bebbd57d +size 384 diff --git a/margin_logs/step_0000404.npy b/margin_logs/step_0000404.npy new file mode 100644 index 0000000..5a58bf5 --- /dev/null +++ b/margin_logs/step_0000404.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb6440601a4805183f59679d1e83c001ef9af431b90830a635bfb5144f876821 +size 384 diff --git a/margin_logs/step_0000405.npy b/margin_logs/step_0000405.npy new file mode 100644 index 0000000..623f858 --- /dev/null +++ b/margin_logs/step_0000405.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e2714b9b692a4bce450dd221183f5e11aab60e16793b7cda654b5b486aee6e4 +size 384 diff --git a/margin_logs/step_0000406.npy b/margin_logs/step_0000406.npy new file mode 100644 index 0000000..a1df27a --- /dev/null +++ b/margin_logs/step_0000406.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be675866ca0d9d70ab4b137bba8eaacddaf61dd142e97d5fe0082807614d8cab +size 384 diff --git a/margin_logs/step_0000407.npy b/margin_logs/step_0000407.npy new file mode 100644 index 0000000..26b0bdb --- /dev/null +++ b/margin_logs/step_0000407.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3cab8e6e26486f7ec11c1227bac80d48d92c827c9f19ed0701e748bdc29a8a4 +size 384 diff --git a/margin_logs/step_0000408.npy b/margin_logs/step_0000408.npy new file mode 100644 index 0000000..6b580c4 --- /dev/null +++ b/margin_logs/step_0000408.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ac0c6b59033aec41ff84336f3c79532cb1645a9ebc9026e4a7fdcd4fb5cf58d +size 384 diff --git a/margin_logs/step_0000409.npy b/margin_logs/step_0000409.npy new file mode 100644 index 0000000..e03921a --- /dev/null +++ b/margin_logs/step_0000409.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:99b950726e335c74a267e97cf9a8990551a594e0dba3f1152b0f8171d8ffd76b +size 384 diff --git a/margin_logs/step_0000410.npy b/margin_logs/step_0000410.npy new file mode 100644 index 0000000..1c934db --- /dev/null +++ b/margin_logs/step_0000410.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b91aabb99d47cc1eaf9411ddc457f4607d08bce3a4ff20de421997a2ad938d0 +size 384 diff --git a/margin_logs/step_0000411.npy b/margin_logs/step_0000411.npy new file mode 100644 index 0000000..c642527 --- /dev/null +++ b/margin_logs/step_0000411.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32fbfad14696452c0048cf37295d702a57349a93222a54e1b45df8201825f5b2 +size 384 diff --git a/margin_logs/step_0000412.npy b/margin_logs/step_0000412.npy new file mode 100644 index 0000000..c58b6fb --- /dev/null +++ b/margin_logs/step_0000412.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:adccf7009156064d67ef05c428509b780c374b027214470d6cb721ff782fcad1 +size 384 diff --git a/margin_logs/step_0000413.npy b/margin_logs/step_0000413.npy new file mode 100644 index 0000000..8cbf31f --- /dev/null +++ b/margin_logs/step_0000413.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c548fd493663460e578f830aa90135c06f1ede46f6515abffe5f2b1ff3e6c5ce +size 384 diff --git a/margin_logs/step_0000414.npy b/margin_logs/step_0000414.npy new file mode 100644 index 0000000..ca31cc4 --- /dev/null +++ b/margin_logs/step_0000414.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8b278007b2c15ad3d6dd5c88eab107c05c80031b695acbe8c3979d4ad21abde +size 384 diff --git a/margin_logs/step_0000415.npy b/margin_logs/step_0000415.npy new file mode 100644 index 0000000..b05a267 --- /dev/null +++ b/margin_logs/step_0000415.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b2391b0c91a28831d77ee618ad5842b9a04cfe724e313963a9362c6ece070c5 +size 384 diff --git a/margin_logs/step_0000416.npy b/margin_logs/step_0000416.npy new file mode 100644 index 0000000..06bb924 --- /dev/null +++ b/margin_logs/step_0000416.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5128e79d44b09afd4bb990286477e178ee4587e0ee767bfd568103443a671843 +size 384 diff --git a/margin_logs/step_0000417.npy b/margin_logs/step_0000417.npy new file mode 100644 index 0000000..87fb276 --- /dev/null +++ b/margin_logs/step_0000417.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6334e0cb2b2b931e5ccac6aa0c62917b7a0135e531227bad7c83f56eb8799730 +size 384 diff --git a/margin_logs/step_0000418.npy b/margin_logs/step_0000418.npy new file mode 100644 index 0000000..f1d426b --- /dev/null +++ b/margin_logs/step_0000418.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:767ca3d59c253f7baf7ccc2256c36db14a23be8d4a4aded40fdf9e614d388b11 +size 384 diff --git a/margin_logs/step_0000419.npy b/margin_logs/step_0000419.npy new file mode 100644 index 0000000..1cd1542 --- /dev/null +++ b/margin_logs/step_0000419.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee51d43759a94568d1d6f9dddb395ab591a556f8e5b4680645cd418fca2ec36d +size 384 diff --git a/margin_logs/step_0000420.npy b/margin_logs/step_0000420.npy new file mode 100644 index 0000000..8563468 --- /dev/null +++ b/margin_logs/step_0000420.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a563c864a7bf85d1a242a748bb074e8befbf5bc80f3a8aea78d4a9ebd1f70d3 +size 384 diff --git a/margin_logs/step_0000421.npy b/margin_logs/step_0000421.npy new file mode 100644 index 0000000..db3bf71 --- /dev/null +++ b/margin_logs/step_0000421.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd5787d77ff5a67688d17e4bdf7e81ee8dfb4f7b8a87bc2af490f262944c09a9 +size 384 diff --git a/margin_logs/step_0000422.npy b/margin_logs/step_0000422.npy new file mode 100644 index 0000000..81117c6 --- /dev/null +++ b/margin_logs/step_0000422.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:500d1d65e88a725c1494f6c9bafb49116692caaf873fd42d0378213b7282c192 +size 384 diff --git a/margin_logs/step_0000423.npy b/margin_logs/step_0000423.npy new file mode 100644 index 0000000..cfdf072 --- /dev/null +++ b/margin_logs/step_0000423.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b37f92c19db2306ba56da37f278adb4a1fc5358cf0b788b4649a9190a29e43f +size 384 diff --git a/margin_logs/step_0000424.npy b/margin_logs/step_0000424.npy new file mode 100644 index 0000000..90be711 --- /dev/null +++ b/margin_logs/step_0000424.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0653aaffe190a05396e83e4414d1da8932561633eb08bb1457fb3f2bfbde104c +size 384 diff --git a/margin_logs/step_0000425.npy b/margin_logs/step_0000425.npy new file mode 100644 index 0000000..b776fc0 --- /dev/null +++ b/margin_logs/step_0000425.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4a4eac73d4097168ac1901f422cbe4f9b8d5c6b71a49f375459c3939c847652 +size 384 diff --git a/margin_logs/step_0000426.npy b/margin_logs/step_0000426.npy new file mode 100644 index 0000000..9b6e4fa --- /dev/null +++ b/margin_logs/step_0000426.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:761efbb0145395d96c257a183177e0148017a316379b37bae8f6eca12109bca4 +size 384 diff --git a/margin_logs/step_0000427.npy b/margin_logs/step_0000427.npy new file mode 100644 index 0000000..9cfbf07 --- /dev/null +++ b/margin_logs/step_0000427.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79cd09ee78aff7e2571af6867c00881ee91315422a71381a7aa41b154235a253 +size 384 diff --git a/margin_logs/step_0000428.npy b/margin_logs/step_0000428.npy new file mode 100644 index 0000000..eda364c --- /dev/null +++ b/margin_logs/step_0000428.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8fce42d75c79dcbe692e901fe44c4c1a604309fc153f807863f84b7b0ad3b34 +size 384 diff --git a/margin_logs/step_0000429.npy b/margin_logs/step_0000429.npy new file mode 100644 index 0000000..3407f0e --- /dev/null +++ b/margin_logs/step_0000429.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:80e31dc016eee85c7bf43984c88d16916e2092d22cbf0a53d86315cf0360242f +size 384 diff --git a/margin_logs/step_0000430.npy b/margin_logs/step_0000430.npy new file mode 100644 index 0000000..db9ae96 --- /dev/null +++ b/margin_logs/step_0000430.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9866ee50d4dcef48c9f884a105c0632b771e117f8b69531f0ef603434f7dd9a0 +size 384 diff --git a/margin_logs/step_0000431.npy b/margin_logs/step_0000431.npy new file mode 100644 index 0000000..9acb414 --- /dev/null +++ b/margin_logs/step_0000431.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd07aea5444802d26716a8d65f342e424c99b9d2420dea161ccd1607b45b8336 +size 384 diff --git a/margin_logs/step_0000432.npy b/margin_logs/step_0000432.npy new file mode 100644 index 0000000..b5a5e57 --- /dev/null +++ b/margin_logs/step_0000432.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5a68217539aef8400120ad8410a2eb35687235a83d8d13fafce43d58dc3ce6f +size 384 diff --git a/margin_logs/step_0000433.npy b/margin_logs/step_0000433.npy new file mode 100644 index 0000000..06a7a7c --- /dev/null +++ b/margin_logs/step_0000433.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c55baa4883e9a374d2115058d7c2adfb4e4ca0fafae92e3544e4f6ebc7212ebb +size 384 diff --git a/margin_logs/step_0000434.npy b/margin_logs/step_0000434.npy new file mode 100644 index 0000000..40494f8 --- /dev/null +++ b/margin_logs/step_0000434.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32683141faceb21e5aaa04c9801bb871dcb7890e8884b17bf7fe9a6e1d4509ce +size 384 diff --git a/margin_logs/step_0000435.npy b/margin_logs/step_0000435.npy new file mode 100644 index 0000000..b1ee2c2 --- /dev/null +++ b/margin_logs/step_0000435.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b36ea7dd93fc62479c318bd788f6b655424e920e88f6134e24911b82f7c5158b +size 384 diff --git a/margin_logs/step_0000436.npy b/margin_logs/step_0000436.npy new file mode 100644 index 0000000..edc352a --- /dev/null +++ b/margin_logs/step_0000436.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4099a406dc6dbe0d2ff0755d2f70a67ad451fa240ac8e9703dc529b1d7f55fca +size 384 diff --git a/margin_logs/step_0000437.npy b/margin_logs/step_0000437.npy new file mode 100644 index 0000000..085a5f2 --- /dev/null +++ b/margin_logs/step_0000437.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7d4afcce87d1129bc95b95cdadecfe181a7422929654aea372763cdf37371b4 +size 384 diff --git a/margin_logs/step_0000438.npy b/margin_logs/step_0000438.npy new file mode 100644 index 0000000..2150867 --- /dev/null +++ b/margin_logs/step_0000438.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c28e463bfce1001dfcf911b5fa538a3eaa10ec8e5a73c501b540b947098fcc4d +size 384 diff --git a/margin_logs/step_0000439.npy b/margin_logs/step_0000439.npy new file mode 100644 index 0000000..2c59c57 --- /dev/null +++ b/margin_logs/step_0000439.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51e6b6d1bf7746627a9a545f5a16fad9756d316cff69d6af86b87771bb9beac6 +size 384 diff --git a/margin_logs/step_0000440.npy b/margin_logs/step_0000440.npy new file mode 100644 index 0000000..620fd67 --- /dev/null +++ b/margin_logs/step_0000440.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5fda3ea6b7bfd1c0750df998c0d69ced5244fcefde462ac9cebdfa2caf85fc7 +size 384 diff --git a/margin_logs/step_0000441.npy b/margin_logs/step_0000441.npy new file mode 100644 index 0000000..a9f3f37 --- /dev/null +++ b/margin_logs/step_0000441.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c499da595a84f7bcebd8ec47fdc769672f0866540828e1d0cccbde875ec331ad +size 384 diff --git a/margin_logs/step_0000442.npy b/margin_logs/step_0000442.npy new file mode 100644 index 0000000..cfcf805 --- /dev/null +++ b/margin_logs/step_0000442.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:daeb3d5285026047bd687c81bdd6020004cef765d6712371dcf4ed0a4b55337f +size 384 diff --git a/margin_logs/step_0000443.npy b/margin_logs/step_0000443.npy new file mode 100644 index 0000000..a81ba21 --- /dev/null +++ b/margin_logs/step_0000443.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85521141cdaff3feb4a40dfe84c037ff70944314de8679b986a529ee3e43f598 +size 384 diff --git a/margin_logs/step_0000444.npy b/margin_logs/step_0000444.npy new file mode 100644 index 0000000..eed2ea1 --- /dev/null +++ b/margin_logs/step_0000444.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f70194c2d7b27315a4929cfb33d4e4e04c41d38d2cffa28e5e072e6f1be58da5 +size 384 diff --git a/margin_logs/step_0000445.npy b/margin_logs/step_0000445.npy new file mode 100644 index 0000000..06c5530 --- /dev/null +++ b/margin_logs/step_0000445.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee55a71d21ca6559d7c78bbe560b4f3a3eed11ce6a341ee47f4a0b5c1c5c952c +size 384 diff --git a/margin_logs/step_0000446.npy b/margin_logs/step_0000446.npy new file mode 100644 index 0000000..4b159ae --- /dev/null +++ b/margin_logs/step_0000446.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3b0da97543d56f1f888bdcc81a6794e4d6935ab8a6191d43c2b70799dcb4c45 +size 384 diff --git a/margin_logs/step_0000447.npy b/margin_logs/step_0000447.npy new file mode 100644 index 0000000..5d838aa --- /dev/null +++ b/margin_logs/step_0000447.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2db038d333d1f7ee12b1a2af33876d1877e4025e1b773aa32a36b80798d55d41 +size 384 diff --git a/margin_logs/step_0000448.npy b/margin_logs/step_0000448.npy new file mode 100644 index 0000000..8579bfa --- /dev/null +++ b/margin_logs/step_0000448.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1734ba69f88b99ad3d56a3a38032d19e4125c26110c6ba9f7433aaa8dafcf40 +size 384 diff --git a/margin_logs/step_0000449.npy b/margin_logs/step_0000449.npy new file mode 100644 index 0000000..c979cbb --- /dev/null +++ b/margin_logs/step_0000449.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d6300bbf193434810e2583d6a08c86b4b94a64d107226c8119c12bdd2c701550 +size 384 diff --git a/margin_logs/step_0000450.npy b/margin_logs/step_0000450.npy new file mode 100644 index 0000000..e19a759 --- /dev/null +++ b/margin_logs/step_0000450.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:562ad2c0e3ba250172f78236edc0da959649605f63bb160c8ea4df66eb2d1575 +size 384 diff --git a/margin_logs/step_0000451.npy b/margin_logs/step_0000451.npy new file mode 100644 index 0000000..d2212fa --- /dev/null +++ b/margin_logs/step_0000451.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:182add061384a7005cea2dc037ff73e57ec3babf5fe7dba978a1072b8d5cd4d1 +size 384 diff --git a/margin_logs/step_0000452.npy b/margin_logs/step_0000452.npy new file mode 100644 index 0000000..435c316 --- /dev/null +++ b/margin_logs/step_0000452.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1285aaad74700f77d294c303f34df9900be1e0529041e7c8388684809677107 +size 384 diff --git a/margin_logs/step_0000453.npy b/margin_logs/step_0000453.npy new file mode 100644 index 0000000..cad29f7 --- /dev/null +++ b/margin_logs/step_0000453.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25e439d0c8fdf79f74b70e99f75be43dde9cddeb707d1357591b46f8ffd4b3fe +size 384 diff --git a/margin_logs/step_0000454.npy b/margin_logs/step_0000454.npy new file mode 100644 index 0000000..5639b81 --- /dev/null +++ b/margin_logs/step_0000454.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a890e12277968ee42c71ff5011c04645744801b49204251477544275bbd09f25 +size 384 diff --git a/margin_logs/step_0000455.npy b/margin_logs/step_0000455.npy new file mode 100644 index 0000000..1cdadf4 --- /dev/null +++ b/margin_logs/step_0000455.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2628fcb44b1362a23216e1ea0c0d9c6e16709f9d0def0b6fb7e11df6556ba363 +size 384 diff --git a/margin_logs/step_0000456.npy b/margin_logs/step_0000456.npy new file mode 100644 index 0000000..6e48751 --- /dev/null +++ b/margin_logs/step_0000456.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e827213ecef5a4e5c71ae632529aa670614816e11043fefdd773d11edfe864d +size 384 diff --git a/margin_logs/step_0000457.npy b/margin_logs/step_0000457.npy new file mode 100644 index 0000000..36b796f --- /dev/null +++ b/margin_logs/step_0000457.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97fb1415ce1f165635de45cd94ad256bd6ab86f51c5b9abe4a30f09aa70ab4df +size 384 diff --git a/margin_logs/step_0000458.npy b/margin_logs/step_0000458.npy new file mode 100644 index 0000000..c551172 --- /dev/null +++ b/margin_logs/step_0000458.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d9302393a09787299a2ea2a3a91263c38987e931360a4b5e96112aafed52b3f +size 384 diff --git a/margin_logs/step_0000459.npy b/margin_logs/step_0000459.npy new file mode 100644 index 0000000..957fe1d --- /dev/null +++ b/margin_logs/step_0000459.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae567e9684279f57a39e26c51ed50524c5e2369d21f49a91a5451b6666b26dbc +size 384 diff --git a/margin_logs/step_0000460.npy b/margin_logs/step_0000460.npy new file mode 100644 index 0000000..39a8220 --- /dev/null +++ b/margin_logs/step_0000460.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64da14c729ba5634cbd15bcceb182027511b71a90c54b5c4f1525d8f80737517 +size 384 diff --git a/margin_logs/step_0000461.npy b/margin_logs/step_0000461.npy new file mode 100644 index 0000000..8303981 --- /dev/null +++ b/margin_logs/step_0000461.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94bccfec6124d59a3cd05bfc6b5af79915256ef40f88be0a0d53c8c41477db70 +size 384 diff --git a/margin_logs/step_0000462.npy b/margin_logs/step_0000462.npy new file mode 100644 index 0000000..87fedf0 --- /dev/null +++ b/margin_logs/step_0000462.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9df0191675c2cf1237fedff6fdaaf36a169314e72c6fd2e3acd482e58a7e36b3 +size 384 diff --git a/margin_logs/step_0000463.npy b/margin_logs/step_0000463.npy new file mode 100644 index 0000000..f124793 --- /dev/null +++ b/margin_logs/step_0000463.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ce36db91ab702a09a61855b528415a7d4c9aea126cb30dda67139ef593d757d +size 384 diff --git a/margin_logs/step_0000464.npy b/margin_logs/step_0000464.npy new file mode 100644 index 0000000..443204e --- /dev/null +++ b/margin_logs/step_0000464.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:124eee4db84e0ab1c2a4e5957b5464b3b5cf3dfb40fa1133c907258341440d98 +size 384 diff --git a/margin_logs/step_0000465.npy b/margin_logs/step_0000465.npy new file mode 100644 index 0000000..04c407c --- /dev/null +++ b/margin_logs/step_0000465.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a6a0f8b05586924391cac721f27b68ef4180ea40b4b42ea590909db5a3be3cfe +size 384 diff --git a/margin_logs/step_0000466.npy b/margin_logs/step_0000466.npy new file mode 100644 index 0000000..f6ec32c --- /dev/null +++ b/margin_logs/step_0000466.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97eddf29c69d619b6fadd3ac8a1c6c9ba10d656b1ce8da5705edbcb4381cd1b7 +size 384 diff --git a/margin_logs/step_0000467.npy b/margin_logs/step_0000467.npy new file mode 100644 index 0000000..efafc73 --- /dev/null +++ b/margin_logs/step_0000467.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a47b359b52485096876a1170a3ca0f3a4d9a22f556a100f6531eb61bb8af64e +size 384 diff --git a/margin_logs/step_0000468.npy b/margin_logs/step_0000468.npy new file mode 100644 index 0000000..92fb16f --- /dev/null +++ b/margin_logs/step_0000468.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa2bad8a251c29fdd3e8a2747cb35e243eb073becddec8246fdb5575fff9f160 +size 384 diff --git a/margin_logs/step_0000469.npy b/margin_logs/step_0000469.npy new file mode 100644 index 0000000..ee1950a --- /dev/null +++ b/margin_logs/step_0000469.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11c30e97935b1e21e9fa98e04c53de5d0c1ce9fd235b3ad95c1379def1f98e62 +size 384 diff --git a/margin_logs/step_0000470.npy b/margin_logs/step_0000470.npy new file mode 100644 index 0000000..c2813db --- /dev/null +++ b/margin_logs/step_0000470.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:52b6df47d54730d07b6eea9d2e452ca339ebea1f6d29a77cf0dfe28eb605656f +size 384 diff --git a/margin_logs/step_0000471.npy b/margin_logs/step_0000471.npy new file mode 100644 index 0000000..121d6a2 --- /dev/null +++ b/margin_logs/step_0000471.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45df6ef222c6d2ac4a489d857ec0abae32ffa5156f9de8a3cf2a7330f4276705 +size 384 diff --git a/margin_logs/step_0000472.npy b/margin_logs/step_0000472.npy new file mode 100644 index 0000000..8c44f39 --- /dev/null +++ b/margin_logs/step_0000472.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:caca701a2c4814421f8b2eb4279233e47910ead9e931c32cc90ea78b3e9c464f +size 384 diff --git a/margin_logs/step_0000473.npy b/margin_logs/step_0000473.npy new file mode 100644 index 0000000..80c8d5d --- /dev/null +++ b/margin_logs/step_0000473.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a3f11ea793266b325f857581c5e8845a60821cfcbcce19e72a4d729cde66293 +size 384 diff --git a/margin_logs/step_0000474.npy b/margin_logs/step_0000474.npy new file mode 100644 index 0000000..e56854c --- /dev/null +++ b/margin_logs/step_0000474.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9cba380169e06567625e91478e51b4a24663b5b7f435db6691ab2a9abb810d1e +size 384 diff --git a/margin_logs/step_0000475.npy b/margin_logs/step_0000475.npy new file mode 100644 index 0000000..d683534 --- /dev/null +++ b/margin_logs/step_0000475.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35108d9240fc2ac8f850469eab085a179b09ff1424f334e32bc06e27a6c69e01 +size 384 diff --git a/margin_logs/step_0000476.npy b/margin_logs/step_0000476.npy new file mode 100644 index 0000000..72d8aa6 --- /dev/null +++ b/margin_logs/step_0000476.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:afecd03781a194e0949ef018428a5020179ae77994080665675182aac81827d1 +size 384 diff --git a/margin_logs/step_0000477.npy b/margin_logs/step_0000477.npy new file mode 100644 index 0000000..8dd98e2 --- /dev/null +++ b/margin_logs/step_0000477.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a940a2509b0b228c61f409f3509d59f02dddf99deee1b34276035871561185bb +size 384 diff --git a/margin_logs/step_0000478.npy b/margin_logs/step_0000478.npy new file mode 100644 index 0000000..c31496e --- /dev/null +++ b/margin_logs/step_0000478.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a03475cfc82300d7572a81728955bca69d47e94be0789a1326fafd78006913a +size 384 diff --git a/margin_logs/step_0000479.npy b/margin_logs/step_0000479.npy new file mode 100644 index 0000000..0555905 --- /dev/null +++ b/margin_logs/step_0000479.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:019f990a18be44cd61d2010f831438ab43a9ecf88cfde4e79ee31a01ce8150ec +size 384 diff --git a/margin_logs/step_0000480.npy b/margin_logs/step_0000480.npy new file mode 100644 index 0000000..014a69f --- /dev/null +++ b/margin_logs/step_0000480.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f55ca63a261514906e497f11456709854b3725dbfd811cc9cce6e83ca7f9e104 +size 384 diff --git a/margin_logs/step_0000481.npy b/margin_logs/step_0000481.npy new file mode 100644 index 0000000..97d2308 --- /dev/null +++ b/margin_logs/step_0000481.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:644a0603e9a378cf9fea2e398c88317df8c898d55019d188d05d020e6eac8371 +size 384 diff --git a/margin_logs/step_0000482.npy b/margin_logs/step_0000482.npy new file mode 100644 index 0000000..3c0543f --- /dev/null +++ b/margin_logs/step_0000482.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c9d91458155a53a6c6ccab1c8dd3b270d75eea441a9dd997b7717938b6f8751c +size 384 diff --git a/margin_logs/step_0000483.npy b/margin_logs/step_0000483.npy new file mode 100644 index 0000000..bd21fee --- /dev/null +++ b/margin_logs/step_0000483.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f430f50625e59a8c5eb16282a24e259aebe0b3fd288984579c7b6c65bb3870f +size 384 diff --git a/margin_logs/step_0000484.npy b/margin_logs/step_0000484.npy new file mode 100644 index 0000000..81e5d2e --- /dev/null +++ b/margin_logs/step_0000484.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d0f0260125ba4372e3fda2b67940f939a60c3c30f93ac5ddeda0f66c19e5e2fe +size 384 diff --git a/margin_logs/step_0000485.npy b/margin_logs/step_0000485.npy new file mode 100644 index 0000000..e1c3890 --- /dev/null +++ b/margin_logs/step_0000485.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f433eaf784ea18d58f939b23fe53836576803a611817435f3c887b312de8a2b1 +size 384 diff --git a/margin_logs/step_0000486.npy b/margin_logs/step_0000486.npy new file mode 100644 index 0000000..ed3d302 --- /dev/null +++ b/margin_logs/step_0000486.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a52cb9542fce67596b1c406fc0954b849e0ad793d7ddd40aa126b6cc7aeb8fa +size 384 diff --git a/margin_logs/step_0000487.npy b/margin_logs/step_0000487.npy new file mode 100644 index 0000000..ed89de2 --- /dev/null +++ b/margin_logs/step_0000487.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:926559e2fd6a23d4440fd83d6a24b2a3adeaef34e9a73f5ffc959c8b7c14d456 +size 384 diff --git a/margin_logs/step_0000488.npy b/margin_logs/step_0000488.npy new file mode 100644 index 0000000..6947060 --- /dev/null +++ b/margin_logs/step_0000488.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c82ab78de6060aa692b7f5f2c032ef13c1c961b211fdd20656734d938318543 +size 384 diff --git a/margin_logs/step_0000489.npy b/margin_logs/step_0000489.npy new file mode 100644 index 0000000..241d125 --- /dev/null +++ b/margin_logs/step_0000489.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4748cf90ae66a51a134b7e059b0986dc61658aad78403aaa50a61af411db15da +size 384 diff --git a/margin_logs/step_0000490.npy b/margin_logs/step_0000490.npy new file mode 100644 index 0000000..9a42849 --- /dev/null +++ b/margin_logs/step_0000490.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0aab68f21da51bf9813a9697bbbc5a18b445b6dc1d345d185ccd5bfda2fcce5a +size 384 diff --git a/margin_logs/step_0000491.npy b/margin_logs/step_0000491.npy new file mode 100644 index 0000000..a6b8605 --- /dev/null +++ b/margin_logs/step_0000491.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e0e112a059ef50cce098dd8be1ea2e3604182b838ab6baea4c6a176dcf86b80c +size 384 diff --git a/margin_logs/step_0000492.npy b/margin_logs/step_0000492.npy new file mode 100644 index 0000000..89e34bc --- /dev/null +++ b/margin_logs/step_0000492.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:181b888e5a05c48cdc6ce27feaf1a574f8708e84935c5ddc2541af903c2b852f +size 384 diff --git a/margin_logs/step_0000493.npy b/margin_logs/step_0000493.npy new file mode 100644 index 0000000..7dacff9 --- /dev/null +++ b/margin_logs/step_0000493.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79475c6f150c9ad7ff57a69564c12302feee9bd259e42a329d768979667b305e +size 384 diff --git a/margin_logs/step_0000494.npy b/margin_logs/step_0000494.npy new file mode 100644 index 0000000..573d331 --- /dev/null +++ b/margin_logs/step_0000494.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b0995a35b2457e661770cdad8787d10e2b4896a4cdc46108d307b0d19c2abf0 +size 384 diff --git a/margin_logs/step_0000495.npy b/margin_logs/step_0000495.npy new file mode 100644 index 0000000..f9b17cc --- /dev/null +++ b/margin_logs/step_0000495.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0bf70ac613470ae99552e28e1c831a24bad2719d625ac3abac19782a74eda70 +size 384 diff --git a/margin_logs/step_0000496.npy b/margin_logs/step_0000496.npy new file mode 100644 index 0000000..c1efdc6 --- /dev/null +++ b/margin_logs/step_0000496.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df6ca15d054cfab8a1ad9ad932a8b35bab9e9de21ee765f1fa008f282230b055 +size 384 diff --git a/margin_logs/step_0000497.npy b/margin_logs/step_0000497.npy new file mode 100644 index 0000000..2c9696a --- /dev/null +++ b/margin_logs/step_0000497.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b2ec76b96791058097ff3963d5281ccc386bafa71fb45f74b97b7c7f61a5f32 +size 384 diff --git a/margin_logs/step_0000498.npy b/margin_logs/step_0000498.npy new file mode 100644 index 0000000..d69dca1 --- /dev/null +++ b/margin_logs/step_0000498.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b08da52ceb794f045edf1c175ea1ced18feeb370e33a22f6671fd060e0045850 +size 384 diff --git a/margin_logs/step_0000499.npy b/margin_logs/step_0000499.npy new file mode 100644 index 0000000..88635cd --- /dev/null +++ b/margin_logs/step_0000499.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9166ae3dc863958ceebf3df93b1e97862aaf428a2a8b3e6fb2de602679206b7f +size 384 diff --git a/margin_logs/step_0000500.npy b/margin_logs/step_0000500.npy new file mode 100644 index 0000000..0674862 --- /dev/null +++ b/margin_logs/step_0000500.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c7ec36704552cd4924f2ed91dc7b43c19cdb685925e477eb5919c89cab6a6f9 +size 384 diff --git a/margin_logs/step_0000501.npy b/margin_logs/step_0000501.npy new file mode 100644 index 0000000..e1952ce --- /dev/null +++ b/margin_logs/step_0000501.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4568ec7992cb510ea1e871397382ac5f6829e2ce710a798d05add522a7d59186 +size 384 diff --git a/margin_logs/step_0000502.npy b/margin_logs/step_0000502.npy new file mode 100644 index 0000000..96ff25d --- /dev/null +++ b/margin_logs/step_0000502.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:edcdf1f0b8b69a7871a8c608c6026875d71bfb872b37b09e9e01f4ecf4ea192d +size 384 diff --git a/margin_logs/step_0000503.npy b/margin_logs/step_0000503.npy new file mode 100644 index 0000000..5fa3596 --- /dev/null +++ b/margin_logs/step_0000503.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:852c87d99e9158022f9767bcd2d057fd76635822b4e58b7509e94c914f73ab43 +size 384 diff --git a/margin_logs/step_0000504.npy b/margin_logs/step_0000504.npy new file mode 100644 index 0000000..281c62b --- /dev/null +++ b/margin_logs/step_0000504.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd0490969bc671d9bbf497eadade79b5d18239d1ace5556df636e9e4d853743d +size 384 diff --git a/margin_logs/step_0000505.npy b/margin_logs/step_0000505.npy new file mode 100644 index 0000000..ede517f --- /dev/null +++ b/margin_logs/step_0000505.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c461fb6e7eb3748b78dc12cf080c9fd29f335e6757b29a966fe22e6800cebd47 +size 384 diff --git a/margin_logs/step_0000506.npy b/margin_logs/step_0000506.npy new file mode 100644 index 0000000..ca65ca0 --- /dev/null +++ b/margin_logs/step_0000506.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca8e5948d3f8a6d8967a3b08967794ff57844ad9dfa604ce116837a44055d263 +size 384 diff --git a/margin_logs/step_0000507.npy b/margin_logs/step_0000507.npy new file mode 100644 index 0000000..e4b36c6 --- /dev/null +++ b/margin_logs/step_0000507.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8eee52e1cab103231e5acdddb9e0e2810782e5eea3086a9f53b70dde0eda818c +size 384 diff --git a/margin_logs/step_0000508.npy b/margin_logs/step_0000508.npy new file mode 100644 index 0000000..7e9a833 --- /dev/null +++ b/margin_logs/step_0000508.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d4aa9aa733018b8ce36bfcdb0edddad4798e5bb1d1db908c67848eaec8a29ed +size 384 diff --git a/margin_logs/step_0000509.npy b/margin_logs/step_0000509.npy new file mode 100644 index 0000000..507225e --- /dev/null +++ b/margin_logs/step_0000509.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cfcb15ad583cec9569eed73796fa0633da104cd94df8cdd80a7a56f2398997ec +size 384 diff --git a/margin_logs/step_0000510.npy b/margin_logs/step_0000510.npy new file mode 100644 index 0000000..21a9e90 --- /dev/null +++ b/margin_logs/step_0000510.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:03b9d1fed7971764b4eed8025bc9fe5ab8527899b431e8517305943e13f250b5 +size 384 diff --git a/margin_logs/step_0000511.npy b/margin_logs/step_0000511.npy new file mode 100644 index 0000000..cf69004 --- /dev/null +++ b/margin_logs/step_0000511.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:beb7281743416234ea3c1032f8653d0f0e1cb482ca5cfbd5a2e191973be340c1 +size 384 diff --git a/margin_logs/step_0000512.npy b/margin_logs/step_0000512.npy new file mode 100644 index 0000000..56e691a --- /dev/null +++ b/margin_logs/step_0000512.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5f51a3abb2506e87c707c1aa84a9e45b2aa3f093da8ea7386e7eb6f5635a6e2 +size 384 diff --git a/margin_logs/step_0000513.npy b/margin_logs/step_0000513.npy new file mode 100644 index 0000000..ae06c79 --- /dev/null +++ b/margin_logs/step_0000513.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59b199450b47cf091fe451a8e10e21b4ac42edd8dcbed3b1dacb4d02f1f0e9a6 +size 384 diff --git a/margin_logs/step_0000514.npy b/margin_logs/step_0000514.npy new file mode 100644 index 0000000..f98fdf5 --- /dev/null +++ b/margin_logs/step_0000514.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0cf59eaa5f98f751fe380963e29e08e04d083b525f640cfba128e6df50bf6d74 +size 384 diff --git a/margin_logs/step_0000515.npy b/margin_logs/step_0000515.npy new file mode 100644 index 0000000..ea39245 --- /dev/null +++ b/margin_logs/step_0000515.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f9c4510367a7f6ff5d188352b1ae28b8fb03eea1cfb089f9fe2681e270ee4725 +size 384 diff --git a/margin_logs/step_0000516.npy b/margin_logs/step_0000516.npy new file mode 100644 index 0000000..8307b8f --- /dev/null +++ b/margin_logs/step_0000516.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a35ad74023b981d09c89eb1fc5fe2598a174e03d4b4b575721bf609b6825a74 +size 384 diff --git a/margin_logs/step_0000517.npy b/margin_logs/step_0000517.npy new file mode 100644 index 0000000..a15c518 --- /dev/null +++ b/margin_logs/step_0000517.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:815578888fd8e28d61b1ea02d605082214824f30110f37cb3af284f368a0ab24 +size 384 diff --git a/margin_logs/step_0000518.npy b/margin_logs/step_0000518.npy new file mode 100644 index 0000000..5661c1c --- /dev/null +++ b/margin_logs/step_0000518.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:335f54ffeb7ebe760456b416f10aaa1b6621a0a8dd5fc58eddcc1d6bfac622f2 +size 384 diff --git a/margin_logs/step_0000519.npy b/margin_logs/step_0000519.npy new file mode 100644 index 0000000..9a6e8b0 --- /dev/null +++ b/margin_logs/step_0000519.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5be59597755fa957a282949e97c96737297927cdb391c7dee2827e2a4a18a930 +size 384 diff --git a/margin_logs/step_0000520.npy b/margin_logs/step_0000520.npy new file mode 100644 index 0000000..62a44cf --- /dev/null +++ b/margin_logs/step_0000520.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5c71c606fa7381fe6469e91c260b2655626d3e025990c46d48596120088ee12 +size 384 diff --git a/margin_logs/step_0000521.npy b/margin_logs/step_0000521.npy new file mode 100644 index 0000000..b1de41e --- /dev/null +++ b/margin_logs/step_0000521.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15af12e21ad2f5cee3e9f8a42be911f545aca4467cdcbe7b8bbbcb00b7948f4a +size 384 diff --git a/margin_logs/step_0000522.npy b/margin_logs/step_0000522.npy new file mode 100644 index 0000000..3d87813 --- /dev/null +++ b/margin_logs/step_0000522.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e298c45466cf579e1e0b99ad7823abf69607deca0656ee4faf5a7ca049bfa1c3 +size 384 diff --git a/margin_logs/step_0000523.npy b/margin_logs/step_0000523.npy new file mode 100644 index 0000000..2ee00eb --- /dev/null +++ b/margin_logs/step_0000523.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04e65b5faba0f75bea9c86b2862c71fabec3fc0841045db514d5335bd0a784e6 +size 384 diff --git a/margin_logs/step_0000524.npy b/margin_logs/step_0000524.npy new file mode 100644 index 0000000..07740f2 --- /dev/null +++ b/margin_logs/step_0000524.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5c135c1da7f7624821f90ad1261c2d524ab2617ea1e73dbf01bf3fa0eb2afa6 +size 384 diff --git a/margin_logs/step_0000525.npy b/margin_logs/step_0000525.npy new file mode 100644 index 0000000..bc11bf6 --- /dev/null +++ b/margin_logs/step_0000525.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:54b3698dc33f9404fb366cafb17baa4631b6d25f1e7640324a9fee71d8285946 +size 384 diff --git a/margin_logs/step_0000526.npy b/margin_logs/step_0000526.npy new file mode 100644 index 0000000..60d7920 --- /dev/null +++ b/margin_logs/step_0000526.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f2fa85791b747595984a7254600727eba7704a0f1ff50a0a2c2ced6d287ce9f +size 384 diff --git a/margin_logs/step_0000527.npy b/margin_logs/step_0000527.npy new file mode 100644 index 0000000..1659bb7 --- /dev/null +++ b/margin_logs/step_0000527.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40e8d98c0b8a3fa6e188d5486013e7b216852f0fc49042e87609d7f5897ce5d1 +size 384 diff --git a/margin_logs/step_0000528.npy b/margin_logs/step_0000528.npy new file mode 100644 index 0000000..63eb2ae --- /dev/null +++ b/margin_logs/step_0000528.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8fd88cfddbba974964b11ba63a7a0dcd7734370da65e7c162309dbbc8367ddc2 +size 384 diff --git a/margin_logs/step_0000529.npy b/margin_logs/step_0000529.npy new file mode 100644 index 0000000..d90eead --- /dev/null +++ b/margin_logs/step_0000529.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:482e380f79d1287ea4e364eaa3185e37e4a534f39f975edee9d23b8d84bc7f1d +size 384 diff --git a/margin_logs/step_0000530.npy b/margin_logs/step_0000530.npy new file mode 100644 index 0000000..5a91c9b --- /dev/null +++ b/margin_logs/step_0000530.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15fd74831a8d9d9821253fa55fe2f8db732375e05317add7e94bb09080c87bf6 +size 384 diff --git a/margin_logs/step_0000531.npy b/margin_logs/step_0000531.npy new file mode 100644 index 0000000..ebbc4a7 --- /dev/null +++ b/margin_logs/step_0000531.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f567a2ebd37e88f094a2a9108453de1886694ac7add59c76e4e565cc6010e177 +size 384 diff --git a/margin_logs/step_0000532.npy b/margin_logs/step_0000532.npy new file mode 100644 index 0000000..6218699 --- /dev/null +++ b/margin_logs/step_0000532.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7675c461b076776b852c8c62780b19b1a0ab7a7172204df1b4c37d5604ac532 +size 384 diff --git a/margin_logs/step_0000533.npy b/margin_logs/step_0000533.npy new file mode 100644 index 0000000..b4cf808 --- /dev/null +++ b/margin_logs/step_0000533.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa6fd92b704c531398ff47a3a533a509854e5a6106a845c7adb477d56bf870a0 +size 384 diff --git a/margin_logs/step_0000534.npy b/margin_logs/step_0000534.npy new file mode 100644 index 0000000..d1d1ced --- /dev/null +++ b/margin_logs/step_0000534.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5401775433db51565b0ba4d807d68bb990976035b9c8604cc6185135a66cbe06 +size 384 diff --git a/margin_logs/step_0000535.npy b/margin_logs/step_0000535.npy new file mode 100644 index 0000000..0c75896 --- /dev/null +++ b/margin_logs/step_0000535.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:048804022dee7d65b247888a701c48f1c14c03db0cd02a73af4b8556251d5480 +size 384 diff --git a/margin_logs/step_0000536.npy b/margin_logs/step_0000536.npy new file mode 100644 index 0000000..3c198f8 --- /dev/null +++ b/margin_logs/step_0000536.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b702336e4351987470287aba5d7862b6d29335165077bac13026570681100c2 +size 384 diff --git a/margin_logs/step_0000537.npy b/margin_logs/step_0000537.npy new file mode 100644 index 0000000..a25ab59 --- /dev/null +++ b/margin_logs/step_0000537.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ec240c11d01761ec8770870d60b687a24455d992792ff6080f1c16da4e1ad44 +size 384 diff --git a/margin_logs/step_0000538.npy b/margin_logs/step_0000538.npy new file mode 100644 index 0000000..514d226 --- /dev/null +++ b/margin_logs/step_0000538.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73db5a86e6393f378b364966bc823cccb7e32307932d11572f755108b1146f4a +size 384 diff --git a/margin_logs/step_0000539.npy b/margin_logs/step_0000539.npy new file mode 100644 index 0000000..e784726 --- /dev/null +++ b/margin_logs/step_0000539.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4b1226890753d4362f69efd1ab11424694a782855d7e00f96be97dcd8d21994 +size 384 diff --git a/margin_logs/step_0000540.npy b/margin_logs/step_0000540.npy new file mode 100644 index 0000000..9c24a1f --- /dev/null +++ b/margin_logs/step_0000540.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f843e617ce0fdcfce4fa4b3db7a8bc03e6c6ef4ef775a774d53e30166a57bd1 +size 384 diff --git a/margin_logs/step_0000541.npy b/margin_logs/step_0000541.npy new file mode 100644 index 0000000..2caf04a --- /dev/null +++ b/margin_logs/step_0000541.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4844a2a95a09bd14b35b935cc3765031800852cfa0dbcb3561ed399e3fa63d8f +size 384 diff --git a/margin_logs/step_0000542.npy b/margin_logs/step_0000542.npy new file mode 100644 index 0000000..03a8630 --- /dev/null +++ b/margin_logs/step_0000542.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df9f1caf945b89fcc641cb1d9a592800d593f4f56ece2179884ce5357fa200cc +size 384 diff --git a/margin_logs/step_0000543.npy b/margin_logs/step_0000543.npy new file mode 100644 index 0000000..29bd934 --- /dev/null +++ b/margin_logs/step_0000543.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7aba96c4e69cbecccabf74806b68b4a2ec34b94c55fe1337c680e7bbbb0e2a02 +size 384 diff --git a/margin_logs/step_0000544.npy b/margin_logs/step_0000544.npy new file mode 100644 index 0000000..7020da4 --- /dev/null +++ b/margin_logs/step_0000544.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f03f6eead12adc60531c9e45df9cd2b0368ba8ba748e6e3f1c3d07a10104d91 +size 384 diff --git a/margin_logs/step_0000545.npy b/margin_logs/step_0000545.npy new file mode 100644 index 0000000..52a444b --- /dev/null +++ b/margin_logs/step_0000545.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0bdb42180c99303a643cc76e0b9e76f4cb7c50e40c3e3b199019a1744a4c965d +size 384 diff --git a/margin_logs/step_0000546.npy b/margin_logs/step_0000546.npy new file mode 100644 index 0000000..a76701a --- /dev/null +++ b/margin_logs/step_0000546.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab77703f7e85687addee0d330d35d85ae9ce1501416007256cfefa05f3d0e5eb +size 384 diff --git a/margin_logs/step_0000547.npy b/margin_logs/step_0000547.npy new file mode 100644 index 0000000..1479314 --- /dev/null +++ b/margin_logs/step_0000547.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb3a4d9702a6b823b98648d90aee7a25b530857437efd3212675a67af4307cfd +size 384 diff --git a/margin_logs/step_0000548.npy b/margin_logs/step_0000548.npy new file mode 100644 index 0000000..d0a2a6b --- /dev/null +++ b/margin_logs/step_0000548.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:00bfc91f6ee18d6bcf21417827d4c526c0813d74892e464f8d08962e8ef1ccdc +size 384 diff --git a/margin_logs/step_0000549.npy b/margin_logs/step_0000549.npy new file mode 100644 index 0000000..9d214df --- /dev/null +++ b/margin_logs/step_0000549.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3d96d0933d3efa19dbcad36421b19023d3e3825aeda7a53a36ef974e7f25685 +size 384 diff --git a/margin_logs/step_0000550.npy b/margin_logs/step_0000550.npy new file mode 100644 index 0000000..892d2bd --- /dev/null +++ b/margin_logs/step_0000550.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7389593301d8848222dbd13beaae71a966898a14299624ae8ec0425b84aa2c9e +size 384 diff --git a/margin_logs/step_0000551.npy b/margin_logs/step_0000551.npy new file mode 100644 index 0000000..59acf27 --- /dev/null +++ b/margin_logs/step_0000551.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:408e7501f055d9659be0fa77f990dad85ce01206e5650248dab1debc924a0c9a +size 384 diff --git a/margin_logs/step_0000552.npy b/margin_logs/step_0000552.npy new file mode 100644 index 0000000..1cf5827 --- /dev/null +++ b/margin_logs/step_0000552.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e959302ddb5ce8c4f6aaefaee50dadd5fdd4757c6d6c7ce752abbc212ba9fbf7 +size 384 diff --git a/margin_logs/step_0000553.npy b/margin_logs/step_0000553.npy new file mode 100644 index 0000000..666d52c --- /dev/null +++ b/margin_logs/step_0000553.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc48e421f84ba987061362f762e83297eaa4e4a911391ab68f48da3b8196092c +size 384 diff --git a/margin_logs/step_0000554.npy b/margin_logs/step_0000554.npy new file mode 100644 index 0000000..5bde51e --- /dev/null +++ b/margin_logs/step_0000554.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0e6ff9b436b5e94e6743e07b7918c076abe8f9f4deeabe11637c3fdbad8afef +size 384 diff --git a/margin_logs/step_0000555.npy b/margin_logs/step_0000555.npy new file mode 100644 index 0000000..2490e8b --- /dev/null +++ b/margin_logs/step_0000555.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28acf8387902e315fd153b2eb22ddf387cbfee3afbd5139c676b58d23a7c1beb +size 384 diff --git a/margin_logs/step_0000556.npy b/margin_logs/step_0000556.npy new file mode 100644 index 0000000..5c18e8b --- /dev/null +++ b/margin_logs/step_0000556.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:146dff1016667bf7d326063f291cf7eaf041ac5444b20d873cbb1d9807bc8e1f +size 384 diff --git a/margin_logs/step_0000557.npy b/margin_logs/step_0000557.npy new file mode 100644 index 0000000..6e78186 --- /dev/null +++ b/margin_logs/step_0000557.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f1c2f2e55f16f33e2206b35ad93fd21c266f91ef98afdd25ab85f9a656664ca +size 384 diff --git a/margin_logs/step_0000558.npy b/margin_logs/step_0000558.npy new file mode 100644 index 0000000..540f5dd --- /dev/null +++ b/margin_logs/step_0000558.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2205060ff50f2e86ce9414f9d621a8e38cc7f8dae3d6ac7f97dfab14f5aa4eb8 +size 384 diff --git a/margin_logs/step_0000559.npy b/margin_logs/step_0000559.npy new file mode 100644 index 0000000..2d80a80 --- /dev/null +++ b/margin_logs/step_0000559.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa2ab4980334470135e7df3f7d9deaef359808f5b406edcd3dd33f29ed1c3fcb +size 384 diff --git a/margin_logs/step_0000560.npy b/margin_logs/step_0000560.npy new file mode 100644 index 0000000..c5e74dd --- /dev/null +++ b/margin_logs/step_0000560.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29741685f6306973da086c803b873264a4b0c0bc75653697ba8bf196128744bc +size 384 diff --git a/margin_logs/step_0000561.npy b/margin_logs/step_0000561.npy new file mode 100644 index 0000000..820ab10 --- /dev/null +++ b/margin_logs/step_0000561.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3063f63e6b3916a555806dafcec42aac4b64fde6e9a96d2ccd383a39e43de1a +size 384 diff --git a/margin_logs/step_0000562.npy b/margin_logs/step_0000562.npy new file mode 100644 index 0000000..6e8d772 --- /dev/null +++ b/margin_logs/step_0000562.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4849924496bd7bce6930646b9d34ad36752fe6b76365cd554175d985bf28bdfa +size 384 diff --git a/margin_logs/step_0000563.npy b/margin_logs/step_0000563.npy new file mode 100644 index 0000000..5be6f7b --- /dev/null +++ b/margin_logs/step_0000563.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:078f4199bc85e1d343593d4d15c87c222d7e6cbb43e37593ef9bfc55f5c18054 +size 384 diff --git a/margin_logs/step_0000564.npy b/margin_logs/step_0000564.npy new file mode 100644 index 0000000..9d591e7 --- /dev/null +++ b/margin_logs/step_0000564.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0710c7afdabc70fc4e2354345b3cfcec1f057c7277227c63e97a3939a4c78cda +size 384 diff --git a/margin_logs/step_0000565.npy b/margin_logs/step_0000565.npy new file mode 100644 index 0000000..3bf2d16 --- /dev/null +++ b/margin_logs/step_0000565.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d88ea4ad31d4a103f42dbb88f2fae059e74229ac72f9dcde7c703b216c51309b +size 384 diff --git a/margin_logs/step_0000566.npy b/margin_logs/step_0000566.npy new file mode 100644 index 0000000..5d5f133 --- /dev/null +++ b/margin_logs/step_0000566.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a64664768bdf663292f509b62dc02a4214cb56d1f0682f23c26420c743218508 +size 384 diff --git a/margin_logs/step_0000567.npy b/margin_logs/step_0000567.npy new file mode 100644 index 0000000..08565ca --- /dev/null +++ b/margin_logs/step_0000567.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f9cbd048a4dd6ce67e737908ed233c7b4baab8f7ef055598ad0b82a8a88c4ce +size 384 diff --git a/margin_logs/step_0000568.npy b/margin_logs/step_0000568.npy new file mode 100644 index 0000000..74dcf6f --- /dev/null +++ b/margin_logs/step_0000568.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d0a8840a3d7153ae0209ab1abb5086d1413fa96cd6831d9b9896d54980eafe9b +size 384 diff --git a/margin_logs/step_0000569.npy b/margin_logs/step_0000569.npy new file mode 100644 index 0000000..be5e5a7 --- /dev/null +++ b/margin_logs/step_0000569.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7eaeb6d494a18200d8b2a61ae9dc309b53ffb0410c0582e31a39e737148a9f95 +size 384 diff --git a/margin_logs/step_0000570.npy b/margin_logs/step_0000570.npy new file mode 100644 index 0000000..873f04a --- /dev/null +++ b/margin_logs/step_0000570.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6bfbc6e82913570aeb37b2e885eb1725dba4bc024b43a0dd9c71b542abd9d1e +size 384 diff --git a/margin_logs/step_0000571.npy b/margin_logs/step_0000571.npy new file mode 100644 index 0000000..9d842fa --- /dev/null +++ b/margin_logs/step_0000571.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fbada489ce25cdffebf324ddb0cd93a02ed419eb462a2697ba7bf87590fc61fa +size 384 diff --git a/margin_logs/step_0000572.npy b/margin_logs/step_0000572.npy new file mode 100644 index 0000000..2f343df --- /dev/null +++ b/margin_logs/step_0000572.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8603a4079a20d526c7224b8e80ae02340b41eb3db07746a44536d73e6658246e +size 384 diff --git a/margin_logs/step_0000573.npy b/margin_logs/step_0000573.npy new file mode 100644 index 0000000..4ca0002 --- /dev/null +++ b/margin_logs/step_0000573.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42ebc24ab9a7ba74589b07ccef8ea2b29b53126666f2286ec8b74524073d2b18 +size 384 diff --git a/margin_logs/step_0000574.npy b/margin_logs/step_0000574.npy new file mode 100644 index 0000000..e1a80a2 --- /dev/null +++ b/margin_logs/step_0000574.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2318b1ad0afb60bde5af0da01853dee2ac4ff1361e9b88ecc30839daf10b0da5 +size 384 diff --git a/margin_logs/step_0000575.npy b/margin_logs/step_0000575.npy new file mode 100644 index 0000000..0bcfb6e --- /dev/null +++ b/margin_logs/step_0000575.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:458322005cfd6262c6bc58fc3fc09fc63918f3ae188c1f360beb88fd27506f8e +size 384 diff --git a/margin_logs/step_0000576.npy b/margin_logs/step_0000576.npy new file mode 100644 index 0000000..8521a9c --- /dev/null +++ b/margin_logs/step_0000576.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9dc036349c49c5bbe3992a1fa4db23b6aac6fd44cd02b6f87072ec8b1f19cc60 +size 384 diff --git a/margin_logs/step_0000577.npy b/margin_logs/step_0000577.npy new file mode 100644 index 0000000..0362d9e --- /dev/null +++ b/margin_logs/step_0000577.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59b8ea2e5476851c0b0d0be64dc5a23d71e4ed8e2df016daac9d3d408f400e67 +size 384 diff --git a/margin_logs/step_0000578.npy b/margin_logs/step_0000578.npy new file mode 100644 index 0000000..f8770ff --- /dev/null +++ b/margin_logs/step_0000578.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d62c4fc8ff9e47fba41f96465e223affa85200055f0ec1d9c0acf48395afdf4 +size 384 diff --git a/margin_logs/step_0000579.npy b/margin_logs/step_0000579.npy new file mode 100644 index 0000000..d3c0e12 --- /dev/null +++ b/margin_logs/step_0000579.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:522901c048951a707f97db7db9fd54619ffaa52a8944b113b9ebc0a861d7d5a7 +size 384 diff --git a/margin_logs/step_0000580.npy b/margin_logs/step_0000580.npy new file mode 100644 index 0000000..835d268 --- /dev/null +++ b/margin_logs/step_0000580.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea59fba2215bd6d9d9b5f7faad0573cb27a20abe28b55551d123563a5be283f0 +size 384 diff --git a/margin_logs/step_0000581.npy b/margin_logs/step_0000581.npy new file mode 100644 index 0000000..32e6a87 --- /dev/null +++ b/margin_logs/step_0000581.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3aa4dde2f3e0b84b86602a0eac6ecf88f800d73dc17d3d4506da0b65543d611d +size 384 diff --git a/margin_logs/step_0000582.npy b/margin_logs/step_0000582.npy new file mode 100644 index 0000000..f1771f8 --- /dev/null +++ b/margin_logs/step_0000582.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a46be0616785c239079f9f20ec8da7daa233eba131bc191d4f161ec626b9d120 +size 384 diff --git a/margin_logs/step_0000583.npy b/margin_logs/step_0000583.npy new file mode 100644 index 0000000..709f541 --- /dev/null +++ b/margin_logs/step_0000583.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35dff6cb4a812f344f290273a5a82ce4828e1758af0123116768ebf0a664c846 +size 384 diff --git a/margin_logs/step_0000584.npy b/margin_logs/step_0000584.npy new file mode 100644 index 0000000..565f097 --- /dev/null +++ b/margin_logs/step_0000584.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ae366df932b3b07b1bfa4c6815d5a62cdcde725e9aa27201f618ec6264b7b79 +size 384 diff --git a/margin_logs/step_0000585.npy b/margin_logs/step_0000585.npy new file mode 100644 index 0000000..09bb37c --- /dev/null +++ b/margin_logs/step_0000585.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:284d2ea9bdda8025b31813bcd1980c31a86c89efef48b3ed45839dc8d9b2735f +size 384 diff --git a/margin_logs/step_0000586.npy b/margin_logs/step_0000586.npy new file mode 100644 index 0000000..3617d93 --- /dev/null +++ b/margin_logs/step_0000586.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ae7da0c7f7720299c705f9d8adcae24008afdc7821651bc98560ae679ff66d3 +size 384 diff --git a/margin_logs/step_0000587.npy b/margin_logs/step_0000587.npy new file mode 100644 index 0000000..d232f25 --- /dev/null +++ b/margin_logs/step_0000587.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60c6a30c0c21502c63bdd98b37e7c8e9d5c4b3e7ad4072493bb6572bc2dc7f65 +size 384 diff --git a/margin_logs/step_0000588.npy b/margin_logs/step_0000588.npy new file mode 100644 index 0000000..c9da1f4 --- /dev/null +++ b/margin_logs/step_0000588.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79885f1e07434d64393e783757345e8c28d55af99691db5b548ccc7174364586 +size 384 diff --git a/margin_logs/step_0000589.npy b/margin_logs/step_0000589.npy new file mode 100644 index 0000000..80c484d --- /dev/null +++ b/margin_logs/step_0000589.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79f4e7dd8cc9403440dfd310be7dcf56834d0f6dc8edd2d86d489d62bdce7bc6 +size 384 diff --git a/margin_logs/step_0000590.npy b/margin_logs/step_0000590.npy new file mode 100644 index 0000000..26a283f --- /dev/null +++ b/margin_logs/step_0000590.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21c088f26c49fa6d5c01b02d40cb19a7307d0ac15f5bb615cad7ce25dbba3403 +size 384 diff --git a/margin_logs/step_0000591.npy b/margin_logs/step_0000591.npy new file mode 100644 index 0000000..c5e2b12 --- /dev/null +++ b/margin_logs/step_0000591.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9edd9efb27ee60bfe7a9ed4507baa8249be482a5a75281db32877b02af209071 +size 384 diff --git a/margin_logs/step_0000592.npy b/margin_logs/step_0000592.npy new file mode 100644 index 0000000..e8dbe97 --- /dev/null +++ b/margin_logs/step_0000592.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:552916e3f6d687e3ec197226b38254758072d0735a97cd5502548478de591754 +size 384 diff --git a/margin_logs/step_0000593.npy b/margin_logs/step_0000593.npy new file mode 100644 index 0000000..def88ea --- /dev/null +++ b/margin_logs/step_0000593.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:923c84eff56ed52b97e2540851ac64980a61f7076c5f28a593494494531ffdcf +size 384 diff --git a/margin_logs/step_0000594.npy b/margin_logs/step_0000594.npy new file mode 100644 index 0000000..43b454f --- /dev/null +++ b/margin_logs/step_0000594.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd1d5e38fdf3f3340fa66092284bced1dbd0f02fad7850a89e54c32308398159 +size 384 diff --git a/margin_logs/step_0000595.npy b/margin_logs/step_0000595.npy new file mode 100644 index 0000000..71edcab --- /dev/null +++ b/margin_logs/step_0000595.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f821a3f6c4b0ef3a217d7d50bfd7ad84127dce6ede53e8379b0d5191f7c690bc +size 384 diff --git a/margin_logs/step_0000596.npy b/margin_logs/step_0000596.npy new file mode 100644 index 0000000..5af992c --- /dev/null +++ b/margin_logs/step_0000596.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50ce7d50ea603351bffa8fb48cc740c5d5dd36fffa34c74754484ab6cf978db1 +size 384 diff --git a/margin_logs/step_0000597.npy b/margin_logs/step_0000597.npy new file mode 100644 index 0000000..7c8ad15 --- /dev/null +++ b/margin_logs/step_0000597.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97ac15513e891876864c3aee2e8f0c4a84290e95f417ff35048a9db0eba53494 +size 384 diff --git a/margin_logs/step_0000598.npy b/margin_logs/step_0000598.npy new file mode 100644 index 0000000..fe22f4b --- /dev/null +++ b/margin_logs/step_0000598.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a63745a15dd60bee68e5768081eb4688973ec0bb67c948dd8ccab6daaff2cbd +size 384 diff --git a/margin_logs/step_0000599.npy b/margin_logs/step_0000599.npy new file mode 100644 index 0000000..740a436 --- /dev/null +++ b/margin_logs/step_0000599.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d7d763cf53a21c597e88b7de5a871ed7039d687c25bd26a7ad6880d27f139479 +size 384 diff --git a/margin_logs/step_0000600.npy b/margin_logs/step_0000600.npy new file mode 100644 index 0000000..fae2b6a --- /dev/null +++ b/margin_logs/step_0000600.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7f17e1ce109a249aa7f40a5d9ae81327ad6a3b56a0ab46f34d1caf6af2d72cd +size 384 diff --git a/margin_logs/step_0000601.npy b/margin_logs/step_0000601.npy new file mode 100644 index 0000000..a07e499 --- /dev/null +++ b/margin_logs/step_0000601.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0454dd7301e05eb7228f25ccb756b1cf80ac99d16b30562dcc9b629056ddca12 +size 384 diff --git a/margin_logs/step_0000602.npy b/margin_logs/step_0000602.npy new file mode 100644 index 0000000..380bb19 --- /dev/null +++ b/margin_logs/step_0000602.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76835592fb6baffe81cdf7b3ba2a686a5457ed52fefb3406dcf33619984c314e +size 384 diff --git a/margin_logs/step_0000603.npy b/margin_logs/step_0000603.npy new file mode 100644 index 0000000..9f4d260 --- /dev/null +++ b/margin_logs/step_0000603.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:846623dca1cc63109d43f140fd068eb152574ea8ccccd9367d307864dcb9b32b +size 384 diff --git a/margin_logs/step_0000604.npy b/margin_logs/step_0000604.npy new file mode 100644 index 0000000..d07c99c --- /dev/null +++ b/margin_logs/step_0000604.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b1529b246f2a4fa94504fd88cdeae7d722c65b3ab405c8438b1649c2d8a0826 +size 384 diff --git a/margin_logs/step_0000605.npy b/margin_logs/step_0000605.npy new file mode 100644 index 0000000..4d206ef --- /dev/null +++ b/margin_logs/step_0000605.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb01591e1077d983fec7e1f131d4234bece1fd1923b75f79e47e06c7f020138e +size 384 diff --git a/margin_logs/step_0000606.npy b/margin_logs/step_0000606.npy new file mode 100644 index 0000000..87235c3 --- /dev/null +++ b/margin_logs/step_0000606.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:636a3eb9b5841463533450aaf1222449d65f7b1029861fbe0c9dab5f25554827 +size 384 diff --git a/margin_logs/step_0000607.npy b/margin_logs/step_0000607.npy new file mode 100644 index 0000000..cd1e7db --- /dev/null +++ b/margin_logs/step_0000607.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb7a26315a2335d0ce6820982db76546156f55801a388007baa3d10c1768cb7a +size 384 diff --git a/margin_logs/step_0000608.npy b/margin_logs/step_0000608.npy new file mode 100644 index 0000000..e6c7161 --- /dev/null +++ b/margin_logs/step_0000608.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22e16e56c28001638501d0762d6b45b64c4e4203f6b71cc85a683accbc25c2c3 +size 384 diff --git a/margin_logs/step_0000609.npy b/margin_logs/step_0000609.npy new file mode 100644 index 0000000..cfb88ab --- /dev/null +++ b/margin_logs/step_0000609.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba26190fe11ad4e3541cd5d744fabf90e5f31e0beb0f1fc981a6ad5e21a7bdf6 +size 384 diff --git a/margin_logs/step_0000610.npy b/margin_logs/step_0000610.npy new file mode 100644 index 0000000..f5862b8 --- /dev/null +++ b/margin_logs/step_0000610.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ceaaa1c8504a6db2b7b006f65a8873ce9975ee0858bf6c8ca04493e99a91466e +size 384 diff --git a/margin_logs/step_0000611.npy b/margin_logs/step_0000611.npy new file mode 100644 index 0000000..c30132c --- /dev/null +++ b/margin_logs/step_0000611.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a6d45ee230712d90433ecbc3196c996b5f2d6a89c72ed560b0f212721a4b303 +size 384 diff --git a/margin_logs/step_0000612.npy b/margin_logs/step_0000612.npy new file mode 100644 index 0000000..a7a9b1a --- /dev/null +++ b/margin_logs/step_0000612.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:91485ef565e5e4b371ebe26595f994a3982f6f23d1af18ab66ed06bd92bb6f9e +size 384 diff --git a/margin_logs/step_0000613.npy b/margin_logs/step_0000613.npy new file mode 100644 index 0000000..0cb671d --- /dev/null +++ b/margin_logs/step_0000613.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eeec587045ac77c30b42d6b8d36ae476d139d51d60bfa0051b83bb70bafa9822 +size 384 diff --git a/margin_logs/step_0000614.npy b/margin_logs/step_0000614.npy new file mode 100644 index 0000000..86cb74f --- /dev/null +++ b/margin_logs/step_0000614.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e28962133cd70fd99b6a19a772c72c062fc4a05e76256b072edb5186896f214a +size 384 diff --git a/margin_logs/step_0000615.npy b/margin_logs/step_0000615.npy new file mode 100644 index 0000000..c3e01fd --- /dev/null +++ b/margin_logs/step_0000615.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7666a888c4461fb0f12863f3f716540542b6c9d3f7417166dd96afc2525d60c1 +size 384 diff --git a/margin_logs/step_0000616.npy b/margin_logs/step_0000616.npy new file mode 100644 index 0000000..d5013ac --- /dev/null +++ b/margin_logs/step_0000616.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c95ed4694f7f39b6f06624152157cc7b142f9c23547dc276b6a84a4c8a0dd8fc +size 384 diff --git a/margin_logs/step_0000617.npy b/margin_logs/step_0000617.npy new file mode 100644 index 0000000..5002228 --- /dev/null +++ b/margin_logs/step_0000617.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:53016e805ba9957dae6db73b272da43f947be30204f323c6477d7fc9d44876c9 +size 384 diff --git a/margin_logs/step_0000618.npy b/margin_logs/step_0000618.npy new file mode 100644 index 0000000..62cc112 --- /dev/null +++ b/margin_logs/step_0000618.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:852dcadf6e55f3f5375f7b900626b3d642e4826e2988f1dd94b06003b4c5b10a +size 384 diff --git a/margin_logs/step_0000619.npy b/margin_logs/step_0000619.npy new file mode 100644 index 0000000..b8ae60c --- /dev/null +++ b/margin_logs/step_0000619.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:229d10eb1d42c3aa67f576f47cf499daa84feca4826aeba3547002310d4be0a1 +size 384 diff --git a/margin_logs/step_0000620.npy b/margin_logs/step_0000620.npy new file mode 100644 index 0000000..e8ba343 --- /dev/null +++ b/margin_logs/step_0000620.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7b464b26ec33810bf62077e80ea6062a2bca98c1c3015a5c0296ed60a8a40d5 +size 384 diff --git a/margin_logs/step_0000621.npy b/margin_logs/step_0000621.npy new file mode 100644 index 0000000..def92be --- /dev/null +++ b/margin_logs/step_0000621.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ccdae82278223f209c837d6162b42ff58240c16f4fa48c6532529faf54ad2a23 +size 384 diff --git a/margin_logs/step_0000622.npy b/margin_logs/step_0000622.npy new file mode 100644 index 0000000..60477f0 --- /dev/null +++ b/margin_logs/step_0000622.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:154878af17573ad165504ccd5868eaa7104ad0921b75949185c0f758261f3f60 +size 384 diff --git a/margin_logs/step_0000623.npy b/margin_logs/step_0000623.npy new file mode 100644 index 0000000..8194e6f --- /dev/null +++ b/margin_logs/step_0000623.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b178f24bc5e44b7bcaa6e546a1b046831800aaf211226e9ef4f73467cf1630f +size 384 diff --git a/margin_logs/step_0000624.npy b/margin_logs/step_0000624.npy new file mode 100644 index 0000000..d279dca --- /dev/null +++ b/margin_logs/step_0000624.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0617cb97d2652e0054180e716ef5e784d9791d4061eb45cade52ecb1c2468cdb +size 384 diff --git a/margin_logs/step_0000625.npy b/margin_logs/step_0000625.npy new file mode 100644 index 0000000..4edb783 --- /dev/null +++ b/margin_logs/step_0000625.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5ab220852efd0efd08f3381a509fd3acab9e280253ae9eadd196dfb84c43151 +size 384 diff --git a/margin_logs/step_0000626.npy b/margin_logs/step_0000626.npy new file mode 100644 index 0000000..a940a37 --- /dev/null +++ b/margin_logs/step_0000626.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f99299adb3e25711f455755aebf7a398b0d5dccff015ac6b4aafd277e8aeebb +size 384 diff --git a/margin_logs/step_0000627.npy b/margin_logs/step_0000627.npy new file mode 100644 index 0000000..fef8f9a --- /dev/null +++ b/margin_logs/step_0000627.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4ebdc34bd65f6e4de96fd78a7b7c45cb49b340d0b6300b3a31ae368409b3432 +size 384 diff --git a/margin_logs/step_0000628.npy b/margin_logs/step_0000628.npy new file mode 100644 index 0000000..a23dd34 --- /dev/null +++ b/margin_logs/step_0000628.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:996114072cf4dd6f0478fcef2cd341c8a21ec60738693c845d6be031beecd3da +size 384 diff --git a/margin_logs/step_0000629.npy b/margin_logs/step_0000629.npy new file mode 100644 index 0000000..87a7e78 --- /dev/null +++ b/margin_logs/step_0000629.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:306a1a1e597dd5ee1602b8ea6acb2827907ae9117572e71555adec8cc283b5df +size 384 diff --git a/margin_logs/step_0000630.npy b/margin_logs/step_0000630.npy new file mode 100644 index 0000000..5debe79 --- /dev/null +++ b/margin_logs/step_0000630.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ee827de8fe98ba9863e3df8e893f398ce9fec309e84735202b7e48c0493a924 +size 384 diff --git a/margin_logs/step_0000631.npy b/margin_logs/step_0000631.npy new file mode 100644 index 0000000..1eb21cd --- /dev/null +++ b/margin_logs/step_0000631.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9cb38251f03cdb080ace35e396ab762fe8b286eddea5affe4f5c6208ae160381 +size 384 diff --git a/margin_logs/step_0000632.npy b/margin_logs/step_0000632.npy new file mode 100644 index 0000000..61ee7dc --- /dev/null +++ b/margin_logs/step_0000632.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dfcec54b4cb4a7d0d3b372ca7a3a585305318d2dc8a68473b5d0786e64fd5562 +size 384 diff --git a/margin_logs/step_0000633.npy b/margin_logs/step_0000633.npy new file mode 100644 index 0000000..674fe36 --- /dev/null +++ b/margin_logs/step_0000633.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8bd190cb4ac97e463268c40f5172b96619926bddd08bc834c3ee296b60b01d3d +size 384 diff --git a/margin_logs/step_0000634.npy b/margin_logs/step_0000634.npy new file mode 100644 index 0000000..f32689c --- /dev/null +++ b/margin_logs/step_0000634.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f531c6db9ae94fd857702375e5fb2f49430394801bf921e16ceb58537d6fa149 +size 384 diff --git a/margin_logs/step_0000635.npy b/margin_logs/step_0000635.npy new file mode 100644 index 0000000..3d16e8d --- /dev/null +++ b/margin_logs/step_0000635.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b16e21dcf1aee558262751c4d7f4094de07e4303247f4ef2f9ae550c7fa690c0 +size 384 diff --git a/margin_logs/step_0000636.npy b/margin_logs/step_0000636.npy new file mode 100644 index 0000000..14c5454 --- /dev/null +++ b/margin_logs/step_0000636.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:917bdecd84700ae331888064088592c4eb70bdab3199f3b04822b0a075b56628 +size 384 diff --git a/margin_logs/step_0000637.npy b/margin_logs/step_0000637.npy new file mode 100644 index 0000000..47b00f5 --- /dev/null +++ b/margin_logs/step_0000637.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24cdbce0d15bfdcc11cfc56f39fcf55c53ec1855f6e3aad105710e7b09d343b6 +size 384 diff --git a/margin_logs/step_0000638.npy b/margin_logs/step_0000638.npy new file mode 100644 index 0000000..3ac6180 --- /dev/null +++ b/margin_logs/step_0000638.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5684cfd7724c3036b6d923e648eae6b066673ddc1df6a7e3ca6244cbceaa4262 +size 384 diff --git a/margin_logs/step_0000639.npy b/margin_logs/step_0000639.npy new file mode 100644 index 0000000..89fb899 --- /dev/null +++ b/margin_logs/step_0000639.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:747aebca8b9c7b9fa4f3aa5312095af6b7e5a63696241f6bcb7c659e85a14bc1 +size 384 diff --git a/margin_logs/step_0000640.npy b/margin_logs/step_0000640.npy new file mode 100644 index 0000000..f485b8f --- /dev/null +++ b/margin_logs/step_0000640.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74b5aa31c73f0ab0413766a92c8b31e0887d6620d5793480e7ca8b2490010693 +size 384 diff --git a/margin_logs/step_0000641.npy b/margin_logs/step_0000641.npy new file mode 100644 index 0000000..559826e --- /dev/null +++ b/margin_logs/step_0000641.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0f2b285ffcc7116058c25ad6a766dce09b4f1cedfcf2e8d6cb22d005f320881 +size 384 diff --git a/margin_logs/step_0000642.npy b/margin_logs/step_0000642.npy new file mode 100644 index 0000000..a352c18 --- /dev/null +++ b/margin_logs/step_0000642.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:612a98109f16d888e2b3daabd795f0663c892112c78e834afbe826c3fcd2da4b +size 384 diff --git a/margin_logs/step_0000643.npy b/margin_logs/step_0000643.npy new file mode 100644 index 0000000..0c79fb5 --- /dev/null +++ b/margin_logs/step_0000643.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82acc79b5e957db23c11ace91f6e2bd04454603649601a87b0dbdfda4bd8d9a0 +size 384 diff --git a/margin_logs/step_0000644.npy b/margin_logs/step_0000644.npy new file mode 100644 index 0000000..b819bcd --- /dev/null +++ b/margin_logs/step_0000644.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f6fe8bb32ce0e7f56d05b1ecc57c0b62821ee02778ef882fc5825c9154fd535c +size 384 diff --git a/margin_logs/step_0000645.npy b/margin_logs/step_0000645.npy new file mode 100644 index 0000000..7662ecd --- /dev/null +++ b/margin_logs/step_0000645.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e6d41222c45797501b46fe94f2b505d08d4b9c768affadd4707a8528bc440be +size 384 diff --git a/margin_logs/step_0000646.npy b/margin_logs/step_0000646.npy new file mode 100644 index 0000000..8b04432 --- /dev/null +++ b/margin_logs/step_0000646.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83df917503c7344dc4c25142d86a88bdf5782ab7515ec74e9071fcc84a2e0bad +size 384 diff --git a/margin_logs/step_0000647.npy b/margin_logs/step_0000647.npy new file mode 100644 index 0000000..6bdfd2f --- /dev/null +++ b/margin_logs/step_0000647.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a26e71f0636237d8c45006713012034ad2a86c794373f245f97dff3fd968698c +size 384 diff --git a/margin_logs/step_0000648.npy b/margin_logs/step_0000648.npy new file mode 100644 index 0000000..0fece27 --- /dev/null +++ b/margin_logs/step_0000648.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:938043e13b08695b02fcd752f69f86767b83ef6a5251b04a1c825e4270254081 +size 384 diff --git a/margin_logs/step_0000649.npy b/margin_logs/step_0000649.npy new file mode 100644 index 0000000..83f94b4 --- /dev/null +++ b/margin_logs/step_0000649.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f587305fe5298e27769453478945c09c1805176acbd00d7af7303eb63db0f49d +size 384 diff --git a/margin_logs/step_0000650.npy b/margin_logs/step_0000650.npy new file mode 100644 index 0000000..dcf43d1 --- /dev/null +++ b/margin_logs/step_0000650.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8121cc1a0b83c389c5544cf316859d9ec7a963cf6b02a309b4a8f79ec84c181e +size 384 diff --git a/margin_logs/step_0000651.npy b/margin_logs/step_0000651.npy new file mode 100644 index 0000000..193b02c --- /dev/null +++ b/margin_logs/step_0000651.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c21178fa9f840b650b0611a2b82a54c5a0751a2da0446143825bc81959b7db9 +size 384 diff --git a/margin_logs/step_0000652.npy b/margin_logs/step_0000652.npy new file mode 100644 index 0000000..c30a767 --- /dev/null +++ b/margin_logs/step_0000652.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a62ba2b65ea16d2dbaf50b14692b39de6114d5f0d4065b8870a31de7ba9a30fb +size 384 diff --git a/margin_logs/step_0000653.npy b/margin_logs/step_0000653.npy new file mode 100644 index 0000000..a15706d --- /dev/null +++ b/margin_logs/step_0000653.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a6a84a4c81002d8272c29cd35aeceeb3f7b3d124912e22eec626008a6fbcd417 +size 384 diff --git a/margin_logs/step_0000654.npy b/margin_logs/step_0000654.npy new file mode 100644 index 0000000..7f4bf6c --- /dev/null +++ b/margin_logs/step_0000654.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f143c1c2882298e2573e6434446927c1969a74e68ea1265d02e3fc92d69f487 +size 384 diff --git a/margin_logs/step_0000655.npy b/margin_logs/step_0000655.npy new file mode 100644 index 0000000..be6816b --- /dev/null +++ b/margin_logs/step_0000655.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:842e17ee903a9026cf1b40243ef1584e9b1340e20c8b3efe24bf073b688fcdc9 +size 384 diff --git a/margin_logs/step_0000656.npy b/margin_logs/step_0000656.npy new file mode 100644 index 0000000..37e1331 --- /dev/null +++ b/margin_logs/step_0000656.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:31ddeedebad488c1af3e4e09bed431b99af40cc4f8a061b38f4c4e94860f384e +size 384 diff --git a/margin_logs/step_0000657.npy b/margin_logs/step_0000657.npy new file mode 100644 index 0000000..c232ec1 --- /dev/null +++ b/margin_logs/step_0000657.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e57915f20ed93f200ea7606503712d88055a3da3cab730a8f0928e3c88e8889d +size 384 diff --git a/margin_logs/step_0000658.npy b/margin_logs/step_0000658.npy new file mode 100644 index 0000000..41c2e11 --- /dev/null +++ b/margin_logs/step_0000658.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:145c66b0923d3e1bb7811a55ddbfc830a78d13c2ffa004bce21aa308ff3824bd +size 384 diff --git a/margin_logs/step_0000659.npy b/margin_logs/step_0000659.npy new file mode 100644 index 0000000..685cc04 --- /dev/null +++ b/margin_logs/step_0000659.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8953a84368f7578f5fd33ac7f799375e02e463f782359e58d23392a05af58bae +size 384 diff --git a/margin_logs/step_0000660.npy b/margin_logs/step_0000660.npy new file mode 100644 index 0000000..d6cd007 --- /dev/null +++ b/margin_logs/step_0000660.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad6628f984b0082d3d2b55a97c148136277f44e34c39a81fb9f97eb9d1e251db +size 384 diff --git a/margin_logs/step_0000661.npy b/margin_logs/step_0000661.npy new file mode 100644 index 0000000..33cc718 --- /dev/null +++ b/margin_logs/step_0000661.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f130a101300e9ffd9b7c7226c38015ebc88014b2d346685761752d285fd6d1fe +size 384 diff --git a/margin_logs/step_0000662.npy b/margin_logs/step_0000662.npy new file mode 100644 index 0000000..c0c7dee --- /dev/null +++ b/margin_logs/step_0000662.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04ac60fd235da8308c0c3bdbbed8b4c700da26a34ba53605698ec2b1999465f4 +size 384 diff --git a/margin_logs/step_0000663.npy b/margin_logs/step_0000663.npy new file mode 100644 index 0000000..def9f05 --- /dev/null +++ b/margin_logs/step_0000663.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29b568cf44e582894a6f6666a63ce21f9c45b8b908d4c5ccc761569faf38d4d5 +size 384 diff --git a/margin_logs/step_0000664.npy b/margin_logs/step_0000664.npy new file mode 100644 index 0000000..b926108 --- /dev/null +++ b/margin_logs/step_0000664.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67853412279ef0acd3b7c0800ae9a46d35c6d547efe224b54adff73ce9c5a94e +size 384 diff --git a/margin_logs/step_0000665.npy b/margin_logs/step_0000665.npy new file mode 100644 index 0000000..7d74779 --- /dev/null +++ b/margin_logs/step_0000665.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:108ac01d60c051e97ce5a0783387caeef3aeb3a051313a0b3c018adc5543085b +size 384 diff --git a/margin_logs/step_0000666.npy b/margin_logs/step_0000666.npy new file mode 100644 index 0000000..3de399d --- /dev/null +++ b/margin_logs/step_0000666.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2248a078df4b2894aaa112ac005fb41d6502bd67687b5ca912644bf2ec97de37 +size 384 diff --git a/margin_logs/step_0000667.npy b/margin_logs/step_0000667.npy new file mode 100644 index 0000000..e403fde --- /dev/null +++ b/margin_logs/step_0000667.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e135ce86bb23379d7997c49ebf695959f501ef763c2dedd409faec26761583a7 +size 384 diff --git a/margin_logs/step_0000668.npy b/margin_logs/step_0000668.npy new file mode 100644 index 0000000..132dab0 --- /dev/null +++ b/margin_logs/step_0000668.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bed7369ee1deb552a702fe08bd93484cc0a536dbe9ce57ac3f39a11296a4031e +size 384 diff --git a/margin_logs/step_0000669.npy b/margin_logs/step_0000669.npy new file mode 100644 index 0000000..aa15e16 --- /dev/null +++ b/margin_logs/step_0000669.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:082b8cee7a9ae6148017e352ab94fd8130bdd76d4619b17449217a0a301562a7 +size 384 diff --git a/margin_logs/step_0000670.npy b/margin_logs/step_0000670.npy new file mode 100644 index 0000000..e258985 --- /dev/null +++ b/margin_logs/step_0000670.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90443de5f5e85d32f952a2c8bc67b390f0ed92fd0a75c7a8810459e14f31fc53 +size 384 diff --git a/margin_logs/step_0000671.npy b/margin_logs/step_0000671.npy new file mode 100644 index 0000000..1cdf24d --- /dev/null +++ b/margin_logs/step_0000671.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56f7a73fbe69a0e5525e10e739e837a0df21be5325f9100c2e8193819b1017ec +size 384 diff --git a/margin_logs/step_0000672.npy b/margin_logs/step_0000672.npy new file mode 100644 index 0000000..8057fbe --- /dev/null +++ b/margin_logs/step_0000672.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ba764790062f1962b13bf2b7fa96c8f91c310a798c8ff862cc6f1fed0f0ef17 +size 384 diff --git a/margin_logs/step_0000673.npy b/margin_logs/step_0000673.npy new file mode 100644 index 0000000..7973174 --- /dev/null +++ b/margin_logs/step_0000673.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:34922aefece5db188b1d366fcd5d71a8549641ff9d8185e4dd5c359ca16d8752 +size 384 diff --git a/margin_logs/step_0000674.npy b/margin_logs/step_0000674.npy new file mode 100644 index 0000000..a782b3b --- /dev/null +++ b/margin_logs/step_0000674.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e18339ee9dd39adb8e8169bd63fa0fef8be2b27216284a09ff8b30dc16dd5c12 +size 384 diff --git a/margin_logs/step_0000675.npy b/margin_logs/step_0000675.npy new file mode 100644 index 0000000..8a9947f --- /dev/null +++ b/margin_logs/step_0000675.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a6e082748d5758eb95cf74b7d20a74632df5e5850fd6764dce152fad962c4a8c +size 384 diff --git a/margin_logs/step_0000676.npy b/margin_logs/step_0000676.npy new file mode 100644 index 0000000..1d6cc48 --- /dev/null +++ b/margin_logs/step_0000676.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6dfba7ba7b6b92c64bfe82948a171c069770ad7c321e17d53b5ec47c84b8db35 +size 384 diff --git a/margin_logs/step_0000677.npy b/margin_logs/step_0000677.npy new file mode 100644 index 0000000..0c8e399 --- /dev/null +++ b/margin_logs/step_0000677.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:006fe8aecc097b61073c0696ba8c4567b44f457e5d06ff1bac40156b23dd34bd +size 384 diff --git a/margin_logs/step_0000678.npy b/margin_logs/step_0000678.npy new file mode 100644 index 0000000..c073044 --- /dev/null +++ b/margin_logs/step_0000678.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:752c26780046904e1f35bf783f8ac889368db0f37757f2b1a46e08e2aba89b5d +size 384 diff --git a/margin_logs/step_0000679.npy b/margin_logs/step_0000679.npy new file mode 100644 index 0000000..47fb73c --- /dev/null +++ b/margin_logs/step_0000679.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95fc7921782ae559298b8e1ec660cf36ae758b039fbcf020705746a021f36f14 +size 384 diff --git a/margin_logs/step_0000680.npy b/margin_logs/step_0000680.npy new file mode 100644 index 0000000..66247b8 --- /dev/null +++ b/margin_logs/step_0000680.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:610f6fada031bc34b7a1adf10b168d609dcc1401774207985fd006e0d860d870 +size 384 diff --git a/margin_logs/step_0000681.npy b/margin_logs/step_0000681.npy new file mode 100644 index 0000000..10881b4 --- /dev/null +++ b/margin_logs/step_0000681.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82586fbbe3b4e8e8603cb3561407d8c2db91a14d948653b8d950a87624e4417a +size 384 diff --git a/model-00001-of-00007.safetensors b/model-00001-of-00007.safetensors new file mode 100644 index 0000000..68f4402 --- /dev/null +++ b/model-00001-of-00007.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e907394e30d6f1c755e166380bc9977e45b0946f8ffaae48e88181deffc5f4a7 +size 4886466168 diff --git a/model-00002-of-00007.safetensors b/model-00002-of-00007.safetensors new file mode 100644 index 0000000..b966a08 --- /dev/null +++ b/model-00002-of-00007.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b709a686b69dedc3881a33de88ff6fe4231d0b78135641dce6bb1bc274879ceb +size 4832007448 diff --git a/model-00003-of-00007.safetensors b/model-00003-of-00007.safetensors new file mode 100644 index 0000000..73aa62e --- /dev/null +++ b/model-00003-of-00007.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a6bf6aa33ad5f4b4392281ee26119dda13e37d9583e2f94bf98293f309ee0879 +size 4999813112 diff --git a/model-00004-of-00007.safetensors b/model-00004-of-00007.safetensors new file mode 100644 index 0000000..54ed29b --- /dev/null +++ b/model-00004-of-00007.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c250113d7230cc8f6c540314b0c23f5cb208969ff7511dd33ce33de201bcb2e +size 4999813128 diff --git a/model-00005-of-00007.safetensors b/model-00005-of-00007.safetensors new file mode 100644 index 0000000..6cb68b6 --- /dev/null +++ b/model-00005-of-00007.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8db34f6e6a8962fb09cff805ad1fe16ee1f0bc3bb7d4603ea49e9181965dca5f +size 4832007496 diff --git a/model-00006-of-00007.safetensors b/model-00006-of-00007.safetensors new file mode 100644 index 0000000..f3286c7 --- /dev/null +++ b/model-00006-of-00007.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a51cf3b1608151bb6eb4a19ab79a6574bf8d4d2837b6b622f18a5fc731c2648 +size 4999813120 diff --git a/model-00007-of-00007.safetensors b/model-00007-of-00007.safetensors new file mode 100644 index 0000000..34a73e2 --- /dev/null +++ b/model-00007-of-00007.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a669832fcd286f075189f6421d50614d8e3c58d86f19ba8125d33603b852187a +size 2571158184 diff --git a/model.safetensors.index.json b/model.safetensors.index.json new file mode 100644 index 0000000..0985084 --- /dev/null +++ b/model.safetensors.index.json @@ -0,0 +1,298 @@ +{ + "metadata": { + "total_size": 32121044992 + }, + "weight_map": { + "lm_head.weight": "model-00007-of-00007.safetensors", + "model.embed_tokens.weight": "model-00001-of-00007.safetensors", + "model.layers.0.input_layernorm.weight": "model-00001-of-00007.safetensors", + "model.layers.0.mlp.down_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.0.mlp.up_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00007.safetensors", + "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.1.input_layernorm.weight": "model-00001-of-00007.safetensors", + "model.layers.1.mlp.down_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.1.mlp.up_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00007.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.10.input_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.10.mlp.down_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.10.mlp.gate_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.10.mlp.up_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.10.self_attn.q_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.11.input_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.11.mlp.down_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.11.mlp.gate_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.11.mlp.up_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.11.self_attn.k_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.11.self_attn.q_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.11.self_attn.v_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.12.input_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.12.mlp.down_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.12.mlp.gate_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.12.mlp.up_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.12.self_attn.k_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.12.self_attn.o_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.12.self_attn.q_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.12.self_attn.v_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.13.input_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.13.mlp.down_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.13.mlp.gate_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.13.mlp.up_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.13.self_attn.k_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.13.self_attn.q_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.13.self_attn.v_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.14.input_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.14.mlp.down_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.14.mlp.gate_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.14.mlp.up_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.14.self_attn.k_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.14.self_attn.q_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.14.self_attn.v_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.15.input_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.15.mlp.down_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.15.mlp.gate_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.15.mlp.up_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.15.self_attn.k_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.15.self_attn.q_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.15.self_attn.v_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.16.input_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.16.mlp.down_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.16.mlp.gate_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.16.mlp.up_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.16.post_attention_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.16.self_attn.k_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.16.self_attn.o_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.16.self_attn.q_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.16.self_attn.v_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.17.input_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.17.mlp.down_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.17.mlp.gate_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.17.mlp.up_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.17.self_attn.k_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.17.self_attn.o_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.17.self_attn.q_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.17.self_attn.v_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.18.input_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.18.mlp.down_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.18.mlp.gate_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.18.mlp.up_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.18.self_attn.k_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.18.self_attn.o_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.18.self_attn.q_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.18.self_attn.v_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.19.input_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.19.mlp.down_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.19.mlp.gate_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.19.mlp.up_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.19.post_attention_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.19.self_attn.k_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.19.self_attn.o_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.19.self_attn.q_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.19.self_attn.v_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.2.input_layernorm.weight": "model-00001-of-00007.safetensors", + "model.layers.2.mlp.down_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.2.mlp.up_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00007.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.20.input_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.20.mlp.down_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.20.mlp.gate_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.20.mlp.up_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.20.post_attention_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.20.self_attn.k_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.20.self_attn.o_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.20.self_attn.q_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.20.self_attn.v_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.21.input_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.21.mlp.down_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.21.mlp.gate_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.21.mlp.up_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.21.post_attention_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.21.self_attn.k_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.21.self_attn.o_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.21.self_attn.q_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.21.self_attn.v_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.22.input_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.22.mlp.down_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.22.mlp.gate_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.22.mlp.up_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.22.self_attn.k_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.22.self_attn.o_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.22.self_attn.q_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.22.self_attn.v_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.23.input_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.23.mlp.down_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.23.mlp.gate_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.23.mlp.up_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.23.post_attention_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.23.self_attn.k_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.23.self_attn.o_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.23.self_attn.q_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.23.self_attn.v_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.24.input_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.24.mlp.down_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.24.mlp.gate_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.24.mlp.up_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.24.self_attn.k_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.24.self_attn.o_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.24.self_attn.q_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.24.self_attn.v_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.25.input_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.25.mlp.down_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.25.mlp.gate_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.25.mlp.up_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.25.post_attention_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.25.self_attn.k_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.25.self_attn.o_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.25.self_attn.q_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.25.self_attn.v_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.26.input_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.26.mlp.down_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.26.mlp.gate_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.26.mlp.up_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.26.self_attn.k_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.26.self_attn.o_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.26.self_attn.q_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.26.self_attn.v_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.27.input_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.27.mlp.down_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.27.mlp.gate_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.27.mlp.up_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.27.post_attention_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.27.self_attn.k_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.27.self_attn.o_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.27.self_attn.q_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.27.self_attn.v_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.28.input_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.28.mlp.down_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.28.mlp.gate_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.28.mlp.up_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.28.post_attention_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.28.self_attn.k_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.28.self_attn.o_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.28.self_attn.q_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.28.self_attn.v_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.29.input_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.29.mlp.down_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.29.mlp.gate_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.29.mlp.up_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.29.post_attention_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.29.self_attn.k_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.29.self_attn.o_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.29.self_attn.q_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.29.self_attn.v_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.3.input_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.3.mlp.down_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.3.mlp.gate_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.3.mlp.up_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.30.input_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.30.mlp.down_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.30.mlp.gate_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.30.mlp.up_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.30.post_attention_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.30.self_attn.k_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.30.self_attn.o_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.30.self_attn.q_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.30.self_attn.v_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.31.input_layernorm.weight": "model-00007-of-00007.safetensors", + "model.layers.31.mlp.down_proj.weight": "model-00007-of-00007.safetensors", + "model.layers.31.mlp.gate_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.31.mlp.up_proj.weight": "model-00007-of-00007.safetensors", + "model.layers.31.post_attention_layernorm.weight": "model-00007-of-00007.safetensors", + "model.layers.31.self_attn.k_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.31.self_attn.o_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.31.self_attn.q_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.31.self_attn.v_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.4.input_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.4.mlp.down_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.4.mlp.gate_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.4.mlp.up_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.4.self_attn.q_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.5.input_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.5.mlp.down_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.5.mlp.gate_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.5.mlp.up_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.5.self_attn.k_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.6.input_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.6.mlp.down_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.6.mlp.gate_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.6.mlp.up_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.6.self_attn.k_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.7.input_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.7.mlp.down_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.7.mlp.gate_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.7.mlp.up_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.7.self_attn.k_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.7.self_attn.q_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.8.input_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.8.mlp.down_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.8.mlp.gate_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.8.mlp.up_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.8.self_attn.k_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.8.self_attn.v_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.9.input_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.9.mlp.down_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.9.mlp.gate_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.9.mlp.up_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.9.self_attn.v_proj.weight": "model-00003-of-00007.safetensors", + "model.norm.weight": "model-00007-of-00007.safetensors" + } +} diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000..e5b39b6 --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000..86a3394 --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c5cf44023714fb39b05e71e425f8d7b92805ff73f7988b083b8c87f0bf87393 +size 17209961 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000..8c6916a --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,2064 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_248|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_249|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_250|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "chat_template": "{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}", + "clean_up_tokenization_spaces": true, + "eos_token": "<|end_of_text|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 2048, + "pad_token": "<|end_of_text|>", + "tokenizer_class": "PreTrainedTokenizer" +} diff --git a/train.log b/train.log new file mode 100644 index 0000000..6536d02 --- /dev/null +++ b/train.log @@ -0,0 +1,1162 @@ +2026-04-29 14:57:50 - INFO - __main__ - Model parameters ModelArguments(base_model_revision=None, model_name_or_path='/workspace/dynamic-dpo-v4/base_models/llama-3-8b-base-sft-hh-helpful-4xh200', model_revision='main', model_code_revision=None, torch_dtype='bfloat16', tokenizer_name_or_path=None, trust_remote_code=False, attn_implementation='flash_attention_2', use_peft=False, lora_r=16, lora_alpha=32, lora_dropout=0.05, lora_target_modules=None, lora_modules_to_save=None, load_in_8bit=False, load_in_4bit=False, bnb_4bit_quant_type='nf4', use_bnb_nested_quant=False, bnb_4bit_quant_storage='uint8') +2026-04-29 14:57:50 - INFO - __main__ - Data parameters DataArguments(chat_template=None, dataset_mixer={'Anthropic/hh-rlhf': 1.0}, text_column='text', dataset_splits=['train'], dataset_configs=['helpful-base'], dataset_dir=None, preprocessing_num_workers=12, use_persistent_hf_cache=True, hf_cache_dir='/workspace/dynamic-dpo-v4/hf/datasets', truncation_side=None, auto_insert_empty_system_msg=True, disable_thinking=False, preprocessing_log_samples=0, preprocessing_log_dir=None) +2026-04-29 14:57:50 - INFO - __main__ - Training/evaluation parameters NewDPOConfig( +_n_gpu=1, +accelerator_config={'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None, 'use_configured_state': False}, +adafactor=False, +adam_beta1=0.9, +adam_beta2=0.999, +adam_epsilon=1e-08, +auto_find_batch_size=False, +average_tokens_across_devices=False, +batch_eval_metrics=False, +beta=0.01, +bf16=True, +bf16_full_eval=False, +data_seed=None, +dataloader_drop_last=True, +dataloader_num_workers=0, +dataloader_persistent_workers=False, +dataloader_pin_memory=True, +dataloader_prefetch_factor=None, +dataset_num_proc=12, +ddp_backend=None, +ddp_broadcast_buffers=None, +ddp_bucket_cap_mb=None, +ddp_find_unused_parameters=None, +ddp_timeout=1800, +debug=[], +deepspeed=None, +disable_dropout=True, +disable_tqdm=False, +do_eval=False, +do_predict=False, +do_train=False, +eta=0.1, +eval_accumulation_steps=None, +eval_delay=0, +eval_do_concat_batches=True, +eval_on_start=False, +eval_steps=200, +eval_strategy=IntervalStrategy.NO, +eval_use_gather_object=False, +f_alpha_divergence_coef=1.0, +f_divergence_type=reverse_kl, +force_use_ref_model=False, +fp16=False, +fp16_backend=auto, +fp16_full_eval=False, +fp16_opt_level=O1, +fsdp=[], +fsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, +fsdp_min_num_params=0, +fsdp_transformer_layer_cls_to_wrap=None, +full_determinism=False, +generate_during_eval=False, +gradient_accumulation_steps=2, +gradient_checkpointing=True, +gradient_checkpointing_kwargs={'use_reentrant': False}, +greater_is_better=None, +group_by_length=False, +half_precision_backend=auto, +hub_always_push=False, +hub_margin_dataset_id=None, +hub_model_id=W-61/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449, +hub_model_revision=main, +hub_private_repo=None, +hub_strategy=HubStrategy.EVERY_SAVE, +hub_token=, +ignore_data_skip=False, +include_for_metrics=[], +include_inputs_for_metrics=False, +include_num_input_tokens_seen=False, +include_tokens_per_second=False, +is_encoder_decoder=None, +jit_mode_eval=False, +label_names=None, +label_pad_token_id=-100, +label_smoothing=0.0, +label_smoothing_factor=0.0, +learning_rate=5e-07, +length_column_name=length, +load_best_model_at_end=False, +local_rank=0, +log_level=info, +log_level_replica=warning, +log_on_each_node=True, +logging_dir=/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/runs/Apr29_14-57-50_bc4ce3cd7c4e, +logging_first_step=True, +logging_nan_inf_filter=True, +logging_steps=1, +logging_strategy=IntervalStrategy.STEPS, +loss_type=sigmoid, +lr_scheduler_kwargs={}, +lr_scheduler_type=SchedulerType.COSINE, +margin_dataset_private=None, +margin_dataset_split=train, +margin_log_path=/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/margin_logs, +margin_log_steps=1, +margin_save_full=True, +max_grad_norm=1.0, +max_length=512, +max_prompt_length=256, +max_steps=-1, +max_target_length=None, +metric_for_best_model=None, +model_adapter_name=None, +model_init_kwargs=None, +mp_parameters=, +neftune_noise_alpha=None, +no_cuda=False, +non_finite_logits_handling=error, +num_train_epochs=1, +optim=OptimizerNames.ADAMW_TORCH, +optim_args=None, +optim_target_modules=None, +output_dir=/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449, +overwrite_output_dir=False, +padding_value=None, +past_index=-1, +per_device_eval_batch_size=8, +per_device_train_batch_size=8, +post_tokenization_log_dir=None, +post_tokenization_log_samples=0, +precompute_ref_batch_size=None, +precompute_ref_eval_batch_size=None, +precompute_ref_log_probs=False, +prediction_loss_only=False, +push_margin_dataset=False, +push_to_hub=False, +push_to_hub_model_id=None, +push_to_hub_organization=None, +push_to_hub_token=, +q_target=0.45, +ray_scope=last, +ref_adapter_name=None, +ref_model_init_kwargs=None, +ref_model_mixup_alpha=0.9, +ref_model_sync_steps=64, +reference_free=False, +remove_unused_columns=False, +report_to=['wandb'], +require_explicit_ref_model=True, +restore_callback_states_from_checkpoint=False, +resume_from_checkpoint=None, +reuse_tokenized_dataset=True, +rpo_alpha=None, +run_name=llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449, +s_star=0.4, +save_hf_model_artifacts=True, +save_on_each_node=False, +save_only_model=False, +save_safetensors=True, +save_steps=50, +save_strategy=SaveStrategy.NO, +save_total_limit=2, +seed=42, +sft_weight=0.0, +skip_memory_metrics=True, +sync_ref_model=False, +tf32=None, +tokenization_batch_size=128, +tokenization_mode=online, +tokenized_dataset_cache_dir=/workspace/dynamic-dpo-v4/tokenized_preferences, +torch_compile=False, +torch_compile_backend=None, +torch_compile_mode=None, +torch_empty_cache_steps=None, +torchdynamo=None, +tp_size=0, +tpu_metrics_debug=False, +tpu_num_cores=None, +trainer_type=new_dpo, +truncation_mode=keep_end, +use_cpu=False, +use_ipex=False, +use_legacy_prediction_loop=False, +use_liger_kernel=False, +use_mps_device=False, +wandb_project=llama3-hh-new-dpo-multi-beta-sweep, +warmup_ratio=0.1, +warmup_steps=0, +weight_decay=0.0, +) +2026-04-29 14:57:50 - INFO - __main__ - Using W&B project from training args: llama3-hh-new-dpo-multi-beta-sweep +wandb: Currently logged in as: can-not-fand (can-not-fand-northeastern-university). Use `wandb login --relogin` to force relogin +wandb: - Waiting for wandb.init()... wandb: \ Waiting for wandb.init()... wandb: wandb version 0.26.1 is available! To upgrade, please run: +wandb: $ pip install wandb --upgrade +wandb: Tracking run with wandb version 0.17.5 +wandb: Run data is saved locally in /workspace/dynamic-dpo-v4/wandb/wandb/run-20260429_145753-76c0wk78 +wandb: Run `wandb offline` to turn off syncing. +wandb: Syncing run llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449 +wandb: ⭐️ View project at https://wandb.ai/can-not-fand-northeastern-university/llama3-hh-new-dpo-multi-beta-sweep +wandb: 🚀 View run at https://wandb.ai/can-not-fand-northeastern-university/llama3-hh-new-dpo-multi-beta-sweep/runs/76c0wk78 + Normalizing raw HH preferences (train): 0%| | 0/43598 [00:00> You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. + Normalizing raw HH preferences (train): 89%|████████▊ | 38624/43598 [00:03<00:00, 11073.91 examples/s] Loading checkpoint shards: 0%| | 0/7 [00:00> Trainer.tokenizer is now deprecated. You should use `Trainer.processing_class = processing_class` instead. + Normalizing raw HH preferences (train): 91%|█████████ | 39736/43598 [00:03<00:00, 11083.56 examples/s] Normalizing raw HH preferences (train): 93%|█████████▎| 40497/43598 [00:03<00:00, 11429.29 examples/s] Normalizing raw HH preferences (train): 94%|█████████▎| 40871/43598 [00:03<00:00, 11152.48 examples/s]2026-04-29 14:57:59 - WARNING - __main__ - Dropped 237 non-canonical HH preference examples from split `train` before normalization (126 x HH preprocessing expects exactly one final assistant response in chosen/rejected suffixes., 111 x HH chosen/rejected transcripts must each contain a divergent assistant response.). + Normalizing raw HH preferences (train): 0%| | 0/43598 [00:00> You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +/workspace/dynamic-dpo-v4/scripts/tokenized_dpo_trainer.py:391: UserWarning: You passed a model_id to the trainer. This will automatically create an `AutoModelForCausalLM` or a `PeftModel` (if you passed a `peft_config`) for you. + warnings.warn( + Loading checkpoint shards: 0%| | 0/7 [00:00> You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. + Normalizing raw HH preferences (train): 21%|██ | 9157/43598 [00:00<00:03, 9681.76 examples/s] Loading checkpoint shards: 0%| | 0/7 [00:00> Trainer.tokenizer is now deprecated. You should use `Trainer.processing_class = processing_class` instead. + Loading checkpoint shards: 0%| | 0/7 [00:00> Trainer.tokenizer is now deprecated. You should use `Trainer.processing_class = processing_class` instead. + Normalizing raw HH preferences (train): 24%|██▎ | 10314/43598 [00:01<00:03, 10209.84 examples/s] Normalizing raw HH preferences (train): 26%|██▋ | 11472/43598 [00:01<00:03, 10600.04 examples/s] Normalizing raw HH preferences (train): 29%|██▉ | 12644/43598 [00:01<00:02, 10893.21 examples/s] Normalizing raw HH preferences (train): 32%|███▏ | 13797/43598 [00:01<00:02, 11076.05 examples/s] Normalizing raw HH preferences (train): 35%|███▌ | 15416/43598 [00:01<00:02, 10963.49 examples/s] Normalizing raw HH preferences (train): 38%|███▊ | 16651/43598 [00:01<00:02, 11167.60 examples/s] Normalizing raw HH preferences (train): 41%|████ | 17786/43598 [00:01<00:02, 11216.13 examples/s] Normalizing raw HH preferences (train): 45%|████▍ | 19433/43598 [00:01<00:02, 11125.76 examples/s] Normalizing raw HH preferences (train): 47%|████▋ | 20643/43598 [00:01<00:02, 11235.30 examples/s] Normalizing raw HH preferences (train): 50%|████▉ | 21794/43598 [00:02<00:01, 11308.21 examples/s] Normalizing raw HH preferences (train): 53%|█████▎ | 22942/43598 [00:02<00:01, 11353.47 examples/s] Normalizing raw HH preferences (train): 57%|█████▋ | 24640/43598 [00:02<00:01, 11301.79 examples/s] Normalizing raw HH preferences (train): 59%|█████▉ | 25788/43598 [00:02<00:01, 11346.82 examples/s] Normalizing raw HH preferences (train): 63%|██████▎ | 27415/43598 [00:02<00:01, 11165.59 examples/s] Normalizing raw HH preferences (train): 66%|██████▌ | 28650/43598 [00:02<00:01, 11285.08 examples/s] Normalizing raw HH preferences (train): 68%|██████▊ | 29824/43598 [00:02<00:01, 11401.49 examples/s] Normalizing raw HH preferences (train): 72%|███████▏ | 31473/43598 [00:02<00:01, 11251.39 examples/s] Normalizing raw HH preferences (train): 75%|███████▍ | 32651/43598 [00:03<00:00, 11352.36 examples/s] Normalizing raw HH preferences (train): 78%|███████▊ | 33798/43598 [00:03<00:00, 11381.86 examples/s] Normalizing raw HH preferences (train): 81%|████████▏ | 35446/43598 [00:03<00:00, 11234.60 examples/s] Normalizing raw HH preferences (train): 84%|████████▍ | 36647/43598 [00:03<00:00, 11298.25 examples/s] Normalizing raw HH preferences (train): 87%|████████▋ | 37783/43598 [00:03<00:00, 11309.96 examples/s] Normalizing raw HH preferences (train): 89%|████████▉ | 38919/43598 [00:03<00:00, 11323.41 examples/s] Normalizing raw HH preferences (train): 93%|█████████▎| 40644/43598 [00:03<00:00, 11233.77 examples/s] Normalizing raw HH preferences (train): 96%|█████████▌| 41789/43598 [00:03<00:00, 11286.97 examples/s] Normalizing raw HH preferences (train): 98%|█████████▊| 42928/43598 [00:03<00:00, 11314.14 examples/s] Normalizing raw HH preferences (train): 100%|██████████| 43598/43598 [00:04<00:00, 10835.60 examples/s] +2026-04-29 14:58:03 - INFO - __main__ - Training on the following splits: ['train : 43598'] +[INFO|tokenization_utils_base.py:2058] 2026-04-29 14:58:03,929 >> loading file tokenizer.json +[INFO|tokenization_utils_base.py:2058] 2026-04-29 14:58:03,929 >> loading file tokenizer.model +[INFO|tokenization_utils_base.py:2058] 2026-04-29 14:58:03,929 >> loading file added_tokens.json +[INFO|tokenization_utils_base.py:2058] 2026-04-29 14:58:03,929 >> loading file special_tokens_map.json +[INFO|tokenization_utils_base.py:2058] 2026-04-29 14:58:03,929 >> loading file tokenizer_config.json +[INFO|tokenization_utils_base.py:2058] 2026-04-29 14:58:03,929 >> loading file chat_template.jinja +[INFO|tokenization_utils_base.py:2323] 2026-04-29 14:58:04,322 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained. +2026-04-29 14:58:04 - INFO - __main__ - Processed train sample 41905: + +Prompt: +<|begin_of_text|><|start_header_id|>user<|end_header_id|> + +What is the best way to clean my refrigerator?<|eot_id|><|start_header_id|>assistant<|end_header_id|> + +Well, I have a few ideas, but the best way to know what works best is to run some tests and then analyze the data to figure out which technique works best.<|eot_id|><|start_header_id|>user<|end_header_id|> + +What are your ideas?<|eot_id|> + +Chosen: +<|start_header_id|>assistant<|end_header_id|> + +Some of my ideas include wiping down the interior with a rag, using bleach and water to wipe down the door, emptying the contents of the fridge to scrub the shelves with a clean cloth, cleaning the interior of the fridge with vinegar and water, and using a vacuum cleaner to clean the interior.<|eot_id|> + +Rejected: +<|start_header_id|>assistant<|end_header_id|> + +I was thinking that you could try different types of soap, which could be a variation of “soap,” like antibacterial soap. I was also thinking that you could try using different tools, like a toothbrush, to clean the inside. Or I was thinking that you could try different ways to get the gunk out, like using a squeegee, or using a paper towel. I was also thinking you could try other types of cleaning, like vacuuming, but I think that could have the opposite of the desired effect.<|eot_id|> +/workspace/dynamic-dpo-v4/scripts/tokenized_dpo_trainer.py:391: UserWarning: You passed a model_id to the trainer. This will automatically create an `AutoModelForCausalLM` or a `PeftModel` (if you passed a `peft_config`) for you. + warnings.warn( +[INFO|configuration_utils.py:691] 2026-04-29 14:58:04,438 >> loading configuration file /workspace/dynamic-dpo-v4/base_models/llama-3-8b-base-sft-hh-helpful-4xh200/config.json +[INFO|configuration_utils.py:765] 2026-04-29 14:58:04,439 >> Model config LlamaConfig { + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": 128001, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 8192, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.51.0", + "use_cache": false, + "vocab_size": 128256 +} + +[INFO|modeling_utils.py:1121] 2026-04-29 14:58:04,452 >> loading weights file /workspace/dynamic-dpo-v4/base_models/llama-3-8b-base-sft-hh-helpful-4xh200/model.safetensors.index.json +[INFO|modeling_utils.py:2167] 2026-04-29 14:58:04,453 >> Instantiating LlamaForCausalLM model under default dtype torch.bfloat16. +[WARNING|logging.py:328] 2026-04-29 14:58:04,456 >> You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[INFO|configuration_utils.py:1142] 2026-04-29 14:58:04,457 >> Generate config GenerationConfig { + "bos_token_id": 128000, + "eos_token_id": 128001, + "use_cache": false +} + + Loading checkpoint shards: 0%| | 0/7 [00:00> All model checkpoint weights were used when initializing LlamaForCausalLM. + +[INFO|modeling_utils.py:4934] 2026-04-29 14:58:16,300 >> All the weights of LlamaForCausalLM were initialized from the model checkpoint at /workspace/dynamic-dpo-v4/base_models/llama-3-8b-base-sft-hh-helpful-4xh200. +If your task is similar to the task the model of the checkpoint was trained on, you can already use LlamaForCausalLM for predictions without further training. +[INFO|configuration_utils.py:1095] 2026-04-29 14:58:16,303 >> loading configuration file /workspace/dynamic-dpo-v4/base_models/llama-3-8b-base-sft-hh-helpful-4xh200/generation_config.json +[INFO|configuration_utils.py:1142] 2026-04-29 14:58:16,304 >> Generate config GenerationConfig { + "bos_token_id": 128000, + "do_sample": true, + "eos_token_id": 128001, + "max_length": 4096, + "temperature": 0.6, + "top_p": 0.9 +} + +[INFO|configuration_utils.py:691] 2026-04-29 14:58:16,306 >> loading configuration file /workspace/dynamic-dpo-v4/base_models/llama-3-8b-base-sft-hh-helpful-4xh200/config.json +[INFO|configuration_utils.py:765] 2026-04-29 14:58:16,306 >> Model config LlamaConfig { + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": 128001, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 8192, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.51.0", + "use_cache": false, + "vocab_size": 128256 +} + +[INFO|modeling_utils.py:1121] 2026-04-29 14:58:16,307 >> loading weights file /workspace/dynamic-dpo-v4/base_models/llama-3-8b-base-sft-hh-helpful-4xh200/model.safetensors.index.json +[INFO|modeling_utils.py:2167] 2026-04-29 14:58:16,308 >> Instantiating LlamaForCausalLM model under default dtype torch.bfloat16. +[INFO|configuration_utils.py:1142] 2026-04-29 14:58:16,312 >> Generate config GenerationConfig { + "bos_token_id": 128000, + "eos_token_id": 128001, + "use_cache": false +} + + Loading checkpoint shards: 0%| | 0/7 [00:00> All model checkpoint weights were used when initializing LlamaForCausalLM. + +[INFO|modeling_utils.py:4934] 2026-04-29 14:58:27,769 >> All the weights of LlamaForCausalLM were initialized from the model checkpoint at /workspace/dynamic-dpo-v4/base_models/llama-3-8b-base-sft-hh-helpful-4xh200. +If your task is similar to the task the model of the checkpoint was trained on, you can already use LlamaForCausalLM for predictions without further training. +[INFO|configuration_utils.py:1095] 2026-04-29 14:58:27,772 >> loading configuration file /workspace/dynamic-dpo-v4/base_models/llama-3-8b-base-sft-hh-helpful-4xh200/generation_config.json +[INFO|configuration_utils.py:1142] 2026-04-29 14:58:27,772 >> Generate config GenerationConfig { + "bos_token_id": 128000, + "do_sample": true, + "eos_token_id": 128001, + "max_length": 4096, + "temperature": 0.6, + "top_p": 0.9 +} + +[WARNING|trainer.py:821] 2026-04-29 14:58:27,774 >> Trainer.tokenizer is now deprecated. You should use `Trainer.processing_class = processing_class` instead. +[WARNING|trainer.py:816] 2026-04-29 14:58:27,774 >> Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead. + Tokenizing train (num_proc=12): 0%| | 0/43598 [00:00> Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead. + Saving the dataset (0/2 shards): 0%| | 0/43598 [00:00> Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead. +[WARNING|trainer.py:816] 2026-04-29 15:08:06,790 >> Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead. +/workspace/dynamic-dpo-v4/scripts/tokenized_dpo_trainer.py:522: FutureWarning: `tokenizer` is deprecated and will be removed in version 5.0.0 for `NewDPOTrainer.__init__`. Use `processing_class` instead. + super().__init__( +[WARNING|trainer.py:816] 2026-04-29 15:08:06,790 >> Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead. +[WARNING|trainer.py:816] 2026-04-29 15:08:06,823 >> Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead. +[WARNING|trainer.py:816] 2026-04-29 15:08:06,823 >> Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead. +/workspace/dynamic-dpo-v4/scripts/tokenized_dpo_trainer.py:522: FutureWarning: `tokenizer` is deprecated and will be removed in version 5.0.0 for `NewDPOTrainer.__init__`. Use `processing_class` instead. + super().__init__( +[WARNING|trainer.py:816] 2026-04-29 15:08:06,823 >> Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead. +/workspace/dynamic-dpo-v4/scripts/tokenized_dpo_trainer.py:522: FutureWarning: `tokenizer` is deprecated and will be removed in version 5.0.0 for `NewDPOTrainer.__init__`. Use `processing_class` instead. + super().__init__( +/workspace/dynamic-dpo-v4/scripts/tokenized_dpo_trainer.py:522: FutureWarning: `tokenizer` is deprecated and will be removed in version 5.0.0 for `NewDPOTrainer.__init__`. Use `processing_class` instead. + super().__init__( +[INFO|trainer.py:748] 2026-04-29 15:08:07,108 >> Using auto half precision backend +/workspace/dynamic-dpo-v4/.venv/lib/python3.11/site-packages/accelerate/accelerator.py:1557: UserWarning: Upcasted low precision parameters in LlamaForCausalLM because mixed precision turned on in FSDP. Affects: model.embed_tokens.weight, model.norm.weight, lm_head.weight. + warnings.warn( +/workspace/dynamic-dpo-v4/.venv/lib/python3.11/site-packages/accelerate/accelerator.py:1557: UserWarning: Upcasted low precision parameters in LlamaDecoderLayer because mixed precision turned on in FSDP. Affects: self_attn.q_proj.weight, self_attn.k_proj.weight, self_attn.v_proj.weight, self_attn.o_proj.weight, mlp.gate_proj.weight, mlp.up_proj.weight, mlp.down_proj.weight, input_layernorm.weight, post_attention_layernorm.weight. + warnings.warn( +/workspace/dynamic-dpo-v4/.venv/lib/python3.11/site-packages/accelerate/accelerator.py:1563: UserWarning: FSDP upcast of low precision parameters may affect the precision of model checkpoints. + warnings.warn( +[INFO|trainer.py:2414] 2026-04-29 15:08:16,065 >> ***** Running training ***** +[INFO|trainer.py:2415] 2026-04-29 15:08:16,065 >> Num examples = 43,598 +[INFO|trainer.py:2416] 2026-04-29 15:08:16,065 >> Num Epochs = 1 +[INFO|trainer.py:2417] 2026-04-29 15:08:16,065 >> Instantaneous batch size per device = 8 +[INFO|trainer.py:2420] 2026-04-29 15:08:16,065 >> Total train batch size (w. parallel, distributed & accumulation) = 64 +[INFO|trainer.py:2421] 2026-04-29 15:08:16,065 >> Gradient Accumulation steps = 2 +[INFO|trainer.py:2422] 2026-04-29 15:08:16,065 >> Total optimization steps = 681 +[INFO|trainer.py:2423] 2026-04-29 15:08:16,066 >> Number of trainable parameters = 2,007,565,312 +[INFO|integration_utils.py:831] 2026-04-29 15:08:16,067 >> Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true" + 0%| | 0/681 [00:00> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:1713] 2026-04-29 15:08:17,676 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:1713] 2026-04-29 15:08:17,684 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:1713] 2026-04-29 15:08:17,692 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 0%| | 1/681 [00:02<31:23, 2.77s/it] {'loss': 1.3865, 'grad_norm': 8.340126991271973, 'learning_rate': 0.0, 'fcm_dpo/beta': 0.009999999776482582, 'fcm_dpo/q_t': 0.5000571608543396, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': -0.02287006378173828, 'margin_dpo/margin_mean': -0.02287048101425171, 'margin_dpo/margin_std': 0.41920793056488037, 'logps/chosen': -50.1435661315918, 'logps/rejected': -74.09991455078125, 'logps/ref_chosen': -50.14883804321289, 'logps/ref_rejected': -74.1280517578125, 'KL/chosen_KL_mean': 0.00527191162109375, 'KL/rejected_KL_mean': 0.028141021728515625, 'KL/mean': 0.016706019639968872, 'KL/std': 0.272699236869812, 'logits/chosen': -0.4974287748336792, 'logits/rejected': -0.43299180269241333, 'epoch': 0.0} + 0%| | 1/681 [00:02<31:23, 2.77s/it] 0%| | 2/681 [00:05<29:50, 2.64s/it] {'loss': 1.387, 'grad_norm': 7.205794811248779, 'learning_rate': 7.246376811594203e-09, 'fcm_dpo/beta': 0.009999999776482582, 'fcm_dpo/q_t': 0.500164270401001, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': -0.06572261452674866, 'margin_dpo/margin_mean': -0.06572240591049194, 'margin_dpo/margin_std': 0.35048407316207886, 'logps/chosen': -52.65568923950195, 'logps/rejected': -75.27340698242188, 'logps/ref_chosen': -52.620704650878906, 'logps/ref_rejected': -75.30413818359375, 'KL/chosen_KL_mean': -0.03498649597167969, 'KL/rejected_KL_mean': 0.030735015869140625, 'KL/mean': -0.00212840735912323, 'KL/std': 0.24797174334526062, 'logits/chosen': -0.49536412954330444, 'logits/rejected': -0.4594460427761078, 'epoch': 0.0} + 0%| | 2/681 [00:05<29:50, 2.64s/it] 0%| | 3/681 [00:07<29:31, 2.61s/it] {'loss': 1.386, 'grad_norm': 7.091545581817627, 'learning_rate': 1.4492753623188406e-08, 'fcm_dpo/beta': 0.009999999776482582, 'fcm_dpo/q_t': 0.49991729855537415, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 0.03308027982711792, 'margin_dpo/margin_mean': 0.03308090567588806, 'margin_dpo/margin_std': 0.3488999903202057, 'logps/chosen': -60.95341873168945, 'logps/rejected': -68.67750549316406, 'logps/ref_chosen': -60.981597900390625, 'logps/ref_rejected': -68.67259216308594, 'KL/chosen_KL_mean': 0.028177261352539062, 'KL/rejected_KL_mean': -0.00490570068359375, 'KL/mean': 0.011634737253189087, 'KL/std': 0.2545679211616516, 'logits/chosen': -0.4817052185535431, 'logits/rejected': -0.44228988885879517, 'epoch': 0.0} + 0%| | 3/681 [00:07<29:31, 2.61s/it] 1%| | 4/681 [00:10<29:44, 2.64s/it] {'loss': 1.3866, 'grad_norm': 7.214421272277832, 'learning_rate': 2.1739130434782606e-08, 'fcm_dpo/beta': 0.009999999776482582, 'fcm_dpo/q_t': 0.5000672340393066, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': -0.026903212070465088, 'margin_dpo/margin_mean': -0.026903808116912842, 'margin_dpo/margin_std': 0.39421218633651733, 'logps/chosen': -56.76152801513672, 'logps/rejected': -86.61402130126953, 'logps/ref_chosen': -56.7677116394043, 'logps/ref_rejected': -86.64710998535156, 'KL/chosen_KL_mean': 0.006183624267578125, 'KL/rejected_KL_mean': 0.03308868408203125, 'KL/mean': 0.019635915756225586, 'KL/std': 0.28558221459388733, 'logits/chosen': -0.468106746673584, 'logits/rejected': -0.44051337242126465, 'epoch': 0.01} + 1%| | 4/681 [00:10<29:44, 2.64s/it] 1%| | 5/681 [00:13<29:32, 2.62s/it] {'loss': 1.386, 'grad_norm': 8.964797019958496, 'learning_rate': 2.898550724637681e-08, 'fcm_dpo/beta': 0.009999999776482582, 'fcm_dpo/q_t': 0.49991726875305176, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 0.03309446573257446, 'margin_dpo/margin_mean': 0.033094823360443115, 'margin_dpo/margin_std': 0.38494962453842163, 'logps/chosen': -53.839942932128906, 'logps/rejected': -84.162841796875, 'logps/ref_chosen': -53.859375, 'logps/ref_rejected': -84.14918518066406, 'KL/chosen_KL_mean': 0.01943206787109375, 'KL/rejected_KL_mean': -0.013660430908203125, 'KL/mean': 0.002883225679397583, 'KL/std': 0.2767731547355652, 'logits/chosen': -0.5146475434303284, 'logits/rejected': -0.47093117237091064, 'epoch': 0.01} + 1%| | 5/681 [00:13<29:32, 2.62s/it] 1%| | 6/681 [00:15<27:59, 2.49s/it] {'loss': 1.3862, 'grad_norm': 9.190613746643066, 'learning_rate': 3.6231884057971014e-08, 'fcm_dpo/beta': 0.009999999776482582, 'fcm_dpo/q_t': 0.49998119473457336, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 0.007514864206314087, 'margin_dpo/margin_mean': 0.007514625787734985, 'margin_dpo/margin_std': 0.3818962574005127, 'logps/chosen': -63.016971588134766, 'logps/rejected': -92.662353515625, 'logps/ref_chosen': -63.007484436035156, 'logps/ref_rejected': -92.64534759521484, 'KL/chosen_KL_mean': -0.009487152099609375, 'KL/rejected_KL_mean': -0.01700592041015625, 'KL/mean': -0.01324455440044403, 'KL/std': 0.27032917737960815, 'logits/chosen': -0.5035334825515747, 'logits/rejected': -0.46098393201828003, 'epoch': 0.01} + 1%| | 6/681 [00:15<27:59, 2.49s/it] 1%| | 7/681 [00:17<27:24, 2.44s/it] {'loss': 1.3857, 'grad_norm': 8.227945327758789, 'learning_rate': 4.347826086956521e-08, 'fcm_dpo/beta': 0.009999999776482582, 'fcm_dpo/q_t': 0.4998607337474823, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 0.05570727586746216, 'margin_dpo/margin_mean': 0.055707335472106934, 'margin_dpo/margin_std': 0.38999414443969727, 'logps/chosen': -57.71474075317383, 'logps/rejected': -103.91621398925781, 'logps/ref_chosen': -57.774818420410156, 'logps/ref_rejected': -103.92059326171875, 'KL/chosen_KL_mean': 0.06007957458496094, 'KL/rejected_KL_mean': 0.004375457763671875, 'KL/mean': 0.0322260856628418, 'KL/std': 0.2890022397041321, 'logits/chosen': -0.5052176713943481, 'logits/rejected': -0.47141021490097046, 'epoch': 0.01} + 1%| | 7/681 [00:17<27:24, 2.44s/it] 1%| | 8/681 [00:20<27:08, 2.42s/it] {'loss': 1.3864, 'grad_norm': 7.855659008026123, 'learning_rate': 5.0724637681159424e-08, 'fcm_dpo/beta': 0.009999999776482582, 'fcm_dpo/q_t': 0.5000264644622803, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': -0.010594159364700317, 'margin_dpo/margin_mean': -0.010594218969345093, 'margin_dpo/margin_std': 0.42736732959747314, 'logps/chosen': -58.67619323730469, 'logps/rejected': -79.260986328125, 'logps/ref_chosen': -58.716033935546875, 'logps/ref_rejected': -79.3114242553711, 'KL/chosen_KL_mean': 0.039844512939453125, 'KL/rejected_KL_mean': 0.050434112548828125, 'KL/mean': 0.04513771831989288, 'KL/std': 0.3095516264438629, 'logits/chosen': -0.5170360803604126, 'logits/rejected': -0.492270290851593, 'epoch': 0.01} + 1%| | 8/681 [00:20<27:08, 2.42s/it] 1%|▏ | 9/681 [00:22<27:37, 2.47s/it] {'loss': 1.3858, 'grad_norm': 8.50635814666748, 'learning_rate': 5.797101449275362e-08, 'fcm_dpo/beta': 0.009999999776482582, 'fcm_dpo/q_t': 0.4998795986175537, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 0.048153460025787354, 'margin_dpo/margin_mean': 0.04815271496772766, 'margin_dpo/margin_std': 0.38030916452407837, 'logps/chosen': -69.84125518798828, 'logps/rejected': -99.62522888183594, 'logps/ref_chosen': -69.8668441772461, 'logps/ref_rejected': -99.6026611328125, 'KL/chosen_KL_mean': 0.0255889892578125, 'KL/rejected_KL_mean': -0.0225677490234375, 'KL/mean': 0.0015124678611755371, 'KL/std': 0.2851980924606323, 'logits/chosen': -0.4870206117630005, 'logits/rejected': -0.4398488402366638, 'epoch': 0.01} + 1%|▏ | 9/681 [00:22<27:37, 2.47s/it] 1%|▏ | 10/681 [00:25<27:41, 2.48s/it] {'loss': 1.3863, 'grad_norm': 7.091888427734375, 'learning_rate': 6.521739130434782e-08, 'fcm_dpo/beta': 0.009999999776482582, 'fcm_dpo/q_t': 0.5000036358833313, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': -0.0014634579420089722, 'margin_dpo/margin_mean': -0.001463174819946289, 'margin_dpo/margin_std': 0.3855435252189636, 'logps/chosen': -48.372474670410156, 'logps/rejected': -80.38538360595703, 'logps/ref_chosen': -48.35768508911133, 'logps/ref_rejected': -80.37206268310547, 'KL/chosen_KL_mean': -0.014789581298828125, 'KL/rejected_KL_mean': -0.0133209228515625, 'KL/mean': -0.01405847817659378, 'KL/std': 0.2681947946548462, 'logits/chosen': -0.4998844861984253, 'logits/rejected': -0.45695722103118896, 'epoch': 0.01} + 1%|▏ | 10/681 [00:25<27:41, 2.48s/it] 2%|▏ | 11/681 [00:27<28:25, 2.55s/it] {'loss': 1.3862, 'grad_norm': 6.8613715171813965, 'learning_rate': 7.246376811594203e-08, 'fcm_dpo/beta': 0.009999999776482582, 'fcm_dpo/q_t': 0.49998754262924194, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 0.004973113536834717, 'margin_dpo/margin_mean': 0.0049735307693481445, 'margin_dpo/margin_std': 0.2909265458583832, 'logps/chosen': -53.02473449707031, 'logps/rejected': -87.79322814941406, 'logps/ref_chosen': -53.01685333251953, 'logps/ref_rejected': -87.78038024902344, 'KL/chosen_KL_mean': -0.007877349853515625, 'KL/rejected_KL_mean': -0.01285552978515625, 'KL/mean': -0.010366648435592651, 'KL/std': 0.2346026599407196, 'logits/chosen': -0.46066391468048096, 'logits/rejected': -0.4356629252433777, 'epoch': 0.02} + 2%|▏ | 11/681 [00:27<28:25, 2.55s/it] 2%|▏ | 12/681 [00:30<28:39, 2.57s/it] {'loss': 1.3857, 'grad_norm': 9.009154319763184, 'learning_rate': 7.971014492753623e-08, 'fcm_dpo/beta': 0.009999999776482582, 'fcm_dpo/q_t': 0.49984902143478394, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 0.06038558483123779, 'margin_dpo/margin_mean': 0.0603850781917572, 'margin_dpo/margin_std': 0.43303191661834717, 'logps/chosen': -61.79936218261719, 'logps/rejected': -104.91258239746094, 'logps/ref_chosen': -61.80543518066406, 'logps/ref_rejected': -104.8582763671875, 'KL/chosen_KL_mean': 0.006072998046875, 'KL/rejected_KL_mean': -0.054309844970703125, 'KL/mean': -0.024121850728988647, 'KL/std': 0.3304472863674164, 'logits/chosen': -0.5414502620697021, 'logits/rejected': -0.5054250359535217, 'epoch': 0.02} + 2%|▏ | 12/681 [00:30<28:39, 2.57s/it] 2%|▏ | 13/681 [00:33<29:04, 2.61s/it] {'loss': 1.3864, 'grad_norm': 7.9163641929626465, 'learning_rate': 8.695652173913042e-08, 'fcm_dpo/beta': 0.009999999776482582, 'fcm_dpo/q_t': 0.5000314712524414, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': -0.012606263160705566, 'margin_dpo/margin_mean': -0.012606710195541382, 'margin_dpo/margin_std': 0.3794170618057251, 'logps/chosen': -64.2663345336914, 'logps/rejected': -87.19645690917969, 'logps/ref_chosen': -64.2603530883789, 'logps/ref_rejected': -87.20307922363281, 'KL/chosen_KL_mean': -0.0059814453125, 'KL/rejected_KL_mean': 0.00662994384765625, 'KL/mean': 0.00032275915145874023, 'KL/std': 0.2694360017776489, 'logits/chosen': -0.49102455377578735, 'logits/rejected': -0.46374207735061646, 'epoch': 0.02} + 2%|▏ | 13/681 [00:33<29:04, 2.61s/it] 2%|▏ | 14/681 [00:35<28:45, 2.59s/it] {'loss': 1.3866, 'grad_norm': 8.576128005981445, 'learning_rate': 9.420289855072464e-08, 'fcm_dpo/beta': 0.009999999776482582, 'fcm_dpo/q_t': 0.5000874996185303, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': -0.03501877188682556, 'margin_dpo/margin_mean': -0.03501787781715393, 'margin_dpo/margin_std': 0.4262927174568176, 'logps/chosen': -58.14665222167969, 'logps/rejected': -104.04850006103516, 'logps/ref_chosen': -58.11021041870117, 'logps/ref_rejected': -104.04708099365234, 'KL/chosen_KL_mean': -0.03643989562988281, 'KL/rejected_KL_mean': -0.0014190673828125, 'KL/mean': -0.018927976489067078, 'KL/std': 0.2660324275493622, 'logits/chosen': -0.49155694246292114, 'logits/rejected': -0.4527207314968109, 'epoch': 0.02} + 2%|▏ | 14/681 [00:35<28:45, 2.59s/it] 2%|▏ | 15/681 [00:38<28:40, 2.58s/it] {'loss': 1.3858, 'grad_norm': 6.426931858062744, 'learning_rate': 1.0144927536231885e-07, 'fcm_dpo/beta': 0.009999999776482582, 'fcm_dpo/q_t': 0.499883234500885, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 0.04670119285583496, 'margin_dpo/margin_mean': 0.04670119285583496, 'margin_dpo/margin_std': 0.32319122552871704, 'logps/chosen': -56.99523162841797, 'logps/rejected': -80.8836669921875, 'logps/ref_chosen': -56.96691131591797, 'logps/ref_rejected': -80.80863952636719, 'KL/chosen_KL_mean': -0.028324127197265625, 'KL/rejected_KL_mean': -0.07502365112304688, 'KL/mean': -0.051674991846084595, 'KL/std': 0.23149462044239044, 'logits/chosen': -0.5326635837554932, 'logits/rejected': -0.5161415338516235, 'epoch': 0.02} + 2%|▏ | 15/681 [00:38<28:40, 2.58s/it] 2%|▏ | 16/681 [00:40<28:16, 2.55s/it] {'loss': 1.3864, 'grad_norm': 8.416418075561523, 'learning_rate': 1.0869565217391303e-07, 'fcm_dpo/beta': 0.009999999776482582, 'fcm_dpo/q_t': 0.5000145435333252, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': -0.005795121192932129, 'margin_dpo/margin_mean': -0.005795121192932129, 'margin_dpo/margin_std': 0.3890739381313324, 'logps/chosen': -61.78809356689453, 'logps/rejected': -84.41188049316406, 'logps/ref_chosen': -61.739891052246094, 'logps/ref_rejected': -84.36947631835938, 'KL/chosen_KL_mean': -0.04820060729980469, 'KL/rejected_KL_mean': -0.0424041748046875, 'KL/mean': -0.04530364274978638, 'KL/std': 0.29397979378700256, 'logits/chosen': -0.5461217164993286, 'logits/rejected': -0.5072727203369141, 'epoch': 0.02} + 2%|▏ | 16/681 [00:40<28:16, 2.55s/it] 2%|▏ | 17/681 [00:43<28:01, 2.53s/it] {'loss': 1.3852, 'grad_norm': 7.9028801918029785, 'learning_rate': 1.1594202898550725e-07, 'fcm_dpo/beta': 0.009999999776482582, 'fcm_dpo/q_t': 0.4997136890888214, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 0.11452382802963257, 'margin_dpo/margin_mean': 0.11452355980873108, 'margin_dpo/margin_std': 0.37491074204444885, 'logps/chosen': -67.66343688964844, 'logps/rejected': -85.44627380371094, 'logps/ref_chosen': -67.71033477783203, 'logps/ref_rejected': -85.37865447998047, 'KL/chosen_KL_mean': 0.046901702880859375, 'KL/rejected_KL_mean': -0.06762313842773438, 'KL/mean': -0.010359078645706177, 'KL/std': 0.27913013100624084, 'logits/chosen': -0.5041570067405701, 'logits/rejected': -0.4673753082752228, 'epoch': 0.02} + 2%|▏ | 17/681 [00:43<28:01, 2.53s/it] 3%|▎ | 18/681 [00:45<27:53, 2.52s/it] {'loss': 1.3861, 'grad_norm': 8.233268737792969, 'learning_rate': 1.2318840579710146e-07, 'fcm_dpo/beta': 0.009999999776482582, 'fcm_dpo/q_t': 0.4999527335166931, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 0.018904417753219604, 'margin_dpo/margin_mean': 0.01890420913696289, 'margin_dpo/margin_std': 0.31775712966918945, 'logps/chosen': -47.76877212524414, 'logps/rejected': -75.52047729492188, 'logps/ref_chosen': -47.7394905090332, 'logps/ref_rejected': -75.4722900390625, 'KL/chosen_KL_mean': -0.0292816162109375, 'KL/rejected_KL_mean': -0.048187255859375, 'KL/mean': -0.03873269259929657, 'KL/std': 0.23902130126953125, 'logits/chosen': -0.5057047009468079, 'logits/rejected': -0.45175978541374207, 'epoch': 0.03} + 3%|▎ | 18/681 [00:45<27:53, 2.52s/it] 3%|▎ | 19/681 [00:48<28:04, 2.54s/it] {'loss': 1.3855, 'grad_norm': 7.455746173858643, 'learning_rate': 1.3043478260869563e-07, 'fcm_dpo/beta': 0.009999999776482582, 'fcm_dpo/q_t': 0.49980464577674866, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 0.07813850045204163, 'margin_dpo/margin_mean': 0.07813867926597595, 'margin_dpo/margin_std': 0.3746962547302246, 'logps/chosen': -70.17062377929688, 'logps/rejected': -89.8010025024414, 'logps/ref_chosen': -70.20536041259766, 'logps/ref_rejected': -89.7575912475586, 'KL/chosen_KL_mean': 0.03473663330078125, 'KL/rejected_KL_mean': -0.0434112548828125, 'KL/mean': -0.004338964819908142, 'KL/std': 0.24089065194129944, 'logits/chosen': -0.5036299228668213, 'logits/rejected': -0.45466092228889465, 'epoch': 0.03} + 3%|▎ | 19/681 [00:48<28:04, 2.54s/it] 3%|▎ | 20/681 [00:50<28:06, 2.55s/it] {'loss': 1.3864, 'grad_norm': 7.4274702072143555, 'learning_rate': 1.3768115942028986e-07, 'fcm_dpo/beta': 0.009999999776482582, 'fcm_dpo/q_t': 0.5000186562538147, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': -0.007474333047866821, 'margin_dpo/margin_mean': -0.007474362850189209, 'margin_dpo/margin_std': 0.37508344650268555, 'logps/chosen': -50.852455139160156, 'logps/rejected': -78.86508178710938, 'logps/ref_chosen': -50.80324172973633, 'logps/ref_rejected': -78.82334899902344, 'KL/chosen_KL_mean': -0.049213409423828125, 'KL/rejected_KL_mean': -0.041736602783203125, 'KL/mean': -0.0454762727022171, 'KL/std': 0.29056376218795776, 'logits/chosen': -0.5517487525939941, 'logits/rejected': -0.49535927176475525, 'epoch': 0.03} + 3%|▎ | 20/681 [00:50<28:06, 2.55s/it] 3%|▎ | 21/681 [00:53<27:48, 2.53s/it] {'loss': 1.3856, 'grad_norm': 7.820558547973633, 'learning_rate': 1.4492753623188405e-07, 'fcm_dpo/beta': 0.009999999776482582, 'fcm_dpo/q_t': 0.4998205900192261, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 0.07176293432712555, 'margin_dpo/margin_mean': 0.07176269590854645, 'margin_dpo/margin_std': 0.43745559453964233, 'logps/chosen': -50.068641662597656, 'logps/rejected': -77.94618225097656, 'logps/ref_chosen': -50.063018798828125, 'logps/ref_rejected': -77.86878967285156, 'KL/chosen_KL_mean': -0.0056247711181640625, 'KL/rejected_KL_mean': -0.077392578125, 'KL/mean': -0.04150792211294174, 'KL/std': 0.30757784843444824, 'logits/chosen': -0.5255781412124634, 'logits/rejected': -0.5039485096931458, 'epoch': 0.03} + 3%|▎ | 21/681 [00:53<27:48, 2.53s/it] 3%|▎ | 22/681 [00:55<27:50, 2.53s/it] {'loss': 1.3851, 'grad_norm': 8.56733512878418, 'learning_rate': 1.5217391304347825e-07, 'fcm_dpo/beta': 0.009999999776482582, 'fcm_dpo/q_t': 0.49969562888145447, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 0.12175038456916809, 'margin_dpo/margin_mean': 0.12175118923187256, 'margin_dpo/margin_std': 0.3743841052055359, 'logps/chosen': -59.0850830078125, 'logps/rejected': -97.65386962890625, 'logps/ref_chosen': -59.05763626098633, 'logps/ref_rejected': -97.50466918945312, 'KL/chosen_KL_mean': -0.027448654174804688, 'KL/rejected_KL_mean': -0.14919662475585938, 'KL/mean': -0.0883231908082962, 'KL/std': 0.2602458596229553, 'logits/chosen': -0.4898416996002197, 'logits/rejected': -0.44627994298934937, 'epoch': 0.03} + 3%|▎ | 22/681 [00:56<27:50, 2.53s/it] 3%|▎ | 23/681 [00:58<28:55, 2.64s/it] {'loss': 1.3845, 'grad_norm': 8.251264572143555, 'learning_rate': 1.5942028985507245e-07, 'fcm_dpo/beta': 0.009999999776482582, 'fcm_dpo/q_t': 0.499561607837677, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 0.17535313963890076, 'margin_dpo/margin_mean': 0.17535346746444702, 'margin_dpo/margin_std': 0.47338640689849854, 'logps/chosen': -60.01300811767578, 'logps/rejected': -81.25021362304688, 'logps/ref_chosen': -60.07769775390625, 'logps/ref_rejected': -81.13955688476562, 'KL/chosen_KL_mean': 0.06468772888183594, 'KL/rejected_KL_mean': -0.11066055297851562, 'KL/mean': -0.022989824414253235, 'KL/std': 0.3429142236709595, 'logits/chosen': -0.4786554276943207, 'logits/rejected': -0.4556560516357422, 'epoch': 0.03} + 3%|▎ | 23/681 [00:58<28:55, 2.64s/it] 4%|▎ | 24/681 [01:01<28:59, 2.65s/it] {'loss': 1.3835, 'grad_norm': 8.768179893493652, 'learning_rate': 1.6666666666666665e-07, 'fcm_dpo/beta': 0.009999999776482582, 'fcm_dpo/q_t': 0.499301016330719, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 0.2795853614807129, 'margin_dpo/margin_mean': 0.2795855402946472, 'margin_dpo/margin_std': 0.3744848668575287, 'logps/chosen': -44.230220794677734, 'logps/rejected': -99.34398651123047, 'logps/ref_chosen': -44.29103469848633, 'logps/ref_rejected': -99.12521362304688, 'KL/chosen_KL_mean': 0.06081390380859375, 'KL/rejected_KL_mean': -0.21877288818359375, 'KL/mean': -0.07897857576608658, 'KL/std': 0.30757251381874084, 'logits/chosen': -0.51224684715271, 'logits/rejected': -0.4956665635108948, 'epoch': 0.04} + 4%|▎ | 24/681 [01:01<28:59, 2.65s/it] 4%|▎ | 25/681 [01:04<28:56, 2.65s/it] {'loss': 1.3834, 'grad_norm': 7.525589942932129, 'learning_rate': 1.7391304347826085e-07, 'fcm_dpo/beta': 0.009999999776482582, 'fcm_dpo/q_t': 0.49927568435668945, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 0.2897287607192993, 'margin_dpo/margin_mean': 0.28972867131233215, 'margin_dpo/margin_std': 0.42933177947998047, 'logps/chosen': -52.48737335205078, 'logps/rejected': -89.58224487304688, 'logps/ref_chosen': -52.537052154541016, 'logps/ref_rejected': -89.34219360351562, 'KL/chosen_KL_mean': 0.04968070983886719, 'KL/rejected_KL_mean': -0.24005126953125, 'KL/mean': -0.09518682956695557, 'KL/std': 0.369601845741272, 'logits/chosen': -0.4925091564655304, 'logits/rejected': -0.4624241888523102, 'epoch': 0.04} + 4%|▎ | 25/681 [01:04<28:56, 2.65s/it] 4%|▍ | 26/681 [01:06<27:39, 2.53s/it] {'loss': 1.3823, 'grad_norm': 9.038716316223145, 'learning_rate': 1.8115942028985507e-07, 'fcm_dpo/beta': 0.009999999776482582, 'fcm_dpo/q_t': 0.49898844957351685, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 0.4046301543712616, 'margin_dpo/margin_mean': 0.40463075041770935, 'margin_dpo/margin_std': 0.5417345762252808, 'logps/chosen': -53.83065414428711, 'logps/rejected': -103.67218780517578, 'logps/ref_chosen': -53.92280578613281, 'logps/ref_rejected': -103.35971069335938, 'KL/chosen_KL_mean': 0.09215354919433594, 'KL/rejected_KL_mean': -0.3124732971191406, 'KL/mean': -0.1101590245962143, 'KL/std': 0.4453110992908478, 'logits/chosen': -0.5335673689842224, 'logits/rejected': -0.5019059777259827, 'epoch': 0.04} + 4%|▍ | 26/681 [01:06<27:39, 2.53s/it] 4%|▍ | 27/681 [01:08<27:28, 2.52s/it] {'loss': 1.3809, 'grad_norm': 9.623809814453125, 'learning_rate': 1.8840579710144927e-07, 'fcm_dpo/beta': 0.009999999776482582, 'fcm_dpo/q_t': 0.49863457679748535, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 0.5461834073066711, 'margin_dpo/margin_mean': 0.5461829900741577, 'margin_dpo/margin_std': 0.6316946744918823, 'logps/chosen': -42.76161193847656, 'logps/rejected': -99.13346862792969, 'logps/ref_chosen': -42.898529052734375, 'logps/ref_rejected': -98.72419738769531, 'KL/chosen_KL_mean': 0.1369171142578125, 'KL/rejected_KL_mean': -0.4092674255371094, 'KL/mean': -0.1361747682094574, 'KL/std': 0.48576533794403076, 'logits/chosen': -0.5602696537971497, 'logits/rejected': -0.5244206190109253, 'epoch': 0.04} + 4%|▍ | 27/681 [01:08<27:28, 2.52s/it] 4%|▍ | 28/681 [01:11<27:40, 2.54s/it] {'loss': 1.3831, 'grad_norm': 7.836782932281494, 'learning_rate': 1.9565217391304347e-07, 'fcm_dpo/beta': 0.009999999776482582, 'fcm_dpo/q_t': 0.4991976320743561, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 0.32094791531562805, 'margin_dpo/margin_mean': 0.3209477663040161, 'margin_dpo/margin_std': 0.5258319973945618, 'logps/chosen': -60.542015075683594, 'logps/rejected': -91.70758056640625, 'logps/ref_chosen': -60.55650329589844, 'logps/ref_rejected': -91.40111541748047, 'KL/chosen_KL_mean': 0.014486312866210938, 'KL/rejected_KL_mean': -0.3064613342285156, 'KL/mean': -0.14598755538463593, 'KL/std': 0.3845537304878235, 'logits/chosen': -0.502698540687561, 'logits/rejected': -0.4471771717071533, 'epoch': 0.04} + 4%|▍ | 28/681 [01:11<27:40, 2.54s/it] 4%|▍ | 29/681 [01:13<26:36, 2.45s/it] {'loss': 1.3818, 'grad_norm': 9.418075561523438, 'learning_rate': 2.028985507246377e-07, 'fcm_dpo/beta': 0.009999999776482582, 'fcm_dpo/q_t': 0.4988635182380676, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 0.4545966386795044, 'margin_dpo/margin_mean': 0.4545968770980835, 'margin_dpo/margin_std': 0.5804776549339294, 'logps/chosen': -57.73210144042969, 'logps/rejected': -97.77326965332031, 'logps/ref_chosen': -57.80778503417969, 'logps/ref_rejected': -97.39434814453125, 'KL/chosen_KL_mean': 0.07568168640136719, 'KL/rejected_KL_mean': -0.37891387939453125, 'KL/mean': -0.15161648392677307, 'KL/std': 0.46041831374168396, 'logits/chosen': -0.5607113242149353, 'logits/rejected': -0.5150310397148132, 'epoch': 0.04} + 4%|▍ | 29/681 [01:13<26:36, 2.45s/it] 4%|▍ | 30/681 [01:16<27:11, 2.51s/it] {'loss': 1.38, 'grad_norm': 9.180720329284668, 'learning_rate': 2.1014492753623187e-07, 'fcm_dpo/beta': 0.009999999776482582, 'fcm_dpo/q_t': 0.49843254685401917, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 0.6269863843917847, 'margin_dpo/margin_mean': 0.6269862651824951, 'margin_dpo/margin_std': 0.565685510635376, 'logps/chosen': -52.45802688598633, 'logps/rejected': -98.99684143066406, 'logps/ref_chosen': -52.577369689941406, 'logps/ref_rejected': -98.48920440673828, 'KL/chosen_KL_mean': 0.11934471130371094, 'KL/rejected_KL_mean': -0.5076408386230469, 'KL/mean': -0.19414639472961426, 'KL/std': 0.5560356974601746, 'logits/chosen': -0.46149182319641113, 'logits/rejected': -0.42938873171806335, 'epoch': 0.04} + 4%|▍ | 30/681 [01:16<27:11, 2.51s/it] 5%|▍ | 31/681 [01:19<27:44, 2.56s/it] {'loss': 1.3818, 'grad_norm': 7.051517009735107, 'learning_rate': 2.1739130434782607e-07, 'fcm_dpo/beta': 0.009999999776482582, 'fcm_dpo/q_t': 0.498860627412796, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 0.4557562470436096, 'margin_dpo/margin_mean': 0.455756276845932, 'margin_dpo/margin_std': 0.6158726215362549, 'logps/chosen': -63.72868347167969, 'logps/rejected': -73.27153015136719, 'logps/ref_chosen': -63.806922912597656, 'logps/ref_rejected': -72.89400482177734, 'KL/chosen_KL_mean': 0.07823753356933594, 'KL/rejected_KL_mean': -0.3775215148925781, 'KL/mean': -0.14964136481285095, 'KL/std': 0.5351479649543762, 'logits/chosen': -0.4864119291305542, 'logits/rejected': -0.43947017192840576, 'epoch': 0.05} + 5%|▍ | 31/681 [01:19<27:44, 2.56s/it] 5%|▍ | 32/681 [01:21<28:19, 2.62s/it] {'loss': 1.3785, 'grad_norm': 8.872539520263672, 'learning_rate': 2.2463768115942027e-07, 'fcm_dpo/beta': 0.009999999776482582, 'fcm_dpo/q_t': 0.4980509281158447, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 0.7796535491943359, 'margin_dpo/margin_mean': 0.7796535491943359, 'margin_dpo/margin_std': 0.9119139909744263, 'logps/chosen': -62.52357482910156, 'logps/rejected': -89.88121032714844, 'logps/ref_chosen': -62.739524841308594, 'logps/ref_rejected': -89.3175048828125, 'KL/chosen_KL_mean': 0.21595001220703125, 'KL/rejected_KL_mean': -0.5637054443359375, 'KL/mean': -0.17387576401233673, 'KL/std': 0.7160457968711853, 'logits/chosen': -0.5233839750289917, 'logits/rejected': -0.48274725675582886, 'epoch': 0.05} + 5%|▍ | 32/681 [01:21<28:19, 2.62s/it] 5%|▍ | 33/681 [01:24<27:38, 2.56s/it] {'loss': 1.3803, 'grad_norm': 7.534836292266846, 'learning_rate': 2.318840579710145e-07, 'fcm_dpo/beta': 0.009999999776482582, 'fcm_dpo/q_t': 0.49848970770835876, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 0.6041242480278015, 'margin_dpo/margin_mean': 0.6041243076324463, 'margin_dpo/margin_std': 0.696311354637146, 'logps/chosen': -53.151023864746094, 'logps/rejected': -88.37931823730469, 'logps/ref_chosen': -53.26097106933594, 'logps/ref_rejected': -87.8851318359375, 'KL/chosen_KL_mean': 0.10995101928710938, 'KL/rejected_KL_mean': -0.49417877197265625, 'KL/mean': -0.19211336970329285, 'KL/std': 0.5824633836746216, 'logits/chosen': -0.49200475215911865, 'logits/rejected': -0.465828537940979, 'epoch': 0.05} + 5%|▍ | 33/681 [01:24<27:38, 2.56s/it] 5%|▍ | 34/681 [01:26<27:47, 2.58s/it] {'loss': 1.3788, 'grad_norm': 8.2849702835083, 'learning_rate': 2.391304347826087e-07, 'fcm_dpo/beta': 0.009999999776482582, 'fcm_dpo/q_t': 0.49811026453971863, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 0.7559173107147217, 'margin_dpo/margin_mean': 0.7559161186218262, 'margin_dpo/margin_std': 0.89031583070755, 'logps/chosen': -50.760528564453125, 'logps/rejected': -102.62095642089844, 'logps/ref_chosen': -50.81732940673828, 'logps/ref_rejected': -101.92184448242188, 'KL/chosen_KL_mean': 0.05680084228515625, 'KL/rejected_KL_mean': -0.6991157531738281, 'KL/mean': -0.3211583197116852, 'KL/std': 0.7343294620513916, 'logits/chosen': -0.4856771230697632, 'logits/rejected': -0.4683513939380646, 'epoch': 0.05} + 5%|▍ | 34/681 [01:26<27:47, 2.58s/it] 5%|▌ | 35/681 [01:29<28:03, 2.61s/it] {'loss': 1.3745, 'grad_norm': 9.21121597290039, 'learning_rate': 2.463768115942029e-07, 'fcm_dpo/beta': 0.009999999776482582, 'fcm_dpo/q_t': 0.49704039096832275, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 1.1839112043380737, 'margin_dpo/margin_mean': 1.1839113235473633, 'margin_dpo/margin_std': 1.1529996395111084, 'logps/chosen': -50.89863586425781, 'logps/rejected': -107.88248443603516, 'logps/ref_chosen': -51.02449035644531, 'logps/ref_rejected': -106.82443237304688, 'KL/chosen_KL_mean': 0.1258563995361328, 'KL/rejected_KL_mean': -1.0580558776855469, 'KL/mean': -0.46609964966773987, 'KL/std': 0.997234582901001, 'logits/chosen': -0.5175144672393799, 'logits/rejected': -0.48064374923706055, 'epoch': 0.05} + 5%|▌ | 35/681 [01:29<28:03, 2.61s/it] 5%|▌ | 36/681 [01:32<28:11, 2.62s/it] {'loss': 1.3746, 'grad_norm': 8.075494766235352, 'learning_rate': 2.536231884057971e-07, 'fcm_dpo/beta': 0.009999999776482582, 'fcm_dpo/q_t': 0.4970599412918091, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 1.1760886907577515, 'margin_dpo/margin_mean': 1.1760879755020142, 'margin_dpo/margin_std': 1.2999153137207031, 'logps/chosen': -51.97834777832031, 'logps/rejected': -87.20356750488281, 'logps/ref_chosen': -51.991493225097656, 'logps/ref_rejected': -86.0406265258789, 'KL/chosen_KL_mean': 0.013143539428710938, 'KL/rejected_KL_mean': -1.1629409790039062, 'KL/mean': -0.5748996138572693, 'KL/std': 1.143606424331665, 'logits/chosen': -0.569900393486023, 'logits/rejected': -0.5340551733970642, 'epoch': 0.05} + 5%|▌ | 36/681 [01:32<28:11, 2.62s/it] 5%|▌ | 37/681 [01:34<28:11, 2.63s/it] {'loss': 1.376, 'grad_norm': 6.84469747543335, 'learning_rate': 2.6086956521739126e-07, 'fcm_dpo/beta': 0.009999999776482582, 'fcm_dpo/q_t': 0.4974081218242645, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 1.0368335247039795, 'margin_dpo/margin_mean': 1.0368335247039795, 'margin_dpo/margin_std': 1.3373100757598877, 'logps/chosen': -62.79753112792969, 'logps/rejected': -78.92233276367188, 'logps/ref_chosen': -62.807106018066406, 'logps/ref_rejected': -77.89507293701172, 'KL/chosen_KL_mean': 0.00957489013671875, 'KL/rejected_KL_mean': -1.0272636413574219, 'KL/mean': -0.5088434219360352, 'KL/std': 1.0523037910461426, 'logits/chosen': -0.5325401425361633, 'logits/rejected': -0.49065572023391724, 'epoch': 0.05} + 5%|▌ | 37/681 [01:34<28:11, 2.63s/it] 6%|▌ | 38/681 [01:37<26:51, 2.51s/it] {'loss': 1.372, 'grad_norm': 7.924060821533203, 'learning_rate': 2.681159420289855e-07, 'fcm_dpo/beta': 0.009999999776482582, 'fcm_dpo/q_t': 0.4963989853858948, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 1.4405823945999146, 'margin_dpo/margin_mean': 1.440582513809204, 'margin_dpo/margin_std': 1.5998311042785645, 'logps/chosen': -48.22242736816406, 'logps/rejected': -99.1849365234375, 'logps/ref_chosen': -48.39051818847656, 'logps/ref_rejected': -97.91244506835938, 'KL/chosen_KL_mean': 0.1680927276611328, 'KL/rejected_KL_mean': -1.272491455078125, 'KL/mean': -0.552198052406311, 'KL/std': 1.348757266998291, 'logits/chosen': -0.5047751665115356, 'logits/rejected': -0.47182124853134155, 'epoch': 0.06} + 6%|▌ | 38/681 [01:37<26:51, 2.51s/it] 6%|▌ | 39/681 [01:39<26:49, 2.51s/it] {'loss': 1.3697, 'grad_norm': 8.258176803588867, 'learning_rate': 2.753623188405797e-07, 'fcm_dpo/beta': 0.009999999776482582, 'fcm_dpo/q_t': 0.4958198070526123, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 1.6722157001495361, 'margin_dpo/margin_mean': 1.6722155809402466, 'margin_dpo/margin_std': 1.39006769657135, 'logps/chosen': -50.653709411621094, 'logps/rejected': -80.14496612548828, 'logps/ref_chosen': -50.75047302246094, 'logps/ref_rejected': -78.56951141357422, 'KL/chosen_KL_mean': 0.09676551818847656, 'KL/rejected_KL_mean': -1.5754547119140625, 'KL/mean': -0.7393452525138855, 'KL/std': 1.302678108215332, 'logits/chosen': -0.5640593767166138, 'logits/rejected': -0.5244793891906738, 'epoch': 0.06} + 6%|▌ | 39/681 [01:39<26:49, 2.51s/it] 6%|▌ | 40/681 [01:42<27:26, 2.57s/it] {'loss': 1.3711, 'grad_norm': 6.980234146118164, 'learning_rate': 2.8260869565217386e-07, 'fcm_dpo/beta': 0.009999999776482582, 'fcm_dpo/q_t': 0.4961639642715454, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 1.5346159934997559, 'margin_dpo/margin_mean': 1.5346163511276245, 'margin_dpo/margin_std': 1.6990015506744385, 'logps/chosen': -57.7794075012207, 'logps/rejected': -75.6290283203125, 'logps/ref_chosen': -57.985069274902344, 'logps/ref_rejected': -74.3000717163086, 'KL/chosen_KL_mean': 0.2056598663330078, 'KL/rejected_KL_mean': -1.3289527893066406, 'KL/mean': -0.5616458654403687, 'KL/std': 1.4022493362426758, 'logits/chosen': -0.5176148414611816, 'logits/rejected': -0.4874315857887268, 'epoch': 0.06} + 6%|▌ | 40/681 [01:42<27:26, 2.57s/it] 6%|▌ | 41/681 [01:44<27:20, 2.56s/it] {'loss': 1.3681, 'grad_norm': 8.068608283996582, 'learning_rate': 2.898550724637681e-07, 'fcm_dpo/beta': 0.009999999776482582, 'fcm_dpo/q_t': 0.49541300535202026, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 1.8351185321807861, 'margin_dpo/margin_mean': 1.8351190090179443, 'margin_dpo/margin_std': 2.0257954597473145, 'logps/chosen': -62.684226989746094, 'logps/rejected': -98.84706115722656, 'logps/ref_chosen': -62.69581604003906, 'logps/ref_rejected': -97.02352905273438, 'KL/chosen_KL_mean': 0.01158905029296875, 'KL/rejected_KL_mean': -1.8235282897949219, 'KL/mean': -0.9059728980064392, 'KL/std': 1.8433566093444824, 'logits/chosen': -0.5168710350990295, 'logits/rejected': -0.4790883958339691, 'epoch': 0.06} + 6%|▌ | 41/681 [01:44<27:20, 2.56s/it] 6%|▌ | 42/681 [01:47<27:14, 2.56s/it] {'loss': 1.3601, 'grad_norm': 9.999738693237305, 'learning_rate': 2.971014492753623e-07, 'fcm_dpo/beta': 0.009999999776482582, 'fcm_dpo/q_t': 0.49337083101272583, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 2.6524176597595215, 'margin_dpo/margin_mean': 2.6524174213409424, 'margin_dpo/margin_std': 2.480203628540039, 'logps/chosen': -58.731651306152344, 'logps/rejected': -112.32601928710938, 'logps/ref_chosen': -58.966426849365234, 'logps/ref_rejected': -109.90837097167969, 'KL/chosen_KL_mean': 0.2347736358642578, 'KL/rejected_KL_mean': -2.4176406860351562, 'KL/mean': -1.0914355516433716, 'KL/std': 2.2188539505004883, 'logits/chosen': -0.5359020233154297, 'logits/rejected': -0.4893391728401184, 'epoch': 0.06} + 6%|▌ | 42/681 [01:47<27:14, 2.56s/it] 6%|▋ | 43/681 [01:49<27:19, 2.57s/it] {'loss': 1.3616, 'grad_norm': 8.732246398925781, 'learning_rate': 3.043478260869565e-07, 'fcm_dpo/beta': 0.009999999776482582, 'fcm_dpo/q_t': 0.4937525987625122, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 2.4994335174560547, 'margin_dpo/margin_mean': 2.4994330406188965, 'margin_dpo/margin_std': 2.0141167640686035, 'logps/chosen': -53.609649658203125, 'logps/rejected': -98.43328094482422, 'logps/ref_chosen': -54.15599822998047, 'logps/ref_rejected': -96.48019409179688, 'KL/chosen_KL_mean': 0.5463447570800781, 'KL/rejected_KL_mean': -1.9530906677246094, 'KL/mean': -0.7033693790435791, 'KL/std': 1.851230263710022, 'logits/chosen': -0.5335399508476257, 'logits/rejected': -0.5083379745483398, 'epoch': 0.06} + 6%|▋ | 43/681 [01:50<27:19, 2.57s/it] 6%|▋ | 44/681 [01:52<27:19, 2.57s/it] {'loss': 1.3577, 'grad_norm': 9.835100173950195, 'learning_rate': 3.115942028985507e-07, 'fcm_dpo/beta': 0.009999999776482582, 'fcm_dpo/q_t': 0.4927557110786438, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 2.8983845710754395, 'margin_dpo/margin_mean': 2.8983850479125977, 'margin_dpo/margin_std': 2.2746810913085938, 'logps/chosen': -49.82194519042969, 'logps/rejected': -111.42558288574219, 'logps/ref_chosen': -50.07849884033203, 'logps/ref_rejected': -108.78376007080078, 'KL/chosen_KL_mean': 0.25655555725097656, 'KL/rejected_KL_mean': -2.641826629638672, 'KL/mean': -1.1926369667053223, 'KL/std': 2.3078997135162354, 'logits/chosen': -0.48626744747161865, 'logits/rejected': -0.465964674949646, 'epoch': 0.06} + 6%|▋ | 44/681 [01:52<27:19, 2.57s/it] 7%|▋ | 45/681 [01:55<27:26, 2.59s/it] {'loss': 1.3639, 'grad_norm': 7.852822303771973, 'learning_rate': 3.188405797101449e-07, 'fcm_dpo/beta': 0.009999999776482582, 'fcm_dpo/q_t': 0.49431926012039185, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 2.2729620933532715, 'margin_dpo/margin_mean': 2.2729620933532715, 'margin_dpo/margin_std': 2.53743839263916, 'logps/chosen': -48.290428161621094, 'logps/rejected': -80.08489227294922, 'logps/ref_chosen': -48.4149284362793, 'logps/ref_rejected': -77.93643188476562, 'KL/chosen_KL_mean': 0.12450027465820312, 'KL/rejected_KL_mean': -2.1484642028808594, 'KL/mean': -1.0119799375534058, 'KL/std': 2.0461864471435547, 'logits/chosen': -0.457671582698822, 'logits/rejected': -0.4445871412754059, 'epoch': 0.07} + 7%|▋ | 45/681 [01:55<27:26, 2.59s/it] 7%|▋ | 46/681 [01:57<27:37, 2.61s/it] {'loss': 1.3575, 'grad_norm': 9.287505149841309, 'learning_rate': 3.260869565217391e-07, 'fcm_dpo/beta': 0.009999999776482582, 'fcm_dpo/q_t': 0.4926820993423462, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 2.92849063873291, 'margin_dpo/margin_mean': 2.92849063873291, 'margin_dpo/margin_std': 3.251277446746826, 'logps/chosen': -55.84687805175781, 'logps/rejected': -98.42852783203125, 'logps/ref_chosen': -55.999427795410156, 'logps/ref_rejected': -95.652587890625, 'KL/chosen_KL_mean': 0.15254783630371094, 'KL/rejected_KL_mean': -2.7759437561035156, 'KL/mean': -1.3116981983184814, 'KL/std': 2.635380744934082, 'logits/chosen': -0.5358284115791321, 'logits/rejected': -0.48519566655158997, 'epoch': 0.07} + 7%|▋ | 46/681 [01:57<27:37, 2.61s/it] 7%|▋ | 47/681 [02:00<27:41, 2.62s/it] {'loss': 1.3567, 'grad_norm': 8.359269142150879, 'learning_rate': 3.333333333333333e-07, 'fcm_dpo/beta': 0.009999999776482582, 'fcm_dpo/q_t': 0.49249979853630066, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 3.0009684562683105, 'margin_dpo/margin_mean': 3.000969409942627, 'margin_dpo/margin_std': 2.6266069412231445, 'logps/chosen': -57.507999420166016, 'logps/rejected': -97.26210021972656, 'logps/ref_chosen': -57.92607879638672, 'logps/ref_rejected': -94.67920684814453, 'KL/chosen_KL_mean': 0.4180793762207031, 'KL/rejected_KL_mean': -2.5828895568847656, 'KL/mean': -1.0824042558670044, 'KL/std': 2.6021361351013184, 'logits/chosen': -0.583840012550354, 'logits/rejected': -0.531823992729187, 'epoch': 0.07} + 7%|▋ | 47/681 [02:00<27:41, 2.62s/it] 7%|▋ | 48/681 [02:03<27:57, 2.65s/it] {'loss': 1.355, 'grad_norm': 9.244763374328613, 'learning_rate': 3.4057971014492755e-07, 'fcm_dpo/beta': 0.009999999776482582, 'fcm_dpo/q_t': 0.4920506179332733, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 3.1809349060058594, 'margin_dpo/margin_mean': 3.1809351444244385, 'margin_dpo/margin_std': 3.039764881134033, 'logps/chosen': -57.183929443359375, 'logps/rejected': -91.19338989257812, 'logps/ref_chosen': -57.188072204589844, 'logps/ref_rejected': -88.0166015625, 'KL/chosen_KL_mean': 0.00414276123046875, 'KL/rejected_KL_mean': -3.176788330078125, 'KL/mean': -1.5863243341445923, 'KL/std': 2.768789768218994, 'logits/chosen': -0.590816855430603, 'logits/rejected': -0.532641589641571, 'epoch': 0.07} + 7%|▋ | 48/681 [02:03<27:57, 2.65s/it] 7%|▋ | 49/681 [02:05<27:39, 2.63s/it] {'loss': 1.3479, 'grad_norm': 8.854732513427734, 'learning_rate': 3.478260869565217e-07, 'fcm_dpo/beta': 0.009999999776482582, 'fcm_dpo/q_t': 0.49019408226013184, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 3.9254517555236816, 'margin_dpo/margin_mean': 3.9254512786865234, 'margin_dpo/margin_std': 4.098909378051758, 'logps/chosen': -61.382362365722656, 'logps/rejected': -87.39002227783203, 'logps/ref_chosen': -61.685272216796875, 'logps/ref_rejected': -83.76747131347656, 'KL/chosen_KL_mean': 0.3029060363769531, 'KL/rejected_KL_mean': -3.622547149658203, 'KL/mean': -1.659820556640625, 'KL/std': 3.5545148849487305, 'logits/chosen': -0.5587940812110901, 'logits/rejected': -0.5012864470481873, 'epoch': 0.07} + 7%|▋ | 49/681 [02:05<27:39, 2.63s/it] 7%|▋ | 50/681 [02:08<27:43, 2.64s/it] {'loss': 1.3456, 'grad_norm': 8.716004371643066, 'learning_rate': 3.5507246376811595e-07, 'fcm_dpo/beta': 0.009999999776482582, 'fcm_dpo/q_t': 0.4896165132522583, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 4.156033515930176, 'margin_dpo/margin_mean': 4.156033515930176, 'margin_dpo/margin_std': 4.06223201751709, 'logps/chosen': -58.93033218383789, 'logps/rejected': -100.72037506103516, 'logps/ref_chosen': -58.72413635253906, 'logps/ref_rejected': -96.35814666748047, 'KL/chosen_KL_mean': -0.2061939239501953, 'KL/rejected_KL_mean': -4.3622283935546875, 'KL/mean': -2.284212827682495, 'KL/std': 3.580059051513672, 'logits/chosen': -0.5688312649726868, 'logits/rejected': -0.5336655378341675, 'epoch': 0.07} + 7%|▋ | 50/681 [02:08<27:43, 2.64s/it] 7%|▋ | 51/681 [02:11<27:38, 2.63s/it] {'loss': 1.3458, 'grad_norm': 8.142417907714844, 'learning_rate': 3.6231884057971015e-07, 'fcm_dpo/beta': 0.009999999776482582, 'fcm_dpo/q_t': 0.4896053671836853, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 4.162949562072754, 'margin_dpo/margin_mean': 4.162949562072754, 'margin_dpo/margin_std': 5.265970706939697, 'logps/chosen': -61.67889404296875, 'logps/rejected': -80.47017669677734, 'logps/ref_chosen': -61.3736686706543, 'logps/ref_rejected': -76.00199890136719, 'KL/chosen_KL_mean': -0.30522727966308594, 'KL/rejected_KL_mean': -4.468173980712891, 'KL/mean': -2.3866991996765137, 'KL/std': 4.316704750061035, 'logits/chosen': -0.5444722175598145, 'logits/rejected': -0.5125424861907959, 'epoch': 0.07} + 7%|▋ | 51/681 [02:11<27:38, 2.63s/it] 8%|▊ | 52/681 [02:13<27:01, 2.58s/it] {'loss': 1.3248, 'grad_norm': 9.986041069030762, 'learning_rate': 3.695652173913043e-07, 'fcm_dpo/beta': 0.009999999776482582, 'fcm_dpo/q_t': 0.48420995473861694, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 6.323929309844971, 'margin_dpo/margin_mean': 6.323929309844971, 'margin_dpo/margin_std': 5.412091255187988, 'logps/chosen': -52.019493103027344, 'logps/rejected': -85.97998046875, 'logps/ref_chosen': -52.33735656738281, 'logps/ref_rejected': -79.97391510009766, 'KL/chosen_KL_mean': 0.31786346435546875, 'KL/rejected_KL_mean': -6.006065368652344, 'KL/mean': -2.8440990447998047, 'KL/std': 4.961765289306641, 'logits/chosen': -0.5617812275886536, 'logits/rejected': -0.5068017840385437, 'epoch': 0.08} + 8%|▊ | 52/681 [02:13<27:01, 2.58s/it] 8%|▊ | 53/681 [02:16<26:55, 2.57s/it] {'loss': 1.3248, 'grad_norm': 10.334386825561523, 'learning_rate': 3.7681159420289855e-07, 'fcm_dpo/beta': 0.009999999776482582, 'fcm_dpo/q_t': 0.4840887486934662, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 6.37903356552124, 'margin_dpo/margin_mean': 6.379033088684082, 'margin_dpo/margin_std': 6.412992477416992, 'logps/chosen': -53.56161880493164, 'logps/rejected': -98.40959167480469, 'logps/ref_chosen': -53.31465148925781, 'logps/ref_rejected': -91.78359985351562, 'KL/chosen_KL_mean': -0.24696731567382812, 'KL/rejected_KL_mean': -6.6259918212890625, 'KL/mean': -3.4364819526672363, 'KL/std': 5.564968109130859, 'logits/chosen': -0.6039080619812012, 'logits/rejected': -0.5819511413574219, 'epoch': 0.08} + 8%|▊ | 53/681 [02:16<26:55, 2.57s/it] 8%|▊ | 54/681 [02:18<26:21, 2.52s/it] {'loss': 1.3317, 'grad_norm': 8.843002319335938, 'learning_rate': 3.8405797101449274e-07, 'fcm_dpo/beta': 0.009999999776482582, 'fcm_dpo/q_t': 0.4859907031059265, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 5.610658645629883, 'margin_dpo/margin_mean': 5.610658168792725, 'margin_dpo/margin_std': 5.301271438598633, 'logps/chosen': -51.144378662109375, 'logps/rejected': -97.78176879882812, 'logps/ref_chosen': -50.68865966796875, 'logps/ref_rejected': -91.71539306640625, 'KL/chosen_KL_mean': -0.4557170867919922, 'KL/rejected_KL_mean': -6.066375732421875, 'KL/mean': -3.2610464096069336, 'KL/std': 5.338939189910889, 'logits/chosen': -0.6185827255249023, 'logits/rejected': -0.566498339176178, 'epoch': 0.08} + 8%|▊ | 54/681 [02:18<26:21, 2.52s/it] 8%|▊ | 55/681 [02:20<25:25, 2.44s/it] {'loss': 1.3228, 'grad_norm': 9.306718826293945, 'learning_rate': 3.9130434782608694e-07, 'fcm_dpo/beta': 0.009999999776482582, 'fcm_dpo/q_t': 0.4835028052330017, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 6.613970756530762, 'margin_dpo/margin_mean': 6.6139702796936035, 'margin_dpo/margin_std': 7.726709365844727, 'logps/chosen': -63.68708801269531, 'logps/rejected': -96.67933654785156, 'logps/ref_chosen': -62.615234375, 'logps/ref_rejected': -88.99349975585938, 'KL/chosen_KL_mean': -1.0718555450439453, 'KL/rejected_KL_mean': -7.685829162597656, 'KL/mean': -4.378843307495117, 'KL/std': 6.400544166564941, 'logits/chosen': -0.6270061731338501, 'logits/rejected': -0.5628513097763062, 'epoch': 0.08} + 8%|▊ | 55/681 [02:20<25:25, 2.44s/it] 8%|▊ | 56/681 [02:23<25:59, 2.49s/it] {'loss': 1.3248, 'grad_norm': 8.995559692382812, 'learning_rate': 3.9855072463768114e-07, 'fcm_dpo/beta': 0.009999999776482582, 'fcm_dpo/q_t': 0.4840297996997833, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 6.402560234069824, 'margin_dpo/margin_mean': 6.402560234069824, 'margin_dpo/margin_std': 7.6934638023376465, 'logps/chosen': -58.774803161621094, 'logps/rejected': -101.4190673828125, 'logps/ref_chosen': -57.9327278137207, 'logps/ref_rejected': -94.1744384765625, 'KL/chosen_KL_mean': -0.8420734405517578, 'KL/rejected_KL_mean': -7.24462890625, 'KL/mean': -4.043349266052246, 'KL/std': 6.373098850250244, 'logits/chosen': -0.6062008142471313, 'logits/rejected': -0.563714861869812, 'epoch': 0.08} + 8%|▊ | 56/681 [02:23<25:59, 2.49s/it] 8%|▊ | 57/681 [02:25<26:01, 2.50s/it] {'loss': 1.3166, 'grad_norm': 9.872321128845215, 'learning_rate': 4.057971014492754e-07, 'fcm_dpo/beta': 0.009999999776482582, 'fcm_dpo/q_t': 0.48196300864219666, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 7.227848052978516, 'margin_dpo/margin_mean': 7.227848052978516, 'margin_dpo/margin_std': 7.028669834136963, 'logps/chosen': -71.40656280517578, 'logps/rejected': -103.70458221435547, 'logps/ref_chosen': -70.49528503417969, 'logps/ref_rejected': -95.56546020507812, 'KL/chosen_KL_mean': -0.9112758636474609, 'KL/rejected_KL_mean': -8.13912582397461, 'KL/mean': -4.525204181671143, 'KL/std': 6.3168745040893555, 'logits/chosen': -0.5837876200675964, 'logits/rejected': -0.5559124946594238, 'epoch': 0.08} + 8%|▊ | 57/681 [02:25<26:01, 2.50s/it] 9%|▊ | 58/681 [02:28<26:27, 2.55s/it] {'loss': 1.309, 'grad_norm': 10.019336700439453, 'learning_rate': 4.1304347826086954e-07, 'fcm_dpo/beta': 0.009999999776482582, 'fcm_dpo/q_t': 0.4798462390899658, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 8.08529281616211, 'margin_dpo/margin_mean': 8.08529281616211, 'margin_dpo/margin_std': 8.671724319458008, 'logps/chosen': -63.397979736328125, 'logps/rejected': -93.9676284790039, 'logps/ref_chosen': -62.13294219970703, 'logps/ref_rejected': -84.61729431152344, 'KL/chosen_KL_mean': -1.2650394439697266, 'KL/rejected_KL_mean': -9.350334167480469, 'KL/mean': -5.307687759399414, 'KL/std': 7.259403228759766, 'logits/chosen': -0.5869804620742798, 'logits/rejected': -0.5100945830345154, 'epoch': 0.09} + 9%|▊ | 58/681 [02:28<26:27, 2.55s/it] 9%|▊ | 59/681 [02:31<26:28, 2.55s/it] {'loss': 1.3012, 'grad_norm': 11.210515975952148, 'learning_rate': 4.2028985507246374e-07, 'fcm_dpo/beta': 0.009999999776482582, 'fcm_dpo/q_t': 0.4777594804763794, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 8.926612854003906, 'margin_dpo/margin_mean': 8.926612854003906, 'margin_dpo/margin_std': 9.161856651306152, 'logps/chosen': -53.74842071533203, 'logps/rejected': -99.62770080566406, 'logps/ref_chosen': -51.932525634765625, 'logps/ref_rejected': -88.88520050048828, 'KL/chosen_KL_mean': -1.8158931732177734, 'KL/rejected_KL_mean': -10.742504119873047, 'KL/mean': -6.279197692871094, 'KL/std': 8.032249450683594, 'logits/chosen': -0.6375648379325867, 'logits/rejected': -0.5986994504928589, 'epoch': 0.09} + 9%|▊ | 59/681 [02:31<26:28, 2.55s/it] 9%|▉ | 60/681 [02:33<26:15, 2.54s/it] {'loss': 1.3183, 'grad_norm': 9.682544708251953, 'learning_rate': 4.2753623188405794e-07, 'fcm_dpo/beta': 0.009999999776482582, 'fcm_dpo/q_t': 0.4822811782360077, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 7.1014251708984375, 'margin_dpo/margin_mean': 7.101426124572754, 'margin_dpo/margin_std': 8.231400489807129, 'logps/chosen': -64.1506576538086, 'logps/rejected': -95.70329284667969, 'logps/ref_chosen': -60.94218826293945, 'logps/ref_rejected': -85.39340209960938, 'KL/chosen_KL_mean': -3.208467483520508, 'KL/rejected_KL_mean': -10.309898376464844, 'KL/mean': -6.759184837341309, 'KL/std': 7.358757972717285, 'logits/chosen': -0.6127077341079712, 'logits/rejected': -0.5527953505516052, 'epoch': 0.09} + 9%|▉ | 60/681 [02:33<26:15, 2.54s/it] 9%|▉ | 61/681 [02:36<26:30, 2.57s/it] {'loss': 1.3032, 'grad_norm': 10.344194412231445, 'learning_rate': 4.3478260869565214e-07, 'fcm_dpo/beta': 0.009999999776482582, 'fcm_dpo/q_t': 0.47791624069213867, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 8.9053955078125, 'margin_dpo/margin_mean': 8.9053955078125, 'margin_dpo/margin_std': 12.48222541809082, 'logps/chosen': -62.59632873535156, 'logps/rejected': -100.72069549560547, 'logps/ref_chosen': -60.633522033691406, 'logps/ref_rejected': -89.85249328613281, 'KL/chosen_KL_mean': -1.9628067016601562, 'KL/rejected_KL_mean': -10.86819839477539, 'KL/mean': -6.415502548217773, 'KL/std': 9.966720581054688, 'logits/chosen': -0.624599814414978, 'logits/rejected': -0.5915525555610657, 'epoch': 0.09} + 9%|▉ | 61/681 [02:36<26:30, 2.57s/it] 9%|▉ | 62/681 [02:38<26:57, 2.61s/it] {'loss': 1.3225, 'grad_norm': 8.395082473754883, 'learning_rate': 4.420289855072464e-07, 'fcm_dpo/beta': 0.009999999776482582, 'fcm_dpo/q_t': 0.4832811653614044, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 6.709033012390137, 'margin_dpo/margin_mean': 6.709033012390137, 'margin_dpo/margin_std': 9.149477005004883, 'logps/chosen': -58.25334930419922, 'logps/rejected': -84.3778076171875, 'logps/ref_chosen': -56.15077209472656, 'logps/ref_rejected': -75.56619262695312, 'KL/chosen_KL_mean': -2.1025753021240234, 'KL/rejected_KL_mean': -8.811607360839844, 'KL/mean': -5.457090854644775, 'KL/std': 7.392644882202148, 'logits/chosen': -0.6115210056304932, 'logits/rejected': -0.5771872401237488, 'epoch': 0.09} + 9%|▉ | 62/681 [02:38<26:57, 2.61s/it] 9%|▉ | 63/681 [02:41<26:40, 2.59s/it] {'loss': 1.3, 'grad_norm': 10.433484077453613, 'learning_rate': 4.4927536231884053e-07, 'fcm_dpo/beta': 0.009999999776482582, 'fcm_dpo/q_t': 0.4773363471031189, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 9.097840309143066, 'margin_dpo/margin_mean': 9.09783935546875, 'margin_dpo/margin_std': 10.06234359741211, 'logps/chosen': -76.71784973144531, 'logps/rejected': -110.27836608886719, 'logps/ref_chosen': -73.14739227294922, 'logps/ref_rejected': -97.61006164550781, 'KL/chosen_KL_mean': -3.5704593658447266, 'KL/rejected_KL_mean': -12.66830062866211, 'KL/mean': -8.119380950927734, 'KL/std': 9.557559967041016, 'logits/chosen': -0.6092942953109741, 'logits/rejected': -0.5663588047027588, 'epoch': 0.09} + 9%|▉ | 63/681 [02:41<26:40, 2.59s/it] 9%|▉ | 64/681 [02:43<26:15, 2.55s/it] {'loss': 1.2835, 'grad_norm': 11.34101390838623, 'learning_rate': 4.5652173913043473e-07, 'fcm_dpo/beta': 0.009999999776482582, 'fcm_dpo/q_t': 0.4726361632347107, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 11.019262313842773, 'margin_dpo/margin_mean': 11.019262313842773, 'margin_dpo/margin_std': 12.875155448913574, 'logps/chosen': -55.6339111328125, 'logps/rejected': -106.18477630615234, 'logps/ref_chosen': -53.998600006103516, 'logps/ref_rejected': -93.53019714355469, 'KL/chosen_KL_mean': -1.6353092193603516, 'KL/rejected_KL_mean': -12.65457534790039, 'KL/mean': -7.144941329956055, 'KL/std': 10.44306755065918, 'logits/chosen': -0.5932717323303223, 'logits/rejected': -0.5615238547325134, 'epoch': 0.09} + 9%|▉ | 64/681 [02:43<26:15, 2.55s/it] 10%|▉ | 65/681 [02:46<26:26, 2.58s/it] {'loss': 1.2808, 'grad_norm': 11.773619651794434, 'learning_rate': 4.63768115942029e-07, 'fcm_dpo/beta': 0.009999999776482582, 'fcm_dpo/q_t': 0.47184616327285767, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 11.32283878326416, 'margin_dpo/margin_mean': 11.322837829589844, 'margin_dpo/margin_std': 13.313655853271484, 'logps/chosen': -69.28094482421875, 'logps/rejected': -125.7142333984375, 'logps/ref_chosen': -64.83599853515625, 'logps/ref_rejected': -109.94645690917969, 'KL/chosen_KL_mean': -4.444938659667969, 'KL/rejected_KL_mean': -15.767776489257812, 'KL/mean': -10.106355667114258, 'KL/std': 11.982595443725586, 'logits/chosen': -0.6665968298912048, 'logits/rejected': -0.6556574106216431, 'epoch': 0.1} + 10%|▉ | 65/681 [02:46<26:26, 2.58s/it] 10%|▉ | 66/681 [02:49<26:34, 2.59s/it] {'loss': 1.2892, 'grad_norm': 10.362818717956543, 'learning_rate': 4.7101449275362313e-07, 'fcm_dpo/beta': 0.009999999776482582, 'fcm_dpo/q_t': 0.4741200804710388, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 10.432148933410645, 'margin_dpo/margin_mean': 10.432148933410645, 'margin_dpo/margin_std': 13.316844940185547, 'logps/chosen': -55.69843673706055, 'logps/rejected': -90.32335662841797, 'logps/ref_chosen': -51.44352722167969, 'logps/ref_rejected': -75.63629913330078, 'KL/chosen_KL_mean': -4.254911422729492, 'KL/rejected_KL_mean': -14.687057495117188, 'KL/mean': -9.470987319946289, 'KL/std': 10.89914321899414, 'logits/chosen': -0.6421518325805664, 'logits/rejected': -0.6104958057403564, 'epoch': 0.1} + 10%|▉ | 66/681 [02:49<26:34, 2.59s/it] 10%|▉ | 67/681 [02:51<25:34, 2.50s/it] {'loss': 1.2828, 'grad_norm': 10.62942886352539, 'learning_rate': 4.782608695652174e-07, 'fcm_dpo/beta': 0.009999999776482582, 'fcm_dpo/q_t': 0.4722447097301483, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 11.204422950744629, 'margin_dpo/margin_mean': 11.204421997070312, 'margin_dpo/margin_std': 14.561704635620117, 'logps/chosen': -63.7381706237793, 'logps/rejected': -88.38906860351562, 'logps/ref_chosen': -59.34080505371094, 'logps/ref_rejected': -72.78728485107422, 'KL/chosen_KL_mean': -4.397365570068359, 'KL/rejected_KL_mean': -15.601787567138672, 'KL/mean': -9.999573707580566, 'KL/std': 12.074445724487305, 'logits/chosen': -0.5891748070716858, 'logits/rejected': -0.5434067249298096, 'epoch': 0.1} + 10%|▉ | 67/681 [02:51<25:34, 2.50s/it] 10%|▉ | 68/681 [02:53<25:13, 2.47s/it] {'loss': 1.2886, 'grad_norm': 9.816877365112305, 'learning_rate': 4.855072463768116e-07, 'fcm_dpo/beta': 0.009999999776482582, 'fcm_dpo/q_t': 0.47402510046958923, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 10.456976890563965, 'margin_dpo/margin_mean': 10.456975936889648, 'margin_dpo/margin_std': 12.803793907165527, 'logps/chosen': -71.73069763183594, 'logps/rejected': -94.18910217285156, 'logps/ref_chosen': -65.2058334350586, 'logps/ref_rejected': -77.20724487304688, 'KL/chosen_KL_mean': -6.524868011474609, 'KL/rejected_KL_mean': -16.981849670410156, 'KL/mean': -11.753357887268066, 'KL/std': 11.021953582763672, 'logits/chosen': -0.6517592668533325, 'logits/rejected': -0.586235761642456, 'epoch': 0.1} + 10%|▉ | 68/681 [02:53<25:13, 2.47s/it] 10%|█ | 69/681 [02:56<26:00, 2.55s/it] {'loss': 1.2463, 'grad_norm': 13.197538375854492, 'learning_rate': 4.927536231884058e-07, 'fcm_dpo/beta': 0.009999999776482582, 'fcm_dpo/q_t': 0.46209076046943665, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 15.35973834991455, 'margin_dpo/margin_mean': 15.359739303588867, 'margin_dpo/margin_std': 17.261123657226562, 'logps/chosen': -67.82604217529297, 'logps/rejected': -126.75540924072266, 'logps/ref_chosen': -59.81924057006836, 'logps/ref_rejected': -103.38886260986328, 'KL/chosen_KL_mean': -8.00680160522461, 'KL/rejected_KL_mean': -23.366546630859375, 'KL/mean': -15.686670303344727, 'KL/std': 15.096254348754883, 'logits/chosen': -0.6669565439224243, 'logits/rejected': -0.6464905738830566, 'epoch': 0.1} + 10%|█ | 69/681 [02:56<26:00, 2.55s/it] 10%|█ | 70/681 [02:59<25:45, 2.53s/it] {'loss': 1.2415, 'grad_norm': 12.805341720581055, 'learning_rate': 5e-07, 'fcm_dpo/beta': 0.009999999776482582, 'fcm_dpo/q_t': 0.46015501022338867, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 16.18343734741211, 'margin_dpo/margin_mean': 16.183441162109375, 'margin_dpo/margin_std': 20.474119186401367, 'logps/chosen': -72.81818389892578, 'logps/rejected': -118.13177490234375, 'logps/ref_chosen': -61.930641174316406, 'logps/ref_rejected': -91.06078338623047, 'KL/chosen_KL_mean': -10.887544631958008, 'KL/rejected_KL_mean': -27.07099151611328, 'KL/mean': -18.979265213012695, 'KL/std': 19.12816619873047, 'logits/chosen': -0.629044771194458, 'logits/rejected': -0.5975900888442993, 'epoch': 0.1} + 10%|█ | 70/681 [02:59<25:45, 2.53s/it] 10%|█ | 71/681 [03:01<25:46, 2.54s/it] {'loss': 1.2088, 'grad_norm': 14.385030746459961, 'learning_rate': 4.999967061337492e-07, 'fcm_dpo/beta': 0.010172600857913494, 'fcm_dpo/q_t': 0.45123565196990967, 'fcm_dpo/delta': 0.08556444197893143, 'fcm_dpo/margin': 19.849708557128906, 'margin_dpo/margin_mean': 19.849708557128906, 'margin_dpo/margin_std': 23.028926849365234, 'logps/chosen': -73.05315399169922, 'logps/rejected': -128.48915100097656, 'logps/ref_chosen': -61.750335693359375, 'logps/ref_rejected': -97.33662414550781, 'KL/chosen_KL_mean': -11.302818298339844, 'KL/rejected_KL_mean': -31.152530670166016, 'KL/mean': -21.22766876220703, 'KL/std': 20.86431121826172, 'logits/chosen': -0.7001615762710571, 'logits/rejected': -0.6704069972038269, 'epoch': 0.1} + 10%|█ | 71/681 [03:01<25:46, 2.54s/it] 11%|█ | 72/681 [03:04<26:01, 2.56s/it] {'loss': 1.2156, 'grad_norm': 13.355144500732422, 'learning_rate': 4.999868246217933e-07, 'fcm_dpo/beta': 0.010172600857913494, 'fcm_dpo/q_t': 0.451375812292099, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 19.57806396484375, 'margin_dpo/margin_mean': 19.57806396484375, 'margin_dpo/margin_std': 27.155168533325195, 'logps/chosen': -79.1748046875, 'logps/rejected': -127.98643493652344, 'logps/ref_chosen': -66.05341339111328, 'logps/ref_rejected': -95.2869873046875, 'KL/chosen_KL_mean': -13.121393203735352, 'KL/rejected_KL_mean': -32.6994514465332, 'KL/mean': -22.910423278808594, 'KL/std': 21.885107040405273, 'logits/chosen': -0.6597040891647339, 'logits/rejected': -0.6241432428359985, 'epoch': 0.11} + 11%|█ | 72/681 [03:04<26:01, 2.56s/it] 11%|█ | 73/681 [03:06<26:21, 2.60s/it] {'loss': 1.2222, 'grad_norm': 14.032218933105469, 'learning_rate': 4.999703557245192e-07, 'fcm_dpo/beta': 0.010345780290663242, 'fcm_dpo/q_t': 0.4510188698768616, 'fcm_dpo/delta': 0.08440417796373367, 'fcm_dpo/margin': 20.00701904296875, 'margin_dpo/margin_mean': 20.00701904296875, 'margin_dpo/margin_std': 35.20468521118164, 'logps/chosen': -81.4280776977539, 'logps/rejected': -125.63496398925781, 'logps/ref_chosen': -66.25627136230469, 'logps/ref_rejected': -90.45613098144531, 'KL/chosen_KL_mean': -15.171804428100586, 'KL/rejected_KL_mean': -35.178829193115234, 'KL/mean': -25.175312042236328, 'KL/std': 27.357677459716797, 'logits/chosen': -0.6953517198562622, 'logits/rejected': -0.6581678986549377, 'epoch': 0.11} + 11%|█ | 73/681 [03:07<26:21, 2.60s/it] 11%|█ | 74/681 [03:09<26:06, 2.58s/it] {'loss': 1.1959, 'grad_norm': 15.119664192199707, 'learning_rate': 4.999472998758977e-07, 'fcm_dpo/beta': 0.010593706741929054, 'fcm_dpo/q_t': 0.4444906413555145, 'fcm_dpo/delta': 0.15896809101104736, 'fcm_dpo/margin': 23.121337890625, 'margin_dpo/margin_mean': 23.121337890625, 'margin_dpo/margin_std': 39.1754150390625, 'logps/chosen': -69.22428894042969, 'logps/rejected': -134.86767578125, 'logps/ref_chosen': -53.42488098144531, 'logps/ref_rejected': -95.94693756103516, 'KL/chosen_KL_mean': -15.799406051635742, 'KL/rejected_KL_mean': -38.92074203491211, 'KL/mean': -27.360076904296875, 'KL/std': 30.7703857421875, 'logits/chosen': -0.6417176723480225, 'logits/rejected': -0.6399871706962585, 'epoch': 0.11} + 11%|█ | 74/681 [03:09<26:06, 2.58s/it] 11%|█ | 75/681 [03:12<26:16, 2.60s/it] {'loss': 1.1055, 'grad_norm': 17.095163345336914, 'learning_rate': 4.999176576834721e-07, 'fcm_dpo/beta': 0.010802132077515125, 'fcm_dpo/q_t': 0.41791272163391113, 'fcm_dpo/delta': 0.050960563123226166, 'fcm_dpo/margin': 32.434326171875, 'margin_dpo/margin_mean': 32.434326171875, 'margin_dpo/margin_std': 37.082366943359375, 'logps/chosen': -67.84478759765625, 'logps/rejected': -159.6714324951172, 'logps/ref_chosen': -51.861663818359375, 'logps/ref_rejected': -111.25398254394531, 'KL/chosen_KL_mean': -15.983125686645508, 'KL/rejected_KL_mean': -48.417449951171875, 'KL/mean': -32.20029067993164, 'KL/std': 31.59003448486328, 'logits/chosen': -0.6758487224578857, 'logits/rejected': -0.6743229627609253, 'epoch': 0.11} + 11%|█ | 75/681 [03:12<26:16, 2.60s/it] 11%|█ | 76/681 [03:14<26:09, 2.59s/it] {'loss': 1.2096, 'grad_norm': 15.436707496643066, 'learning_rate': 4.998814299283415e-07, 'fcm_dpo/beta': 0.010884184390306473, 'fcm_dpo/q_t': 0.4486614465713501, 'fcm_dpo/delta': 0.08708269149065018, 'fcm_dpo/margin': 19.47739028930664, 'margin_dpo/margin_mean': 19.47739028930664, 'margin_dpo/margin_std': 28.66197395324707, 'logps/chosen': -72.22576141357422, 'logps/rejected': -116.65373229980469, 'logps/ref_chosen': -53.26603698730469, 'logps/ref_rejected': -78.21662902832031, 'KL/chosen_KL_mean': -18.9597225189209, 'KL/rejected_KL_mean': -38.43710708618164, 'KL/mean': -28.698415756225586, 'KL/std': 25.665380477905273, 'logits/chosen': -0.6917558908462524, 'logits/rejected': -0.6552442312240601, 'epoch': 0.11} + 11%|█ | 76/681 [03:14<26:09, 2.59s/it] 11%|█▏ | 77/681 [03:16<25:06, 2.49s/it] {'loss': 1.1015, 'grad_norm': 17.87863540649414, 'learning_rate': 4.998386175651409e-07, 'fcm_dpo/beta': 0.011013032868504524, 'fcm_dpo/q_t': 0.4138905107975006, 'fcm_dpo/delta': 0.036607466638088226, 'fcm_dpo/margin': 33.11540603637695, 'margin_dpo/margin_mean': 33.11540603637695, 'margin_dpo/margin_std': 40.183067321777344, 'logps/chosen': -75.63147735595703, 'logps/rejected': -144.42381286621094, 'logps/ref_chosen': -58.0966796875, 'logps/ref_rejected': -93.77361297607422, 'KL/chosen_KL_mean': -17.53479766845703, 'KL/rejected_KL_mean': -50.65019989013672, 'KL/mean': -34.09250259399414, 'KL/std': 34.816802978515625, 'logits/chosen': -0.6697767376899719, 'logits/rejected': -0.637236475944519, 'epoch': 0.11} + 11%|█▏ | 77/681 [03:17<25:06, 2.49s/it] 11%|█▏ | 78/681 [03:19<25:31, 2.54s/it] {'loss': 1.1749, 'grad_norm': 15.848010063171387, 'learning_rate': 4.997892217220159e-07, 'fcm_dpo/beta': 0.011299570091068745, 'fcm_dpo/q_t': 0.43751174211502075, 'fcm_dpo/delta': 0.1355305016040802, 'fcm_dpo/margin': 23.709951400756836, 'margin_dpo/margin_mean': 23.709949493408203, 'margin_dpo/margin_std': 35.13151168823242, 'logps/chosen': -73.646484375, 'logps/rejected': -126.67701721191406, 'logps/ref_chosen': -55.61378479003906, 'logps/ref_rejected': -84.93436431884766, 'KL/chosen_KL_mean': -18.032699584960938, 'KL/rejected_KL_mean': -41.74265670776367, 'KL/mean': -29.887676239013672, 'KL/std': 29.635108947753906, 'logits/chosen': -0.6312749981880188, 'logits/rejected': -0.6117902994155884, 'epoch': 0.11} + 11%|█▏ | 78/681 [03:19<25:31, 2.54s/it] 12%|█▏ | 79/681 [03:22<25:46, 2.57s/it] {'loss': 1.1611, 'grad_norm': 15.949761390686035, 'learning_rate': 4.997332437005931e-07, 'fcm_dpo/beta': 0.01154954545199871, 'fcm_dpo/q_t': 0.4317125082015991, 'fcm_dpo/delta': 0.10868433862924576, 'fcm_dpo/margin': 25.500267028808594, 'margin_dpo/margin_mean': 25.500267028808594, 'margin_dpo/margin_std': 38.79698181152344, 'logps/chosen': -74.61249542236328, 'logps/rejected': -132.30984497070312, 'logps/ref_chosen': -55.45048522949219, 'logps/ref_rejected': -87.64756774902344, 'KL/chosen_KL_mean': -19.16200828552246, 'KL/rejected_KL_mean': -44.66227340698242, 'KL/mean': -31.912139892578125, 'KL/std': 31.9647274017334, 'logits/chosen': -0.6368188858032227, 'logits/rejected': -0.6110581755638123, 'epoch': 0.12} + 12%|█▏ | 79/681 [03:22<25:46, 2.57s/it] 12%|█▏ | 80/681 [03:24<25:40, 2.56s/it] {'loss': 1.163, 'grad_norm': 17.33467674255371, 'learning_rate': 4.996706849759452e-07, 'fcm_dpo/beta': 0.011725610122084618, 'fcm_dpo/q_t': 0.43057841062545776, 'fcm_dpo/delta': 0.09062545001506805, 'fcm_dpo/margin': 26.62934112548828, 'margin_dpo/margin_mean': 26.62934112548828, 'margin_dpo/margin_std': 43.98291778564453, 'logps/chosen': -82.26484680175781, 'logps/rejected': -137.9224090576172, 'logps/ref_chosen': -58.519290924072266, 'logps/ref_rejected': -87.54750061035156, 'KL/chosen_KL_mean': -23.745559692382812, 'KL/rejected_KL_mean': -50.374908447265625, 'KL/mean': -37.06023406982422, 'KL/std': 37.243316650390625, 'logits/chosen': -0.7032785415649414, 'logits/rejected': -0.6706737279891968, 'epoch': 0.12} + 12%|█▏ | 80/681 [03:24<25:40, 2.56s/it] 12%|█▏ | 81/681 [03:27<25:57, 2.60s/it] {'loss': 1.0918, 'grad_norm': 18.646865844726562, 'learning_rate': 4.996015471965529e-07, 'fcm_dpo/beta': 0.011759042739868164, 'fcm_dpo/q_t': 0.4062455892562866, 'fcm_dpo/delta': -0.023072410374879837, 'fcm_dpo/margin': 35.88337326049805, 'margin_dpo/margin_mean': 35.88337326049805, 'margin_dpo/margin_std': 52.36148452758789, 'logps/chosen': -89.88636779785156, 'logps/rejected': -188.98358154296875, 'logps/ref_chosen': -66.44886779785156, 'logps/ref_rejected': -129.66270446777344, 'KL/chosen_KL_mean': -23.4375, 'KL/rejected_KL_mean': -59.32087707519531, 'KL/mean': -41.379188537597656, 'KL/std': 43.66413497924805, 'logits/chosen': -0.6688940525054932, 'logits/rejected': -0.6473867893218994, 'epoch': 0.12} + 12%|█▏ | 81/681 [03:27<25:57, 2.60s/it] 12%|█▏ | 82/681 [03:29<25:26, 2.55s/it] {'loss': 1.1808, 'grad_norm': 18.904956817626953, 'learning_rate': 4.995258321842611e-07, 'fcm_dpo/beta': 0.011723190546035767, 'fcm_dpo/q_t': 0.42619970440864563, 'fcm_dpo/delta': -0.040955908596515656, 'fcm_dpo/margin': 28.397132873535156, 'margin_dpo/margin_mean': 28.397132873535156, 'margin_dpo/margin_std': 55.747474670410156, 'logps/chosen': -78.71695709228516, 'logps/rejected': -145.62493896484375, 'logps/ref_chosen': -52.232383728027344, 'logps/ref_rejected': -90.74325561523438, 'KL/chosen_KL_mean': -26.48457145690918, 'KL/rejected_KL_mean': -54.88169479370117, 'KL/mean': -40.68313217163086, 'KL/std': 39.783546447753906, 'logits/chosen': -0.6247996091842651, 'logits/rejected': -0.6217905282974243, 'epoch': 0.12} + 12%|█▏ | 82/681 [03:29<25:26, 2.55s/it] 12%|█▏ | 83/681 [03:32<25:02, 2.51s/it] {'loss': 1.089, 'grad_norm': 19.01769256591797, 'learning_rate': 4.994435419342304e-07, 'fcm_dpo/beta': 0.011612952686846256, 'fcm_dpo/q_t': 0.4054431617259979, 'fcm_dpo/delta': -0.019600681960582733, 'fcm_dpo/margin': 36.04762649536133, 'margin_dpo/margin_mean': 36.04762268066406, 'margin_dpo/margin_std': 50.104408264160156, 'logps/chosen': -82.89398193359375, 'logps/rejected': -166.83013916015625, 'logps/ref_chosen': -55.82738494873047, 'logps/ref_rejected': -103.71589660644531, 'KL/chosen_KL_mean': -27.066598892211914, 'KL/rejected_KL_mean': -63.11423110961914, 'KL/mean': -45.09041213989258, 'KL/std': 41.50777816772461, 'logits/chosen': -0.6414648294448853, 'logits/rejected': -0.6217591762542725, 'epoch': 0.12} + 12%|█▏ | 83/681 [03:32<25:02, 2.51s/it] 12%|█▏ | 84/681 [03:35<25:42, 2.58s/it] {'loss': 1.1425, 'grad_norm': 18.095876693725586, 'learning_rate': 4.993546786148857e-07, 'fcm_dpo/beta': 0.011577482335269451, 'fcm_dpo/q_t': 0.4257325530052185, 'fcm_dpo/delta': -0.022414665669202805, 'fcm_dpo/margin': 26.913192749023438, 'margin_dpo/margin_mean': 26.913192749023438, 'margin_dpo/margin_std': 35.351402282714844, 'logps/chosen': -95.21951293945312, 'logps/rejected': -142.25514221191406, 'logps/ref_chosen': -67.1761703491211, 'logps/ref_rejected': -87.29859924316406, 'KL/chosen_KL_mean': -28.04334831237793, 'KL/rejected_KL_mean': -54.956539154052734, 'KL/mean': -41.49994659423828, 'KL/std': 35.679141998291016, 'logits/chosen': -0.6499658823013306, 'logits/rejected': -0.6158007383346558, 'epoch': 0.12} + 12%|█▏ | 84/681 [03:35<25:42, 2.58s/it] 12%|█▏ | 85/681 [03:37<26:04, 2.62s/it] {'loss': 1.1441, 'grad_norm': 18.253501892089844, 'learning_rate': 4.992592445678582e-07, 'fcm_dpo/beta': 0.011758394539356232, 'fcm_dpo/q_t': 0.42318564653396606, 'fcm_dpo/delta': 0.06842872500419617, 'fcm_dpo/margin': 28.315706253051758, 'margin_dpo/margin_mean': 28.31570816040039, 'margin_dpo/margin_std': 43.07608413696289, 'logps/chosen': -85.84062194824219, 'logps/rejected': -134.38851928710938, 'logps/ref_chosen': -58.4066162109375, 'logps/ref_rejected': -78.63880157470703, 'KL/chosen_KL_mean': -27.43400764465332, 'KL/rejected_KL_mean': -55.74971389770508, 'KL/mean': -41.591861724853516, 'KL/std': 35.18168258666992, 'logits/chosen': -0.6255546808242798, 'logits/rejected': -0.5947661995887756, 'epoch': 0.12} + 12%|█▏ | 85/681 [03:37<26:04, 2.62s/it] 13%|█▎ | 86/681 [03:40<26:36, 2.68s/it] {'loss': 1.2181, 'grad_norm': 25.211519241333008, 'learning_rate': 4.991572423079235e-07, 'fcm_dpo/beta': 0.011856161057949066, 'fcm_dpo/q_t': 0.42889153957366943, 'fcm_dpo/delta': 0.041851602494716644, 'fcm_dpo/margin': 30.296781539916992, 'margin_dpo/margin_mean': 30.29677963256836, 'margin_dpo/margin_std': 73.2314453125, 'logps/chosen': -92.36730194091797, 'logps/rejected': -154.64825439453125, 'logps/ref_chosen': -56.13746643066406, 'logps/ref_rejected': -88.12165069580078, 'KL/chosen_KL_mean': -36.229835510253906, 'KL/rejected_KL_mean': -66.526611328125, 'KL/mean': -51.37822723388672, 'KL/std': 52.038185119628906, 'logits/chosen': -0.645729124546051, 'logits/rejected': -0.6413381099700928, 'epoch': 0.13} + 13%|█▎ | 86/681 [03:40<26:36, 2.68s/it] 13%|█▎ | 87/681 [03:43<26:23, 2.67s/it] {'loss': 1.1169, 'grad_norm': 21.151241302490234, 'learning_rate': 4.990486745229364e-07, 'fcm_dpo/beta': 0.011678045615553856, 'fcm_dpo/q_t': 0.39871087670326233, 'fcm_dpo/delta': -0.059305619448423386, 'fcm_dpo/margin': 39.041812896728516, 'margin_dpo/margin_mean': 39.041812896728516, 'margin_dpo/margin_std': 65.63897705078125, 'logps/chosen': -90.9077377319336, 'logps/rejected': -169.78103637695312, 'logps/ref_chosen': -55.63609313964844, 'logps/ref_rejected': -95.46757507324219, 'KL/chosen_KL_mean': -35.271644592285156, 'KL/rejected_KL_mean': -74.31346130371094, 'KL/mean': -54.79254913330078, 'KL/std': 52.29801940917969, 'logits/chosen': -0.6645753383636475, 'logits/rejected': -0.6533565521240234, 'epoch': 0.13} + 13%|█▎ | 87/681 [03:43<26:23, 2.67s/it] 13%|█▎ | 88/681 [03:45<26:18, 2.66s/it] {'loss': 1.2051, 'grad_norm': 22.641603469848633, 'learning_rate': 4.989335440737586e-07, 'fcm_dpo/beta': 0.011739738285541534, 'fcm_dpo/q_t': 0.42796188592910767, 'fcm_dpo/delta': 0.05674154311418533, 'fcm_dpo/margin': 29.389965057373047, 'margin_dpo/margin_mean': 29.389965057373047, 'margin_dpo/margin_std': 62.56239318847656, 'logps/chosen': -118.30757904052734, 'logps/rejected': -180.73489379882812, 'logps/ref_chosen': -73.67115020751953, 'logps/ref_rejected': -106.70849609375, 'KL/chosen_KL_mean': -44.63642883300781, 'KL/rejected_KL_mean': -74.02639770507812, 'KL/mean': -59.331417083740234, 'KL/std': 56.308712005615234, 'logits/chosen': -0.6030087471008301, 'logits/rejected': -0.6048742532730103, 'epoch': 0.13} + 13%|█▎ | 88/681 [03:45<26:18, 2.66s/it] 13%|█▎ | 89/681 [03:48<25:43, 2.61s/it] {'loss': 1.1371, 'grad_norm': 20.236740112304688, 'learning_rate': 4.988118539941847e-07, 'fcm_dpo/beta': 0.011899597942829132, 'fcm_dpo/q_t': 0.4130520224571228, 'fcm_dpo/delta': 0.014035461470484734, 'fcm_dpo/margin': 32.45248794555664, 'margin_dpo/margin_mean': 32.452484130859375, 'margin_dpo/margin_std': 56.894569396972656, 'logps/chosen': -90.20085144042969, 'logps/rejected': -144.11196899414062, 'logps/ref_chosen': -60.624916076660156, 'logps/ref_rejected': -82.08354949951172, 'KL/chosen_KL_mean': -29.5759334564209, 'KL/rejected_KL_mean': -62.02842712402344, 'KL/mean': -45.80217742919922, 'KL/std': 47.946502685546875, 'logits/chosen': -0.6709840297698975, 'logits/rejected': -0.6467102766036987, 'epoch': 0.13} + 13%|█▎ | 89/681 [03:48<25:43, 2.61s/it] 13%|█▎ | 90/681 [03:50<25:15, 2.56s/it] {'loss': 1.0805, 'grad_norm': 20.391284942626953, 'learning_rate': 4.986836074908615e-07, 'fcm_dpo/beta': 0.0116573516279459, 'fcm_dpo/q_t': 0.3860289454460144, 'fcm_dpo/delta': -0.18619467318058014, 'fcm_dpo/margin': 49.350799560546875, 'margin_dpo/margin_mean': 49.350799560546875, 'margin_dpo/margin_std': 80.73963928222656, 'logps/chosen': -86.84383392333984, 'logps/rejected': -194.45404052734375, 'logps/ref_chosen': -53.285308837890625, 'logps/ref_rejected': -111.54470825195312, 'KL/chosen_KL_mean': -33.55852508544922, 'KL/rejected_KL_mean': -82.90933227539062, 'KL/mean': -58.233924865722656, 'KL/std': 63.781768798828125, 'logits/chosen': -0.5895090103149414, 'logits/rejected': -0.614214301109314, 'epoch': 0.13} + 13%|█▎ | 90/681 [03:50<25:15, 2.56s/it] 13%|█▎ | 91/681 [03:53<25:22, 2.58s/it] {'loss': 1.1619, 'grad_norm': 21.17986488342285, 'learning_rate': 4.985488079432037e-07, 'fcm_dpo/beta': 0.011482559144496918, 'fcm_dpo/q_t': 0.41819268465042114, 'fcm_dpo/delta': 0.014481155201792717, 'fcm_dpo/margin': 33.60929870605469, 'margin_dpo/margin_mean': 33.60929870605469, 'margin_dpo/margin_std': 65.38414764404297, 'logps/chosen': -98.32235717773438, 'logps/rejected': -158.00265502929688, 'logps/ref_chosen': -61.802955627441406, 'logps/ref_rejected': -87.87395477294922, 'KL/chosen_KL_mean': -36.51939392089844, 'KL/rejected_KL_mean': -70.12869262695312, 'KL/mean': -53.32404327392578, 'KL/std': 47.886573791503906, 'logits/chosen': -0.6286755800247192, 'logits/rejected': -0.6033735275268555, 'epoch': 0.13} + 13%|█▎ | 91/681 [03:53<25:22, 2.58s/it] 14%|█▎ | 92/681 [03:55<25:00, 2.55s/it] {'loss': 1.1196, 'grad_norm': 19.466535568237305, 'learning_rate': 4.984074589033043e-07, 'fcm_dpo/beta': 0.011489994823932648, 'fcm_dpo/q_t': 0.4099717140197754, 'fcm_dpo/delta': -0.015798617154359818, 'fcm_dpo/margin': 36.09873580932617, 'margin_dpo/margin_mean': 36.09873580932617, 'margin_dpo/margin_std': 59.55793762207031, 'logps/chosen': -82.4329605102539, 'logps/rejected': -144.77210998535156, 'logps/ref_chosen': -51.640769958496094, 'logps/ref_rejected': -77.88117980957031, 'KL/chosen_KL_mean': -30.792192459106445, 'KL/rejected_KL_mean': -66.89093017578125, 'KL/mean': -48.84156036376953, 'KL/std': 49.2903938293457, 'logits/chosen': -0.6480433344841003, 'logits/rejected': -0.6293787360191345, 'epoch': 0.14} + 14%|█▎ | 92/681 [03:56<25:00, 2.55s/it] 14%|█▎ | 93/681 [03:58<23:46, 2.43s/it] {'loss': 1.1109, 'grad_norm': 21.00588607788086, 'learning_rate': 4.982595640958425e-07, 'fcm_dpo/beta': 0.01147179864346981, 'fcm_dpo/q_t': 0.41234347224235535, 'fcm_dpo/delta': 0.006311129778623581, 'fcm_dpo/margin': 34.32225799560547, 'margin_dpo/margin_mean': 34.32225799560547, 'margin_dpo/margin_std': 52.0517578125, 'logps/chosen': -86.86614990234375, 'logps/rejected': -145.81991577148438, 'logps/ref_chosen': -52.529239654541016, 'logps/ref_rejected': -77.16075134277344, 'KL/chosen_KL_mean': -34.3369140625, 'KL/rejected_KL_mean': -68.65916442871094, 'KL/mean': -51.498043060302734, 'KL/std': 43.49789810180664, 'logits/chosen': -0.668390154838562, 'logits/rejected': -0.632922887802124, 'epoch': 0.14} + 14%|█▎ | 93/681 [03:58<23:46, 2.43s/it] 14%|█▍ | 94/681 [04:00<24:37, 2.52s/it] {'loss': 1.0606, 'grad_norm': 19.056589126586914, 'learning_rate': 4.98105127417984e-07, 'fcm_dpo/beta': 0.011251532472670078, 'fcm_dpo/q_t': 0.3950212597846985, 'fcm_dpo/delta': -0.07815787196159363, 'fcm_dpo/margin': 42.09075164794922, 'margin_dpo/margin_mean': 42.09075164794922, 'margin_dpo/margin_std': 56.86457061767578, 'logps/chosen': -97.690185546875, 'logps/rejected': -178.15737915039062, 'logps/ref_chosen': -61.22261047363281, 'logps/ref_rejected': -99.59902954101562, 'KL/chosen_KL_mean': -36.46757507324219, 'KL/rejected_KL_mean': -78.55833435058594, 'KL/mean': -57.51295471191406, 'KL/std': 52.84346008300781, 'logits/chosen': -0.6087779998779297, 'logits/rejected': -0.6042633056640625, 'epoch': 0.14} + 14%|█▍ | 94/681 [04:00<24:37, 2.52s/it] 14%|█▍ | 95/681 [04:03<24:25, 2.50s/it] {'loss': 1.1443, 'grad_norm': 19.388639450073242, 'learning_rate': 4.979441529392784e-07, 'fcm_dpo/beta': 0.011258168146014214, 'fcm_dpo/q_t': 0.41526269912719727, 'fcm_dpo/delta': 0.02752673253417015, 'fcm_dpo/margin': 33.15034484863281, 'margin_dpo/margin_mean': 33.15034484863281, 'margin_dpo/margin_std': 56.809608459472656, 'logps/chosen': -88.08385467529297, 'logps/rejected': -144.59091186523438, 'logps/ref_chosen': -52.523643493652344, 'logps/ref_rejected': -75.8803482055664, 'KL/chosen_KL_mean': -35.560211181640625, 'KL/rejected_KL_mean': -68.71056365966797, 'KL/mean': -52.13538360595703, 'KL/std': 53.81283187866211, 'logits/chosen': -0.6361397504806519, 'logits/rejected': -0.6155867576599121, 'epoch': 0.14} + 14%|█▍ | 95/681 [04:03<24:25, 2.50s/it] 14%|█▍ | 96/681 [04:05<24:31, 2.51s/it] {'loss': 1.0532, 'grad_norm': 19.741348266601562, 'learning_rate': 4.977766449015534e-07, 'fcm_dpo/beta': 0.011056499555706978, 'fcm_dpo/q_t': 0.3921157121658325, 'fcm_dpo/delta': -0.11031479388475418, 'fcm_dpo/margin': 45.47876739501953, 'margin_dpo/margin_mean': 45.4787712097168, 'margin_dpo/margin_std': 63.79114532470703, 'logps/chosen': -98.04545593261719, 'logps/rejected': -177.96328735351562, 'logps/ref_chosen': -62.15697479248047, 'logps/ref_rejected': -96.59601593017578, 'KL/chosen_KL_mean': -35.88848876953125, 'KL/rejected_KL_mean': -81.36726379394531, 'KL/mean': -58.62786865234375, 'KL/std': 55.729679107666016, 'logits/chosen': -0.629560112953186, 'logits/rejected': -0.6098443269729614, 'epoch': 0.14} + 14%|█▍ | 96/681 [04:05<24:31, 2.51s/it] 14%|█▍ | 97/681 [04:08<24:38, 2.53s/it] {'loss': 1.116, 'grad_norm': 20.09777069091797, 'learning_rate': 4.976026077188012e-07, 'fcm_dpo/beta': 0.01119668036699295, 'fcm_dpo/q_t': 0.41301482915878296, 'fcm_dpo/delta': 0.019375190138816833, 'fcm_dpo/margin': 33.94279861450195, 'margin_dpo/margin_mean': 33.94279479980469, 'margin_dpo/margin_std': 48.3692626953125, 'logps/chosen': -90.79592895507812, 'logps/rejected': -147.05709838867188, 'logps/ref_chosen': -54.646366119384766, 'logps/ref_rejected': -76.96475219726562, 'KL/chosen_KL_mean': -36.149559020996094, 'KL/rejected_KL_mean': -70.09235382080078, 'KL/mean': -53.12095642089844, 'KL/std': 48.205299377441406, 'logits/chosen': -0.5712728500366211, 'logits/rejected': -0.539288341999054, 'epoch': 0.14} + 14%|█▍ | 97/681 [04:08<24:38, 2.53s/it] 14%|█▍ | 98/681 [04:10<24:27, 2.52s/it] {'loss': 1.1381, 'grad_norm': 22.645526885986328, 'learning_rate': 4.974220459770639e-07, 'fcm_dpo/beta': 0.01105651818215847, 'fcm_dpo/q_t': 0.4013606309890747, 'fcm_dpo/delta': -0.02756763994693756, 'fcm_dpo/margin': 38.55535125732422, 'margin_dpo/margin_mean': 38.55535125732422, 'margin_dpo/margin_std': 68.4720687866211, 'logps/chosen': -112.15716552734375, 'logps/rejected': -181.9813690185547, 'logps/ref_chosen': -65.25862884521484, 'logps/ref_rejected': -96.5274887084961, 'KL/chosen_KL_mean': -46.89853286743164, 'KL/rejected_KL_mean': -85.4538803100586, 'KL/mean': -66.17620849609375, 'KL/std': 56.045082092285156, 'logits/chosen': -0.6146658658981323, 'logits/rejected': -0.6087555885314941, 'epoch': 0.14} + 14%|█▍ | 98/681 [04:10<24:27, 2.52s/it] 15%|█▍ | 99/681 [04:13<23:30, 2.42s/it] {'loss': 1.0399, 'grad_norm': 18.111738204956055, 'learning_rate': 4.972349644343108e-07, 'fcm_dpo/beta': 0.010768149048089981, 'fcm_dpo/q_t': 0.38575196266174316, 'fcm_dpo/delta': -0.1532631367444992, 'fcm_dpo/margin': 50.522987365722656, 'margin_dpo/margin_mean': 50.522987365722656, 'margin_dpo/margin_std': 72.46966552734375, 'logps/chosen': -79.99114990234375, 'logps/rejected': -171.31356811523438, 'logps/ref_chosen': -45.638484954833984, 'logps/ref_rejected': -86.43793487548828, 'KL/chosen_KL_mean': -34.3526611328125, 'KL/rejected_KL_mean': -84.87564086914062, 'KL/mean': -59.61415481567383, 'KL/std': 57.15583038330078, 'logits/chosen': -0.5844358205795288, 'logits/rejected': -0.5914009213447571, 'epoch': 0.15} + 15%|█▍ | 99/681 [04:13<23:30, 2.42s/it] 15%|█▍ | 100/681 [04:15<24:02, 2.48s/it] {'loss': 1.2548, 'grad_norm': 22.503297805786133, 'learning_rate': 4.970413680203148e-07, 'fcm_dpo/beta': 0.010749414563179016, 'fcm_dpo/q_t': 0.44148170948028564, 'fcm_dpo/delta': 0.02157057449221611, 'fcm_dpo/margin': 23.65607452392578, 'margin_dpo/margin_mean': 23.65607452392578, 'margin_dpo/margin_std': 61.108909606933594, 'logps/chosen': -97.24137878417969, 'logps/rejected': -137.3636932373047, 'logps/ref_chosen': -57.59397888183594, 'logps/ref_rejected': -74.06021118164062, 'KL/chosen_KL_mean': -39.64739990234375, 'KL/rejected_KL_mean': -63.3034782409668, 'KL/mean': -51.475440979003906, 'KL/std': 47.796791076660156, 'logits/chosen': -0.5854922533035278, 'logits/rejected': -0.5520744323730469, 'epoch': 0.15} + 15%|█▍ | 100/681 [04:15<24:02, 2.48s/it] 15%|█▍ | 101/681 [04:18<23:46, 2.46s/it] {'loss': 1.1985, 'grad_norm': 21.163375854492188, 'learning_rate': 4.968412618365215e-07, 'fcm_dpo/beta': 0.010717286728322506, 'fcm_dpo/q_t': 0.4272317886352539, 'fcm_dpo/delta': -0.025740258395671844, 'fcm_dpo/margin': 31.800077438354492, 'margin_dpo/margin_mean': 31.800079345703125, 'margin_dpo/margin_std': 68.4903793334961, 'logps/chosen': -108.93535614013672, 'logps/rejected': -162.27627563476562, 'logps/ref_chosen': -61.64885330200195, 'logps/ref_rejected': -83.18968200683594, 'KL/chosen_KL_mean': -47.286502838134766, 'KL/rejected_KL_mean': -79.08659362792969, 'KL/mean': -63.186546325683594, 'KL/std': 57.57587432861328, 'logits/chosen': -0.5871464014053345, 'logits/rejected': -0.5648493766784668, 'epoch': 0.15} + 15%|█▍ | 101/681 [04:18<23:46, 2.46s/it] 15%|█▍ | 102/681 [04:20<23:32, 2.44s/it] {'loss': 1.2855, 'grad_norm': 24.271780014038086, 'learning_rate': 4.966346511559149e-07, 'fcm_dpo/beta': 0.010729561559855938, 'fcm_dpo/q_t': 0.44714266061782837, 'fcm_dpo/delta': 0.011440283618867397, 'fcm_dpo/margin': 21.564611434936523, 'margin_dpo/margin_mean': 21.564613342285156, 'margin_dpo/margin_std': 64.52105712890625, 'logps/chosen': -113.45904541015625, 'logps/rejected': -139.13185119628906, 'logps/ref_chosen': -64.0788803100586, 'logps/ref_rejected': -68.18707275390625, 'KL/chosen_KL_mean': -49.380165100097656, 'KL/rejected_KL_mean': -70.94477844238281, 'KL/mean': -60.16246795654297, 'KL/std': 52.73678970336914, 'logits/chosen': -0.6152628660202026, 'logits/rejected': -0.5813932418823242, 'epoch': 0.15} + 15%|█▍ | 102/681 [04:20<23:32, 2.44s/it] 15%|█▌ | 103/681 [04:22<23:16, 2.42s/it] {'loss': 1.0745, 'grad_norm': 21.387969970703125, 'learning_rate': 4.964215414228785e-07, 'fcm_dpo/beta': 0.010541867464780807, 'fcm_dpo/q_t': 0.39261913299560547, 'fcm_dpo/delta': -0.11135346442461014, 'fcm_dpo/margin': 47.94136047363281, 'margin_dpo/margin_mean': 47.94136047363281, 'margin_dpo/margin_std': 75.64704895019531, 'logps/chosen': -103.22856903076172, 'logps/rejected': -183.443359375, 'logps/ref_chosen': -61.299278259277344, 'logps/ref_rejected': -93.57270812988281, 'KL/chosen_KL_mean': -41.929290771484375, 'KL/rejected_KL_mean': -89.87065124511719, 'KL/mean': -65.89997100830078, 'KL/std': 59.839744567871094, 'logits/chosen': -0.5887047052383423, 'logits/rejected': -0.5597223043441772, 'epoch': 0.15} + 15%|█▌ | 103/681 [04:22<23:16, 2.42s/it] 15%|█▌ | 104/681 [04:25<22:41, 2.36s/it] {'loss': 1.104, 'grad_norm': 19.554399490356445, 'learning_rate': 4.96201938253052e-07, 'fcm_dpo/beta': 0.010378319770097733, 'fcm_dpo/q_t': 0.4016958773136139, 'fcm_dpo/delta': -0.08192168176174164, 'fcm_dpo/margin': 46.05828094482422, 'margin_dpo/margin_mean': 46.05828094482422, 'margin_dpo/margin_std': 78.06277465820312, 'logps/chosen': -95.88116455078125, 'logps/rejected': -177.13137817382812, 'logps/ref_chosen': -54.372772216796875, 'logps/ref_rejected': -89.5647201538086, 'KL/chosen_KL_mean': -41.508384704589844, 'KL/rejected_KL_mean': -87.56666564941406, 'KL/mean': -64.53752899169922, 'KL/std': 62.41368103027344, 'logits/chosen': -0.6282894015312195, 'logits/rejected': -0.6099402904510498, 'epoch': 0.15} + 15%|█▌ | 104/681 [04:25<22:41, 2.36s/it] 15%|█▌ | 105/681 [04:27<23:22, 2.43s/it] {'loss': 0.9308, 'grad_norm': 18.77589988708496, 'learning_rate': 4.959758474331832e-07, 'fcm_dpo/beta': 0.009946699254214764, 'fcm_dpo/q_t': 0.3527500033378601, 'fcm_dpo/delta': -0.26617431640625, 'fcm_dpo/margin': 65.26294708251953, 'margin_dpo/margin_mean': 65.26294708251953, 'margin_dpo/margin_std': 64.84896087646484, 'logps/chosen': -93.05621337890625, 'logps/rejected': -201.6537322998047, 'logps/ref_chosen': -54.638946533203125, 'logps/ref_rejected': -97.97351837158203, 'KL/chosen_KL_mean': -38.417266845703125, 'KL/rejected_KL_mean': -103.68021392822266, 'KL/mean': -71.04873657226562, 'KL/std': 57.34044647216797, 'logits/chosen': -0.5998860597610474, 'logits/rejected': -0.5840677618980408, 'epoch': 0.15} + 15%|█▌ | 105/681 [04:27<23:22, 2.43s/it] 16%|█▌ | 106/681 [04:30<23:27, 2.45s/it] {'loss': 1.1055, 'grad_norm': 18.440258026123047, 'learning_rate': 4.957432749209755e-07, 'fcm_dpo/beta': 0.00979258120059967, 'fcm_dpo/q_t': 0.4101172089576721, 'fcm_dpo/delta': 0.008240575902163982, 'fcm_dpo/margin': 40.03329086303711, 'margin_dpo/margin_mean': 40.033287048339844, 'margin_dpo/margin_std': 56.37229919433594, 'logps/chosen': -98.31952667236328, 'logps/rejected': -168.74453735351562, 'logps/ref_chosen': -54.83289337158203, 'logps/ref_rejected': -85.22461700439453, 'KL/chosen_KL_mean': -43.48663330078125, 'KL/rejected_KL_mean': -83.5199203491211, 'KL/mean': -63.503273010253906, 'KL/std': 52.4485969543457, 'logits/chosen': -0.5603185296058655, 'logits/rejected': -0.5446274280548096, 'epoch': 0.16} + 16%|█▌ | 106/681 [04:30<23:27, 2.45s/it] 16%|█▌ | 107/681 [04:32<23:55, 2.50s/it] {'loss': 1.0986, 'grad_norm': 19.489307403564453, 'learning_rate': 4.955042268449307e-07, 'fcm_dpo/beta': 0.009722733870148659, 'fcm_dpo/q_t': 0.40253520011901855, 'fcm_dpo/delta': -0.0357857272028923, 'fcm_dpo/margin': 44.61316680908203, 'margin_dpo/margin_mean': 44.61316680908203, 'margin_dpo/margin_std': 67.18595886230469, 'logps/chosen': -120.71109008789062, 'logps/rejected': -190.35595703125, 'logps/ref_chosen': -69.70780944824219, 'logps/ref_rejected': -94.73950958251953, 'KL/chosen_KL_mean': -51.00328063964844, 'KL/rejected_KL_mean': -95.61644744873047, 'KL/mean': -73.30986022949219, 'KL/std': 63.23443603515625, 'logits/chosen': -0.5885684490203857, 'logits/rejected': -0.549545168876648, 'epoch': 0.16} + 16%|█▌ | 107/681 [04:32<23:55, 2.50s/it] 16%|█▌ | 108/681 [04:35<23:31, 2.46s/it] {'loss': 1.0727, 'grad_norm': 17.88391876220703, 'learning_rate': 4.952587095041881e-07, 'fcm_dpo/beta': 0.009545030072331429, 'fcm_dpo/q_t': 0.3897445499897003, 'fcm_dpo/delta': -0.14720328152179718, 'fcm_dpo/margin': 56.510475158691406, 'margin_dpo/margin_mean': 56.510475158691406, 'margin_dpo/margin_std': 89.7379150390625, 'logps/chosen': -99.2032699584961, 'logps/rejected': -195.4998779296875, 'logps/ref_chosen': -56.0098876953125, 'logps/ref_rejected': -95.79601287841797, 'KL/chosen_KL_mean': -43.193382263183594, 'KL/rejected_KL_mean': -99.703857421875, 'KL/mean': -71.44862365722656, 'KL/std': 69.26988220214844, 'logits/chosen': -0.5926969051361084, 'logits/rejected': -0.5816439390182495, 'epoch': 0.16} + 16%|█▌ | 108/681 [04:35<23:31, 2.46s/it] 16%|█▌ | 109/681 [04:38<24:37, 2.58s/it] {'loss': 1.0587, 'grad_norm': 20.214975357055664, 'learning_rate': 4.95006729368358e-07, 'fcm_dpo/beta': 0.009237101301550865, 'fcm_dpo/q_t': 0.3831191956996918, 'fcm_dpo/delta': -0.13259612023830414, 'fcm_dpo/margin': 56.80544662475586, 'margin_dpo/margin_mean': 56.805450439453125, 'margin_dpo/margin_std': 82.919921875, 'logps/chosen': -103.83297729492188, 'logps/rejected': -196.43865966796875, 'logps/ref_chosen': -62.88549041748047, 'logps/ref_rejected': -98.68573760986328, 'KL/chosen_KL_mean': -40.947486877441406, 'KL/rejected_KL_mean': -97.7529296875, 'KL/mean': -69.35021209716797, 'KL/std': 64.73497772216797, 'logits/chosen': -0.5185987949371338, 'logits/rejected': -0.4973178505897522, 'epoch': 0.16} + 16%|█▌ | 109/681 [04:38<24:37, 2.58s/it] 16%|█▌ | 110/681 [04:40<24:48, 2.61s/it] {'loss': 1.1026, 'grad_norm': 16.951026916503906, 'learning_rate': 4.947482930773511e-07, 'fcm_dpo/beta': 0.009010251611471176, 'fcm_dpo/q_t': 0.39977186918258667, 'fcm_dpo/delta': -0.06750426441431046, 'fcm_dpo/margin': 51.11765670776367, 'margin_dpo/margin_mean': 51.11766052246094, 'margin_dpo/margin_std': 78.87454223632812, 'logps/chosen': -100.72158813476562, 'logps/rejected': -172.8355712890625, 'logps/ref_chosen': -58.753684997558594, 'logps/ref_rejected': -79.75001525878906, 'KL/chosen_KL_mean': -41.9679069519043, 'KL/rejected_KL_mean': -93.08556365966797, 'KL/mean': -67.5267333984375, 'KL/std': 64.46952819824219, 'logits/chosen': -0.5459502339363098, 'logits/rejected': -0.5163878798484802, 'epoch': 0.16} + 16%|█▌ | 110/681 [04:40<24:48, 2.61s/it] 16%|█▋ | 111/681 [04:43<24:31, 2.58s/it] {'loss': 1.0866, 'grad_norm': 19.642478942871094, 'learning_rate': 4.944834074412042e-07, 'fcm_dpo/beta': 0.008878624066710472, 'fcm_dpo/q_t': 0.3876160979270935, 'fcm_dpo/delta': -0.12616059184074402, 'fcm_dpo/margin': 58.406497955322266, 'margin_dpo/margin_mean': 58.40650177001953, 'margin_dpo/margin_std': 92.70616149902344, 'logps/chosen': -115.31114196777344, 'logps/rejected': -203.52239990234375, 'logps/ref_chosen': -68.62410736083984, 'logps/ref_rejected': -98.42886352539062, 'KL/chosen_KL_mean': -46.687034606933594, 'KL/rejected_KL_mean': -105.0935287475586, 'KL/mean': -75.8902816772461, 'KL/std': 71.61009979248047, 'logits/chosen': -0.5734955072402954, 'logits/rejected': -0.5520858764648438, 'epoch': 0.16} + 16%|█▋ | 111/681 [04:43<24:31, 2.58s/it] 16%|█▋ | 112/681 [04:45<23:31, 2.48s/it] {'loss': 1.1682, 'grad_norm': 16.9028263092041, 'learning_rate': 4.942120794399002e-07, 'fcm_dpo/beta': 0.008953899145126343, 'fcm_dpo/q_t': 0.42883390188217163, 'fcm_dpo/delta': 0.09397280961275101, 'fcm_dpo/margin': 34.507667541503906, 'margin_dpo/margin_mean': 34.507667541503906, 'margin_dpo/margin_std': 58.28282165527344, 'logps/chosen': -91.65974426269531, 'logps/rejected': -140.69219970703125, 'logps/ref_chosen': -50.24964141845703, 'logps/ref_rejected': -64.77442932128906, 'KL/chosen_KL_mean': -41.41010284423828, 'KL/rejected_KL_mean': -75.91777038574219, 'KL/mean': -58.663936614990234, 'KL/std': 46.183799743652344, 'logits/chosen': -0.5514860153198242, 'logits/rejected': -0.5197386741638184, 'epoch': 0.16} + 16%|█▋ | 112/681 [04:45<23:31, 2.48s/it] 17%|█▋ | 113/681 [04:48<23:54, 2.52s/it] {'loss': 1.1542, 'grad_norm': 22.58910369873047, 'learning_rate': 4.939343162231841e-07, 'fcm_dpo/beta': 0.009143839590251446, 'fcm_dpo/q_t': 0.42639607191085815, 'fcm_dpo/delta': 0.07819047570228577, 'fcm_dpo/margin': 35.46611785888672, 'margin_dpo/margin_mean': 35.466121673583984, 'margin_dpo/margin_std': 58.21109390258789, 'logps/chosen': -119.92851257324219, 'logps/rejected': -166.650390625, 'logps/ref_chosen': -66.71295166015625, 'logps/ref_rejected': -77.96870422363281, 'KL/chosen_KL_mean': -53.2155647277832, 'KL/rejected_KL_mean': -88.68169403076172, 'KL/mean': -70.94862365722656, 'KL/std': 53.065284729003906, 'logits/chosen': -0.5312271118164062, 'logits/rejected': -0.4909241497516632, 'epoch': 0.17} + 17%|█▋ | 113/681 [04:48<23:54, 2.52s/it] 17%|█▋ | 114/681 [04:50<23:46, 2.52s/it] {'loss': 1.0516, 'grad_norm': 19.71776580810547, 'learning_rate': 4.936501251103751e-07, 'fcm_dpo/beta': 0.008939421735703945, 'fcm_dpo/q_t': 0.3906566798686981, 'fcm_dpo/delta': -0.13344621658325195, 'fcm_dpo/margin': 58.719337463378906, 'margin_dpo/margin_mean': 58.719337463378906, 'margin_dpo/margin_std': 87.8689956665039, 'logps/chosen': -104.36673736572266, 'logps/rejected': -192.41064453125, 'logps/ref_chosen': -57.78507995605469, 'logps/ref_rejected': -87.10966491699219, 'KL/chosen_KL_mean': -46.58165740966797, 'KL/rejected_KL_mean': -105.30099487304688, 'KL/mean': -75.94132232666016, 'KL/std': 72.10044860839844, 'logits/chosen': -0.5248334407806396, 'logits/rejected': -0.4965624213218689, 'epoch': 0.17} + 17%|█▋ | 114/681 [04:50<23:46, 2.52s/it] 17%|█▋ | 115/681 [04:53<24:09, 2.56s/it] {'loss': 1.2073, 'grad_norm': 25.762571334838867, 'learning_rate': 4.933595135901732e-07, 'fcm_dpo/beta': 0.009012718684971333, 'fcm_dpo/q_t': 0.424325168132782, 'fcm_dpo/delta': 0.034050408750772476, 'fcm_dpo/margin': 40.69775390625, 'margin_dpo/margin_mean': 40.69775390625, 'margin_dpo/margin_std': 93.00070190429688, 'logps/chosen': -124.69458770751953, 'logps/rejected': -198.37521362304688, 'logps/ref_chosen': -65.5826416015625, 'logps/ref_rejected': -98.56552124023438, 'KL/chosen_KL_mean': -59.11194610595703, 'KL/rejected_KL_mean': -99.8096923828125, 'KL/mean': -79.4608154296875, 'KL/std': 69.4343490600586, 'logits/chosen': -0.5764279961585999, 'logits/rejected': -0.5661026239395142, 'epoch': 0.17} + 17%|█▋ | 115/681 [04:53<24:09, 2.56s/it] 17%|█▋ | 116/681 [04:55<23:29, 2.49s/it] {'loss': 1.0915, 'grad_norm': 18.27412223815918, 'learning_rate': 4.930624893204624e-07, 'fcm_dpo/beta': 0.00902644731104374, 'fcm_dpo/q_t': 0.40703320503234863, 'fcm_dpo/delta': -0.012853723019361496, 'fcm_dpo/margin': 45.62263488769531, 'margin_dpo/margin_mean': 45.62263870239258, 'margin_dpo/margin_std': 62.58723831176758, 'logps/chosen': -93.50837707519531, 'logps/rejected': -168.2525634765625, 'logps/ref_chosen': -51.40031433105469, 'logps/ref_rejected': -80.5218505859375, 'KL/chosen_KL_mean': -42.108062744140625, 'KL/rejected_KL_mean': -87.73070526123047, 'KL/mean': -64.91938018798828, 'KL/std': 55.80364990234375, 'logits/chosen': -0.5399957895278931, 'logits/rejected': -0.5367946624755859, 'epoch': 0.17} + 17%|█▋ | 116/681 [04:55<23:29, 2.49s/it] 17%|█▋ | 117/681 [04:58<23:20, 2.48s/it] {'loss': 1.1898, 'grad_norm': 24.457244873046875, 'learning_rate': 4.927590601281083e-07, 'fcm_dpo/beta': 0.009095819666981697, 'fcm_dpo/q_t': 0.430633544921875, 'fcm_dpo/delta': 0.09368915110826492, 'fcm_dpo/margin': 33.996551513671875, 'margin_dpo/margin_mean': 33.996551513671875, 'margin_dpo/margin_std': 68.07157897949219, 'logps/chosen': -124.71703338623047, 'logps/rejected': -155.99917602539062, 'logps/ref_chosen': -69.29840850830078, 'logps/ref_rejected': -66.583984375, 'KL/chosen_KL_mean': -55.41862487792969, 'KL/rejected_KL_mean': -89.41517639160156, 'KL/mean': -72.41690826416016, 'KL/std': 57.73213195800781, 'logits/chosen': -0.527985692024231, 'logits/rejected': -0.4939156174659729, 'epoch': 0.17} + 17%|█▋ | 117/681 [04:58<23:20, 2.48s/it] 17%|█▋ | 118/681 [05:00<23:22, 2.49s/it] {'loss': 1.1042, 'grad_norm': 18.125078201293945, 'learning_rate': 4.924492340087524e-07, 'fcm_dpo/beta': 0.009164330549538136, 'fcm_dpo/q_t': 0.4111817479133606, 'fcm_dpo/delta': 0.006666237488389015, 'fcm_dpo/margin': 42.942481994628906, 'margin_dpo/margin_mean': 42.942481994628906, 'margin_dpo/margin_std': 61.708221435546875, 'logps/chosen': -98.68817138671875, 'logps/rejected': -161.6587371826172, 'logps/ref_chosen': -55.6409797668457, 'logps/ref_rejected': -75.66905975341797, 'KL/chosen_KL_mean': -43.04719161987305, 'KL/rejected_KL_mean': -85.98967742919922, 'KL/mean': -64.5184326171875, 'KL/std': 53.621681213378906, 'logits/chosen': -0.5757678151130676, 'logits/rejected': -0.558840274810791, 'epoch': 0.17} + 17%|█▋ | 118/681 [05:00<23:22, 2.49s/it] 17%|█▋ | 119/681 [05:03<23:53, 2.55s/it] {'loss': 1.1422, 'grad_norm': 19.924360275268555, 'learning_rate': 4.92133019126601e-07, 'fcm_dpo/beta': 0.009107567369937897, 'fcm_dpo/q_t': 0.4156750738620758, 'fcm_dpo/delta': 0.011166650801897049, 'fcm_dpo/margin': 42.652496337890625, 'margin_dpo/margin_mean': 42.652496337890625, 'margin_dpo/margin_std': 74.453125, 'logps/chosen': -131.54296875, 'logps/rejected': -203.6625518798828, 'logps/ref_chosen': -73.51019287109375, 'logps/ref_rejected': -102.977294921875, 'KL/chosen_KL_mean': -58.032772064208984, 'KL/rejected_KL_mean': -100.68525695800781, 'KL/mean': -79.35901641845703, 'KL/std': 59.09315490722656, 'logits/chosen': -0.5483442544937134, 'logits/rejected': -0.5380154848098755, 'epoch': 0.17} + 17%|█▋ | 119/681 [05:03<23:53, 2.55s/it] 18%|█▊ | 120/681 [05:06<24:27, 2.62s/it] {'loss': 1.0263, 'grad_norm': 20.973718643188477, 'learning_rate': 4.918104238142103e-07, 'fcm_dpo/beta': 0.008971385657787323, 'fcm_dpo/q_t': 0.381797730922699, 'fcm_dpo/delta': -0.14341270923614502, 'fcm_dpo/margin': 59.69819641113281, 'margin_dpo/margin_mean': 59.69819641113281, 'margin_dpo/margin_std': 77.37503051757812, 'logps/chosen': -134.42904663085938, 'logps/rejected': -225.37014770507812, 'logps/ref_chosen': -76.78083801269531, 'logps/ref_rejected': -108.02374267578125, 'KL/chosen_KL_mean': -57.64821243286133, 'KL/rejected_KL_mean': -117.34640502929688, 'KL/mean': -87.497314453125, 'KL/std': 71.73361206054688, 'logits/chosen': -0.5744304656982422, 'logits/rejected': -0.5464004278182983, 'epoch': 0.18} + 18%|█▊ | 120/681 [05:06<24:27, 2.62s/it] 18%|█▊ | 121/681 [05:08<24:06, 2.58s/it] {'loss': 1.0099, 'grad_norm': 19.9356689453125, 'learning_rate': 4.91481456572267e-07, 'fcm_dpo/beta': 0.008634019643068314, 'fcm_dpo/q_t': 0.37362366914749146, 'fcm_dpo/delta': -0.187477245926857, 'fcm_dpo/margin': 66.66979217529297, 'margin_dpo/margin_mean': 66.66979217529297, 'margin_dpo/margin_std': 85.21018981933594, 'logps/chosen': -115.517333984375, 'logps/rejected': -230.3917999267578, 'logps/ref_chosen': -61.789894104003906, 'logps/ref_rejected': -109.99456787109375, 'KL/chosen_KL_mean': -53.727439880371094, 'KL/rejected_KL_mean': -120.39723205566406, 'KL/mean': -87.06233215332031, 'KL/std': 69.44754028320312, 'logits/chosen': -0.5384774208068848, 'logits/rejected': -0.5364508628845215, 'epoch': 0.18} + 18%|█▊ | 121/681 [05:08<24:06, 2.58s/it] 18%|█▊ | 122/681 [05:10<23:20, 2.51s/it] {'loss': 0.9294, 'grad_norm': 20.8872013092041, 'learning_rate': 4.911461260693638e-07, 'fcm_dpo/beta': 0.008252674713730812, 'fcm_dpo/q_t': 0.35448387265205383, 'fcm_dpo/delta': -0.27251002192497253, 'fcm_dpo/margin': 79.34767150878906, 'margin_dpo/margin_mean': 79.34767150878906, 'margin_dpo/margin_std': 80.3590316772461, 'logps/chosen': -94.33441162109375, 'logps/rejected': -233.4940643310547, 'logps/ref_chosen': -46.9022102355957, 'logps/ref_rejected': -106.71418762207031, 'KL/chosen_KL_mean': -47.43219757080078, 'KL/rejected_KL_mean': -126.77987670898438, 'KL/mean': -87.10603332519531, 'KL/std': 75.40602111816406, 'logits/chosen': -0.4834294021129608, 'logits/rejected': -0.5015791058540344, 'epoch': 0.18} + 18%|█▊ | 122/681 [05:10<23:20, 2.51s/it] 18%|█▊ | 123/681 [05:13<23:54, 2.57s/it] {'loss': 1.1298, 'grad_norm': 21.224868774414062, 'learning_rate': 4.908044411417711e-07, 'fcm_dpo/beta': 0.008060860447585583, 'fcm_dpo/q_t': 0.4093893766403198, 'fcm_dpo/delta': -0.018722567707300186, 'fcm_dpo/margin': 51.75164031982422, 'margin_dpo/margin_mean': 51.75164031982422, 'margin_dpo/margin_std': 88.38829040527344, 'logps/chosen': -114.85078430175781, 'logps/rejected': -193.0391845703125, 'logps/ref_chosen': -61.33863830566406, 'logps/ref_rejected': -87.775390625, 'KL/chosen_KL_mean': -53.51214599609375, 'KL/rejected_KL_mean': -105.2637939453125, 'KL/mean': -79.38796997070312, 'KL/std': 65.03099060058594, 'logits/chosen': -0.504252552986145, 'logits/rejected': -0.48894137144088745, 'epoch': 0.18} + 18%|█▊ | 123/681 [05:13<23:54, 2.57s/it] 18%|█▊ | 124/681 [05:16<23:56, 2.58s/it] {'loss': 1.0383, 'grad_norm': 21.635025024414062, 'learning_rate': 4.904564107932048e-07, 'fcm_dpo/beta': 0.007825289852917194, 'fcm_dpo/q_t': 0.37590959668159485, 'fcm_dpo/delta': -0.21662405133247375, 'fcm_dpo/margin': 77.05169677734375, 'margin_dpo/margin_mean': 77.05169677734375, 'margin_dpo/margin_std': 113.15299987792969, 'logps/chosen': -134.11981201171875, 'logps/rejected': -257.3037414550781, 'logps/ref_chosen': -71.44833374023438, 'logps/ref_rejected': -117.58056640625, 'KL/chosen_KL_mean': -62.671470642089844, 'KL/rejected_KL_mean': -139.72317504882812, 'KL/mean': -101.19732666015625, 'KL/std': 90.27295684814453, 'logits/chosen': -0.4845294654369354, 'logits/rejected': -0.488964319229126, 'epoch': 0.18} + 18%|█▊ | 124/681 [05:16<23:56, 2.58s/it] 18%|█▊ | 125/681 [05:18<23:37, 2.55s/it] {'loss': 1.0649, 'grad_norm': 18.291349411010742, 'learning_rate': 4.90102044194588e-07, 'fcm_dpo/beta': 0.007650085724890232, 'fcm_dpo/q_t': 0.3918633460998535, 'fcm_dpo/delta': -0.10223683714866638, 'fcm_dpo/margin': 64.99887084960938, 'margin_dpo/margin_mean': 64.99887084960938, 'margin_dpo/margin_std': 93.5775375366211, 'logps/chosen': -99.81968688964844, 'logps/rejected': -198.67022705078125, 'logps/ref_chosen': -50.136940002441406, 'logps/ref_rejected': -83.98861694335938, 'KL/chosen_KL_mean': -49.68274688720703, 'KL/rejected_KL_mean': -114.68162536621094, 'KL/mean': -82.18218231201172, 'KL/std': 74.36106872558594, 'logits/chosen': -0.4274330139160156, 'logits/rejected': -0.4299238622188568, 'epoch': 0.18} + 18%|█▊ | 125/681 [05:18<23:37, 2.55s/it] 19%|█▊ | 126/681 [05:21<23:52, 2.58s/it] {'loss': 1.0617, 'grad_norm': 18.59366226196289, 'learning_rate': 4.897413506838102e-07, 'fcm_dpo/beta': 0.007492750883102417, 'fcm_dpo/q_t': 0.3952334523200989, 'fcm_dpo/delta': -0.08181630074977875, 'fcm_dpo/margin': 63.75071716308594, 'margin_dpo/margin_mean': 63.75071716308594, 'margin_dpo/margin_std': 88.2365493774414, 'logps/chosen': -108.67198181152344, 'logps/rejected': -214.8853759765625, 'logps/ref_chosen': -55.66706848144531, 'logps/ref_rejected': -98.1297607421875, 'KL/chosen_KL_mean': -53.004913330078125, 'KL/rejected_KL_mean': -116.75562286376953, 'KL/mean': -84.8802719116211, 'KL/std': 69.43212890625, 'logits/chosen': -0.4854479432106018, 'logits/rejected': -0.4832964539527893, 'epoch': 0.19} + 19%|█▊ | 126/681 [05:21<23:52, 2.58s/it] 19%|█▊ | 127/681 [05:24<24:04, 2.61s/it] {'loss': 1.152, 'grad_norm': 17.630146026611328, 'learning_rate': 4.89374339765481e-07, 'fcm_dpo/beta': 0.007554207928478718, 'fcm_dpo/q_t': 0.4214317202568054, 'fcm_dpo/delta': 0.06351131945848465, 'fcm_dpo/margin': 44.81909942626953, 'margin_dpo/margin_mean': 44.81909942626953, 'margin_dpo/margin_std': 74.9388198852539, 'logps/chosen': -108.04594421386719, 'logps/rejected': -173.10614013671875, 'logps/ref_chosen': -56.55467987060547, 'logps/ref_rejected': -76.7957763671875, 'KL/chosen_KL_mean': -51.491268157958984, 'KL/rejected_KL_mean': -96.31036376953125, 'KL/mean': -73.90081787109375, 'KL/std': 60.034873962402344, 'logits/chosen': -0.4814761281013489, 'logits/rejected': -0.46358734369277954, 'epoch': 0.19} + 19%|█▊ | 127/681 [05:24<24:04, 2.61s/it] 19%|█▉ | 128/681 [05:26<24:17, 2.63s/it] {'loss': 1.1543, 'grad_norm': 19.399093627929688, 'learning_rate': 4.890010211106795e-07, 'fcm_dpo/beta': 0.007643786258995533, 'fcm_dpo/q_t': 0.4184267520904541, 'fcm_dpo/delta': 0.03590956702828407, 'fcm_dpo/margin': 47.74309539794922, 'margin_dpo/margin_mean': 47.74309539794922, 'margin_dpo/margin_std': 85.46942138671875, 'logps/chosen': -111.0107421875, 'logps/rejected': -177.0718536376953, 'logps/ref_chosen': -58.12095642089844, 'logps/ref_rejected': -76.43896484375, 'KL/chosen_KL_mean': -52.88978958129883, 'KL/rejected_KL_mean': -100.63288879394531, 'KL/mean': -76.76133728027344, 'KL/std': 68.35536193847656, 'logits/chosen': -0.5004081726074219, 'logits/rejected': -0.4844392240047455, 'epoch': 0.19} + 19%|█▉ | 128/681 [05:26<24:17, 2.63s/it] 19%|█▉ | 129/681 [05:29<24:04, 2.62s/it] {'loss': 1.1857, 'grad_norm': 19.868791580200195, 'learning_rate': 4.88621404556699e-07, 'fcm_dpo/beta': 0.007660663686692715, 'fcm_dpo/q_t': 0.42177292704582214, 'fcm_dpo/delta': 0.027477692812681198, 'fcm_dpo/margin': 48.760475158691406, 'margin_dpo/margin_mean': 48.760475158691406, 'margin_dpo/margin_std': 104.92391967773438, 'logps/chosen': -132.6300048828125, 'logps/rejected': -211.11631774902344, 'logps/ref_chosen': -66.91637420654297, 'logps/ref_rejected': -96.6422119140625, 'KL/chosen_KL_mean': -65.713623046875, 'KL/rejected_KL_mean': -114.47410583496094, 'KL/mean': -90.0938720703125, 'KL/std': 78.53107452392578, 'logits/chosen': -0.5160699486732483, 'logits/rejected': -0.5076286792755127, 'epoch': 0.19} + 19%|█▉ | 129/681 [05:29<24:04, 2.62s/it] 19%|█▉ | 130/681 [05:31<23:25, 2.55s/it] {'loss': 1.0229, 'grad_norm': 16.111379623413086, 'learning_rate': 4.882355001067891e-07, 'fcm_dpo/beta': 0.0075556435622274876, 'fcm_dpo/q_t': 0.37920111417770386, 'fcm_dpo/delta': -0.16790251433849335, 'fcm_dpo/margin': 73.87777709960938, 'margin_dpo/margin_mean': 73.87777709960938, 'margin_dpo/margin_std': 93.94640350341797, 'logps/chosen': -91.8436508178711, 'logps/rejected': -203.83624267578125, 'logps/ref_chosen': -44.66685104370117, 'logps/ref_rejected': -82.78165435791016, 'KL/chosen_KL_mean': -47.17679977416992, 'KL/rejected_KL_mean': -121.05458068847656, 'KL/mean': -84.11569213867188, 'KL/std': 75.62738037109375, 'logits/chosen': -0.4705796241760254, 'logits/rejected': -0.46549493074417114, 'epoch': 0.19} + 19%|█▉ | 130/681 [05:31<23:25, 2.55s/it] 19%|█▉ | 131/681 [05:34<23:17, 2.54s/it] {'loss': 1.0082, 'grad_norm': 20.645126342773438, 'learning_rate': 4.878433179298909e-07, 'fcm_dpo/beta': 0.007246783934533596, 'fcm_dpo/q_t': 0.3767462372779846, 'fcm_dpo/delta': -0.1462840735912323, 'fcm_dpo/margin': 74.26715087890625, 'margin_dpo/margin_mean': 74.26715850830078, 'margin_dpo/margin_std': 86.19850158691406, 'logps/chosen': -88.26079559326172, 'logps/rejected': -206.04736328125, 'logps/ref_chosen': -44.924591064453125, 'logps/ref_rejected': -88.44401550292969, 'KL/chosen_KL_mean': -43.336204528808594, 'KL/rejected_KL_mean': -117.60336303710938, 'KL/mean': -80.46978759765625, 'KL/std': 78.42054748535156, 'logits/chosen': -0.4658737778663635, 'logits/rejected': -0.47121483087539673, 'epoch': 0.19} + 19%|█▉ | 131/681 [05:34<23:17, 2.54s/it] 19%|█▉ | 132/681 [05:36<23:33, 2.57s/it] {'loss': 1.1026, 'grad_norm': 18.950071334838867, 'learning_rate': 4.874448683603694e-07, 'fcm_dpo/beta': 0.007140764966607094, 'fcm_dpo/q_t': 0.4060777425765991, 'fcm_dpo/delta': -0.04240689054131508, 'fcm_dpo/margin': 61.66801452636719, 'margin_dpo/margin_mean': 61.66801452636719, 'margin_dpo/margin_std': 99.96194458007812, 'logps/chosen': -115.6361083984375, 'logps/rejected': -206.1951904296875, 'logps/ref_chosen': -59.00108337402344, 'logps/ref_rejected': -87.89215087890625, 'KL/chosen_KL_mean': -56.63502502441406, 'KL/rejected_KL_mean': -118.30303955078125, 'KL/mean': -87.46903228759766, 'KL/std': 77.27864074707031, 'logits/chosen': -0.5243598222732544, 'logits/rejected': -0.525653600692749, 'epoch': 0.19} + 19%|█▉ | 132/681 [05:36<23:33, 2.57s/it] 20%|█▉ | 133/681 [05:39<23:41, 2.59s/it] {'loss': 1.1346, 'grad_norm': 20.872831344604492, 'learning_rate': 4.870401618977415e-07, 'fcm_dpo/beta': 0.007148602977395058, 'fcm_dpo/q_t': 0.4168737530708313, 'fcm_dpo/delta': 0.022928927093744278, 'fcm_dpo/margin': 52.86082458496094, 'margin_dpo/margin_mean': 52.86082458496094, 'margin_dpo/margin_std': 88.52780151367188, 'logps/chosen': -134.1541748046875, 'logps/rejected': -216.74404907226562, 'logps/ref_chosen': -66.60449981689453, 'logps/ref_rejected': -96.33355712890625, 'KL/chosen_KL_mean': -67.54967498779297, 'KL/rejected_KL_mean': -120.41049194335938, 'KL/mean': -93.98008728027344, 'KL/std': 68.62925720214844, 'logits/chosen': -0.479339599609375, 'logits/rejected': -0.46433907747268677, 'epoch': 0.2} + 20%|█▉ | 133/681 [05:39<23:41, 2.59s/it] 20%|█▉ | 134/681 [05:41<23:02, 2.53s/it] {'loss': 1.0822, 'grad_norm': 16.936574935913086, 'learning_rate': 4.866292092063986e-07, 'fcm_dpo/beta': 0.0071844616904854774, 'fcm_dpo/q_t': 0.40691226720809937, 'fcm_dpo/delta': -0.010975977405905724, 'fcm_dpo/margin': 57.118778228759766, 'margin_dpo/margin_mean': 57.118778228759766, 'margin_dpo/margin_std': 74.62931060791016, 'logps/chosen': -103.90907287597656, 'logps/rejected': -196.61309814453125, 'logps/ref_chosen': -52.06925582885742, 'logps/ref_rejected': -87.6545181274414, 'KL/chosen_KL_mean': -51.839813232421875, 'KL/rejected_KL_mean': -108.95858001708984, 'KL/mean': -80.39920043945312, 'KL/std': 64.22247314453125, 'logits/chosen': -0.4439271092414856, 'logits/rejected': -0.429843544960022, 'epoch': 0.2} + 20%|█▉ | 134/681 [05:41<23:02, 2.53s/it] 20%|█▉ | 135/681 [05:44<22:48, 2.51s/it] {'loss': 1.009, 'grad_norm': 18.334857940673828, 'learning_rate': 4.862120211153265e-07, 'fcm_dpo/beta': 0.006944045424461365, 'fcm_dpo/q_t': 0.3745703101158142, 'fcm_dpo/delta': -0.1996196210384369, 'fcm_dpo/margin': 84.63871765136719, 'margin_dpo/margin_mean': 84.63871765136719, 'margin_dpo/margin_std': 110.02676391601562, 'logps/chosen': -108.04621887207031, 'logps/rejected': -258.310791015625, 'logps/ref_chosen': -50.353858947753906, 'logps/ref_rejected': -115.97975158691406, 'KL/chosen_KL_mean': -57.692352294921875, 'KL/rejected_KL_mean': -142.3310546875, 'KL/mean': -100.01170349121094, 'KL/std': 90.0093002319336, 'logits/chosen': -0.4614737033843994, 'logits/rejected': -0.49732786417007446, 'epoch': 0.2} + 20%|█▉ | 135/681 [05:44<22:48, 2.51s/it] 20%|█▉ | 136/681 [05:47<23:19, 2.57s/it] {'loss': 1.1537, 'grad_norm': 19.6416015625, 'learning_rate': 4.857886086178193e-07, 'fcm_dpo/beta': 0.006846585310995579, 'fcm_dpo/q_t': 0.4213330149650574, 'fcm_dpo/delta': 0.007697347551584244, 'fcm_dpo/margin': 57.25541305541992, 'margin_dpo/margin_mean': 57.255409240722656, 'margin_dpo/margin_std': 109.88507080078125, 'logps/chosen': -134.77195739746094, 'logps/rejected': -223.27609252929688, 'logps/ref_chosen': -65.072509765625, 'logps/ref_rejected': -96.32122802734375, 'KL/chosen_KL_mean': -69.69944763183594, 'KL/rejected_KL_mean': -126.95484924316406, 'KL/mean': -98.3271484375, 'KL/std': 84.19580078125, 'logits/chosen': -0.4833676218986511, 'logits/rejected': -0.4771433472633362, 'epoch': 0.2} + 20%|█▉ | 136/681 [05:47<23:19, 2.57s/it] 20%|██ | 137/681 [05:49<23:13, 2.56s/it] {'loss': 1.0331, 'grad_norm': 16.7557373046875, 'learning_rate': 4.853589828711902e-07, 'fcm_dpo/beta': 0.006671931594610214, 'fcm_dpo/q_t': 0.3791872262954712, 'fcm_dpo/delta': -0.20688247680664062, 'fcm_dpo/margin': 89.07612609863281, 'margin_dpo/margin_mean': 89.07611846923828, 'margin_dpo/margin_std': 130.33053588867188, 'logps/chosen': -113.42877197265625, 'logps/rejected': -267.60955810546875, 'logps/ref_chosen': -48.759117126464844, 'logps/ref_rejected': -113.86376953125, 'KL/chosen_KL_mean': -64.6696548461914, 'KL/rejected_KL_mean': -153.74578857421875, 'KL/mean': -109.20771789550781, 'KL/std': 107.58679962158203, 'logits/chosen': -0.419033944606781, 'logits/rejected': -0.4432998299598694, 'epoch': 0.2} + 20%|██ | 137/681 [05:49<23:13, 2.56s/it] 20%|██ | 138/681 [05:51<22:32, 2.49s/it] {'loss': 1.0613, 'grad_norm': 18.415468215942383, 'learning_rate': 4.849231551964771e-07, 'fcm_dpo/beta': 0.006574639119207859, 'fcm_dpo/q_t': 0.39788979291915894, 'fcm_dpo/delta': -0.04935392364859581, 'fcm_dpo/margin': 68.0129165649414, 'margin_dpo/margin_mean': 68.0129165649414, 'margin_dpo/margin_std': 85.65242004394531, 'logps/chosen': -129.9835205078125, 'logps/rejected': -230.67373657226562, 'logps/ref_chosen': -60.519649505615234, 'logps/ref_rejected': -93.19694519042969, 'KL/chosen_KL_mean': -69.46388244628906, 'KL/rejected_KL_mean': -137.47679138183594, 'KL/mean': -103.4703369140625, 'KL/std': 83.18930053710938, 'logits/chosen': -0.40760838985443115, 'logits/rejected': -0.3944551348686218, 'epoch': 0.2} + 20%|██ | 138/681 [05:51<22:32, 2.49s/it] 20%|██ | 139/681 [05:54<22:13, 2.46s/it] {'loss': 1.0361, 'grad_norm': 17.002193450927734, 'learning_rate': 4.844811370781446e-07, 'fcm_dpo/beta': 0.006447950378060341, 'fcm_dpo/q_t': 0.38780367374420166, 'fcm_dpo/delta': -0.11413857340812683, 'fcm_dpo/margin': 78.85862731933594, 'margin_dpo/margin_mean': 78.85863494873047, 'margin_dpo/margin_std': 103.01472473144531, 'logps/chosen': -105.60960388183594, 'logps/rejected': -217.30484008789062, 'logps/ref_chosen': -46.89138412475586, 'logps/ref_rejected': -79.72798156738281, 'KL/chosen_KL_mean': -58.71821975708008, 'KL/rejected_KL_mean': -137.57684326171875, 'KL/mean': -98.14753723144531, 'KL/std': 77.90769958496094, 'logits/chosen': -0.40922728180885315, 'logits/rejected': -0.39873528480529785, 'epoch': 0.2} + 20%|██ | 139/681 [05:54<22:13, 2.46s/it] 21%|██ | 140/681 [05:56<21:55, 2.43s/it] {'loss': 1.0896, 'grad_norm': 19.857847213745117, 'learning_rate': 4.840329401637809e-07, 'fcm_dpo/beta': 0.006347954273223877, 'fcm_dpo/q_t': 0.4006612300872803, 'fcm_dpo/delta': -0.04827836528420448, 'fcm_dpo/margin': 70.2745361328125, 'margin_dpo/margin_mean': 70.2745361328125, 'margin_dpo/margin_std': 105.37348937988281, 'logps/chosen': -130.43838500976562, 'logps/rejected': -225.02232360839844, 'logps/ref_chosen': -58.97471618652344, 'logps/ref_rejected': -83.28410339355469, 'KL/chosen_KL_mean': -71.46368408203125, 'KL/rejected_KL_mean': -141.73822021484375, 'KL/mean': -106.60093688964844, 'KL/std': 86.1796875, 'logits/chosen': -0.3932759761810303, 'logits/rejected': -0.37940922379493713, 'epoch': 0.21} + 21%|██ | 140/681 [05:56<21:55, 2.43s/it] 21%|██ | 141/681 [05:59<22:19, 2.48s/it] {'loss': 1.1267, 'grad_norm': 25.337135314941406, 'learning_rate': 4.83578576263792e-07, 'fcm_dpo/beta': 0.006319768726825714, 'fcm_dpo/q_t': 0.4057791233062744, 'fcm_dpo/delta': -0.022237718105316162, 'fcm_dpo/margin': 66.66454315185547, 'margin_dpo/margin_mean': 66.66454315185547, 'margin_dpo/margin_std': 114.31845092773438, 'logps/chosen': -156.29470825195312, 'logps/rejected': -246.07583618164062, 'logps/ref_chosen': -75.07566833496094, 'logps/ref_rejected': -98.1922607421875, 'KL/chosen_KL_mean': -81.21903991699219, 'KL/rejected_KL_mean': -147.88356018066406, 'KL/mean': -114.55131530761719, 'KL/std': 97.91165161132812, 'logits/chosen': -0.4274560213088989, 'logits/rejected': -0.41715872287750244, 'epoch': 0.21} + 21%|██ | 141/681 [05:59<22:19, 2.48s/it] 21%|██ | 142/681 [06:01<22:59, 2.56s/it] {'loss': 1.095, 'grad_norm': 26.112525939941406, 'learning_rate': 4.83118057351089e-07, 'fcm_dpo/beta': 0.006258774548768997, 'fcm_dpo/q_t': 0.39626699686050415, 'fcm_dpo/delta': -0.08255193382501602, 'fcm_dpo/margin': 76.46134185791016, 'margin_dpo/margin_mean': 76.46134185791016, 'margin_dpo/margin_std': 119.62361145019531, 'logps/chosen': -137.92025756835938, 'logps/rejected': -250.93588256835938, 'logps/ref_chosen': -58.027931213378906, 'logps/ref_rejected': -94.58222961425781, 'KL/chosen_KL_mean': -79.89231872558594, 'KL/rejected_KL_mean': -156.35366821289062, 'KL/mean': -118.12299346923828, 'KL/std': 101.84807586669922, 'logits/chosen': -0.4015616774559021, 'logits/rejected': -0.4008292555809021, 'epoch': 0.21} + 21%|██ | 142/681 [06:02<22:59, 2.56s/it] 21%|██ | 143/681 [06:04<23:28, 2.62s/it] {'loss': 1.2025, 'grad_norm': 23.864248275756836, 'learning_rate': 4.826513955607734e-07, 'fcm_dpo/beta': 0.006263419054448605, 'fcm_dpo/q_t': 0.4340221583843231, 'fcm_dpo/delta': 0.09250222891569138, 'fcm_dpo/margin': 49.55633544921875, 'margin_dpo/margin_mean': 49.55633544921875, 'margin_dpo/margin_std': 106.16546630859375, 'logps/chosen': -141.7500457763672, 'logps/rejected': -212.70950317382812, 'logps/ref_chosen': -57.59645080566406, 'logps/ref_rejected': -78.99957275390625, 'KL/chosen_KL_mean': -84.15359497070312, 'KL/rejected_KL_mean': -133.70993041992188, 'KL/mean': -108.9317626953125, 'KL/std': 89.8084945678711, 'logits/chosen': -0.3417607545852661, 'logits/rejected': -0.33318936824798584, 'epoch': 0.21} + 21%|██ | 143/681 [06:04<23:28, 2.62s/it] 21%|██ | 144/681 [06:07<23:52, 2.67s/it] {'loss': 1.1285, 'grad_norm': 21.587350845336914, 'learning_rate': 4.821786031898176e-07, 'fcm_dpo/beta': 0.006343062035739422, 'fcm_dpo/q_t': 0.41614866256713867, 'fcm_dpo/delta': 0.029245702549815178, 'fcm_dpo/margin': 58.62339782714844, 'margin_dpo/margin_mean': 58.62339782714844, 'margin_dpo/margin_std': 91.50389099121094, 'logps/chosen': -136.1982421875, 'logps/rejected': -216.91552734375, 'logps/ref_chosen': -59.90636444091797, 'logps/ref_rejected': -82.00025939941406, 'KL/chosen_KL_mean': -76.2918701171875, 'KL/rejected_KL_mean': -134.91525268554688, 'KL/mean': -105.60356140136719, 'KL/std': 77.67918395996094, 'logits/chosen': -0.37479305267333984, 'logits/rejected': -0.3632616400718689, 'epoch': 0.21} + 21%|██ | 144/681 [06:07<23:52, 2.67s/it] 21%|██▏ | 145/681 [06:10<23:20, 2.61s/it] {'loss': 1.1044, 'grad_norm': 22.88797950744629, 'learning_rate': 4.816996926967401e-07, 'fcm_dpo/beta': 0.006349918898195028, 'fcm_dpo/q_t': 0.4085870385169983, 'fcm_dpo/delta': -0.0037822211161255836, 'fcm_dpo/margin': 63.56344985961914, 'margin_dpo/margin_mean': 63.56344985961914, 'margin_dpo/margin_std': 93.52117919921875, 'logps/chosen': -129.76637268066406, 'logps/rejected': -214.595458984375, 'logps/ref_chosen': -56.60066604614258, 'logps/ref_rejected': -77.86631774902344, 'KL/chosen_KL_mean': -73.16571044921875, 'KL/rejected_KL_mean': -136.72915649414062, 'KL/mean': -104.94743347167969, 'KL/std': 75.64851379394531, 'logits/chosen': -0.40823960304260254, 'logits/rejected': -0.3923068642616272, 'epoch': 0.21} + 21%|██▏ | 145/681 [06:10<23:20, 2.61s/it] 21%|██▏ | 146/681 [06:12<23:07, 2.59s/it] {'loss': 1.1884, 'grad_norm': 26.765342712402344, 'learning_rate': 4.812146767012779e-07, 'fcm_dpo/beta': 0.006410893052816391, 'fcm_dpo/q_t': 0.427983820438385, 'fcm_dpo/delta': 0.08496344089508057, 'fcm_dpo/margin': 49.569610595703125, 'margin_dpo/margin_mean': 49.569610595703125, 'margin_dpo/margin_std': 97.2584228515625, 'logps/chosen': -162.42832946777344, 'logps/rejected': -227.7002716064453, 'logps/ref_chosen': -66.00045013427734, 'logps/ref_rejected': -81.70278930664062, 'KL/chosen_KL_mean': -96.4278793334961, 'KL/rejected_KL_mean': -145.9974822998047, 'KL/mean': -121.21267700195312, 'KL/std': 80.74990844726562, 'logits/chosen': -0.3858996033668518, 'logits/rejected': -0.3604584336280823, 'epoch': 0.21} + 21%|██▏ | 146/681 [06:12<23:07, 2.59s/it] 22%|██▏ | 147/681 [06:15<23:07, 2.60s/it] {'loss': 1.1021, 'grad_norm': 20.992704391479492, 'learning_rate': 4.807235679840536e-07, 'fcm_dpo/beta': 0.006426135078072548, 'fcm_dpo/q_t': 0.40504029393196106, 'fcm_dpo/delta': -0.029495222494006157, 'fcm_dpo/margin': 66.63192749023438, 'margin_dpo/margin_mean': 66.63192749023438, 'margin_dpo/margin_std': 102.30528259277344, 'logps/chosen': -124.03260803222656, 'logps/rejected': -208.649658203125, 'logps/ref_chosen': -53.405487060546875, 'logps/ref_rejected': -71.39060974121094, 'KL/chosen_KL_mean': -70.62712097167969, 'KL/rejected_KL_mean': -137.25904846191406, 'KL/mean': -103.94308471679688, 'KL/std': 82.13803100585938, 'logits/chosen': -0.41126739978790283, 'logits/rejected': -0.39271873235702515, 'epoch': 0.22} + 22%|██▏ | 147/681 [06:15<23:07, 2.60s/it] 22%|██▏ | 148/681 [06:17<23:00, 2.59s/it] {'loss': 1.1302, 'grad_norm': 18.33550262451172, 'learning_rate': 4.802263794862384e-07, 'fcm_dpo/beta': 0.0063689956441521645, 'fcm_dpo/q_t': 0.41790372133255005, 'fcm_dpo/delta': -0.08440735191106796, 'fcm_dpo/margin': 58.8685302734375, 'margin_dpo/margin_mean': 58.868534088134766, 'margin_dpo/margin_std': 89.78065490722656, 'logps/chosen': -135.42489624023438, 'logps/rejected': -232.4501953125, 'logps/ref_chosen': -64.93708038330078, 'logps/ref_rejected': -103.09384155273438, 'KL/chosen_KL_mean': -70.48782348632812, 'KL/rejected_KL_mean': -129.35635375976562, 'KL/mean': -99.92208862304688, 'KL/std': 84.81340026855469, 'logits/chosen': -0.44826143980026245, 'logits/rejected': -0.4405372738838196, 'epoch': 0.22} + 22%|██▏ | 148/681 [06:17<23:00, 2.59s/it] 22%|██▏ | 149/681 [06:20<23:11, 2.62s/it] {'loss': 1.0654, 'grad_norm': 17.068838119506836, 'learning_rate': 4.797231243092118e-07, 'fcm_dpo/beta': 0.006224669516086578, 'fcm_dpo/q_t': 0.39915308356285095, 'fcm_dpo/delta': -0.043726127594709396, 'fcm_dpo/margin': 70.72378540039062, 'margin_dpo/margin_mean': 70.72378540039062, 'margin_dpo/margin_std': 85.64163208007812, 'logps/chosen': -125.36762237548828, 'logps/rejected': -236.93238830566406, 'logps/ref_chosen': -58.47376251220703, 'logps/ref_rejected': -99.31474304199219, 'KL/chosen_KL_mean': -66.89385986328125, 'KL/rejected_KL_mean': -137.61764526367188, 'KL/mean': -102.25575256347656, 'KL/std': 72.74675750732422, 'logits/chosen': -0.4722484350204468, 'logits/rejected': -0.4587002694606781, 'epoch': 0.22} + 22%|██▏ | 149/681 [06:20<23:11, 2.62s/it] 22%|██▏ | 150/681 [06:23<23:06, 2.61s/it] {'loss': 1.0768, 'grad_norm': 17.37567710876465, 'learning_rate': 4.792138157142157e-07, 'fcm_dpo/beta': 0.006181714590638876, 'fcm_dpo/q_t': 0.4037661552429199, 'fcm_dpo/delta': -0.04127602279186249, 'fcm_dpo/margin': 70.91671752929688, 'margin_dpo/margin_mean': 70.91671752929688, 'margin_dpo/margin_std': 98.11479187011719, 'logps/chosen': -103.76856994628906, 'logps/rejected': -212.3270721435547, 'logps/ref_chosen': -45.705810546875, 'logps/ref_rejected': -83.34759521484375, 'KL/chosen_KL_mean': -58.0627555847168, 'KL/rejected_KL_mean': -128.97947692871094, 'KL/mean': -93.5211181640625, 'KL/std': 83.53118896484375, 'logits/chosen': -0.4367871582508087, 'logits/rejected': -0.43975830078125, 'epoch': 0.22} + 22%|██▏ | 150/681 [06:23<23:06, 2.61s/it] 22%|██▏ | 151/681 [06:25<22:33, 2.55s/it] {'loss': 1.065, 'grad_norm': 21.97403907775879, 'learning_rate': 4.786984671220053e-07, 'fcm_dpo/beta': 0.006163077428936958, 'fcm_dpo/q_t': 0.3990030288696289, 'fcm_dpo/delta': -0.044833216816186905, 'fcm_dpo/margin': 71.84840393066406, 'margin_dpo/margin_mean': 71.84840393066406, 'margin_dpo/margin_std': 91.86006164550781, 'logps/chosen': -144.35452270507812, 'logps/rejected': -246.09591674804688, 'logps/ref_chosen': -70.57083129882812, 'logps/ref_rejected': -100.46382141113281, 'KL/chosen_KL_mean': -73.78369140625, 'KL/rejected_KL_mean': -145.63211059570312, 'KL/mean': -109.70790100097656, 'KL/std': 83.37910461425781, 'logits/chosen': -0.5252622365951538, 'logits/rejected': -0.49835896492004395, 'epoch': 0.22} + 22%|██▏ | 151/681 [06:25<22:33, 2.55s/it] 22%|██▏ | 152/681 [06:28<22:46, 2.58s/it] {'loss': 1.0264, 'grad_norm': 20.60307502746582, 'learning_rate': 4.78177092112495e-07, 'fcm_dpo/beta': 0.0060538845136761665, 'fcm_dpo/q_t': 0.3849901556968689, 'fcm_dpo/delta': -0.12335566431283951, 'fcm_dpo/margin': 85.42166900634766, 'margin_dpo/margin_mean': 85.42166900634766, 'margin_dpo/margin_std': 103.75975799560547, 'logps/chosen': -124.49932861328125, 'logps/rejected': -255.89707946777344, 'logps/ref_chosen': -60.16438674926758, 'logps/ref_rejected': -106.14045715332031, 'KL/chosen_KL_mean': -64.33494567871094, 'KL/rejected_KL_mean': -149.75662231445312, 'KL/mean': -107.04579162597656, 'KL/std': 83.78707122802734, 'logits/chosen': -0.47641807794570923, 'logits/rejected': -0.47393327951431274, 'epoch': 0.22} + 22%|██▏ | 152/681 [06:28<22:46, 2.58s/it] 22%|██▏ | 153/681 [06:30<22:42, 2.58s/it] {'loss': 1.0966, 'grad_norm': 14.930242538452148, 'learning_rate': 4.776497044244016e-07, 'fcm_dpo/beta': 0.005982040427625179, 'fcm_dpo/q_t': 0.40594881772994995, 'fcm_dpo/delta': -0.03570834919810295, 'fcm_dpo/margin': 72.57691955566406, 'margin_dpo/margin_mean': 72.57691955566406, 'margin_dpo/margin_std': 111.9495849609375, 'logps/chosen': -119.21187591552734, 'logps/rejected': -221.12936401367188, 'logps/ref_chosen': -56.315277099609375, 'logps/ref_rejected': -85.65583801269531, 'KL/chosen_KL_mean': -62.89659881591797, 'KL/rejected_KL_mean': -135.47352600097656, 'KL/mean': -99.18505859375, 'KL/std': 90.631591796875, 'logits/chosen': -0.4422386884689331, 'logits/rejected': -0.43338215351104736, 'epoch': 0.22} + 22%|██▏ | 153/681 [06:30<22:42, 2.58s/it] 23%|██▎ | 154/681 [06:33<22:58, 2.62s/it] {'loss': 1.1354, 'grad_norm': 17.922649383544922, 'learning_rate': 4.771163179548808e-07, 'fcm_dpo/beta': 0.00597739452496171, 'fcm_dpo/q_t': 0.4101860225200653, 'fcm_dpo/delta': -0.007730741053819656, 'fcm_dpo/margin': 68.11343383789062, 'margin_dpo/margin_mean': 68.11343383789062, 'margin_dpo/margin_std': 118.15486145019531, 'logps/chosen': -138.8070526123047, 'logps/rejected': -248.422119140625, 'logps/ref_chosen': -62.74256896972656, 'logps/ref_rejected': -104.24420166015625, 'KL/chosen_KL_mean': -76.06448364257812, 'KL/rejected_KL_mean': -144.17791748046875, 'KL/mean': -110.12120056152344, 'KL/std': 89.9405517578125, 'logits/chosen': -0.49747714400291443, 'logits/rejected': -0.5010119676589966, 'epoch': 0.23} + 23%|██▎ | 154/681 [06:33<22:58, 2.62s/it] 23%|██▎ | 155/681 [06:35<22:56, 2.62s/it] {'loss': 1.1141, 'grad_norm': 18.32874298095703, 'learning_rate': 4.7657694675916247e-07, 'fcm_dpo/beta': 0.0059681423008441925, 'fcm_dpo/q_t': 0.4097937345504761, 'fcm_dpo/delta': 0.0006999801844358444, 'fcm_dpo/margin': 66.88587951660156, 'margin_dpo/margin_mean': 66.88587951660156, 'margin_dpo/margin_std': 103.57026672363281, 'logps/chosen': -129.560302734375, 'logps/rejected': -213.28521728515625, 'logps/ref_chosen': -60.65318298339844, 'logps/ref_rejected': -77.49220275878906, 'KL/chosen_KL_mean': -68.90711975097656, 'KL/rejected_KL_mean': -135.7930145263672, 'KL/mean': -102.35006713867188, 'KL/std': 81.32989501953125, 'logits/chosen': -0.48518913984298706, 'logits/rejected': -0.4643056392669678, 'epoch': 0.23} + 23%|██▎ | 155/681 [06:36<22:56, 2.62s/it] 23%|██▎ | 156/681 [06:38<22:55, 2.62s/it] {'loss': 1.2724, 'grad_norm': 26.82451629638672, 'learning_rate': 4.7603160505017893e-07, 'fcm_dpo/beta': 0.006016138941049576, 'fcm_dpo/q_t': 0.444929301738739, 'fcm_dpo/delta': 0.05333181843161583, 'fcm_dpo/margin': 39.98707962036133, 'margin_dpo/margin_mean': 39.98707580566406, 'margin_dpo/margin_std': 114.85380554199219, 'logps/chosen': -162.13059997558594, 'logps/rejected': -209.79510498046875, 'logps/ref_chosen': -69.49188232421875, 'logps/ref_rejected': -77.16929626464844, 'KL/chosen_KL_mean': -92.63871765136719, 'KL/rejected_KL_mean': -132.6258087158203, 'KL/mean': -112.63226318359375, 'KL/std': 85.67424011230469, 'logits/chosen': -0.4589994549751282, 'logits/rejected': -0.4519059658050537, 'epoch': 0.23} + 23%|██▎ | 156/681 [06:38<22:55, 2.62s/it] 23%|██▎ | 157/681 [06:41<22:28, 2.57s/it] {'loss': 1.0327, 'grad_norm': 21.786405563354492, 'learning_rate': 4.7548030719819154e-07, 'fcm_dpo/beta': 0.005881883203983307, 'fcm_dpo/q_t': 0.38307642936706543, 'fcm_dpo/delta': -0.11752188205718994, 'fcm_dpo/margin': 86.78329467773438, 'margin_dpo/margin_mean': 86.78329467773438, 'margin_dpo/margin_std': 104.9678955078125, 'logps/chosen': -144.07028198242188, 'logps/rejected': -277.13153076171875, 'logps/ref_chosen': -61.368438720703125, 'logps/ref_rejected': -107.64636993408203, 'KL/chosen_KL_mean': -82.70185089111328, 'KL/rejected_KL_mean': -169.48513793945312, 'KL/mean': -126.093505859375, 'KL/std': 92.4810791015625, 'logits/chosen': -0.4166560769081116, 'logits/rejected': -0.42006832361221313, 'epoch': 0.23} + 23%|██▎ | 157/681 [06:41<22:28, 2.57s/it] 23%|██▎ | 158/681 [06:43<22:31, 2.58s/it] {'loss': 1.0569, 'grad_norm': 17.88888931274414, 'learning_rate': 4.7492306773041136e-07, 'fcm_dpo/beta': 0.0057478612288832664, 'fcm_dpo/q_t': 0.38809406757354736, 'fcm_dpo/delta': -0.14822149276733398, 'fcm_dpo/margin': 93.98252868652344, 'margin_dpo/margin_mean': 93.98252868652344, 'margin_dpo/margin_std': 142.27569580078125, 'logps/chosen': -142.02557373046875, 'logps/rejected': -292.08984375, 'logps/ref_chosen': -57.612918853759766, 'logps/ref_rejected': -113.6946792602539, 'KL/chosen_KL_mean': -84.41264343261719, 'KL/rejected_KL_mean': -178.39517211914062, 'KL/mean': -131.40390014648438, 'KL/std': 117.00088500976562, 'logits/chosen': -0.3769122362136841, 'logits/rejected': -0.3925984501838684, 'epoch': 0.23} + 23%|██▎ | 158/681 [06:43<22:31, 2.58s/it] 23%|██▎ | 159/681 [06:46<22:40, 2.61s/it] {'loss': 1.1579, 'grad_norm': 26.526140213012695, 'learning_rate': 4.743599013306165e-07, 'fcm_dpo/beta': 0.00576662877574563, 'fcm_dpo/q_t': 0.41926220059394836, 'fcm_dpo/delta': 0.04188086465001106, 'fcm_dpo/margin': 62.26060485839844, 'margin_dpo/margin_mean': 62.26060485839844, 'margin_dpo/margin_std': 111.88368225097656, 'logps/chosen': -176.71168518066406, 'logps/rejected': -246.3106689453125, 'logps/ref_chosen': -81.56034851074219, 'logps/ref_rejected': -88.89871215820312, 'KL/chosen_KL_mean': -95.15133666992188, 'KL/rejected_KL_mean': -157.41195678710938, 'KL/mean': -126.28166198730469, 'KL/std': 101.34651184082031, 'logits/chosen': -0.4394528865814209, 'logits/rejected': -0.4065374433994293, 'epoch': 0.23} + 23%|██▎ | 159/681 [06:46<22:40, 2.61s/it] 23%|██▎ | 160/681 [06:48<22:22, 2.58s/it] {'loss': 1.0875, 'grad_norm': 20.257490158081055, 'learning_rate': 4.737908228387656e-07, 'fcm_dpo/beta': 0.005649491213262081, 'fcm_dpo/q_t': 0.3967619240283966, 'fcm_dpo/delta': -0.08930858224630356, 'fcm_dpo/margin': 85.69293212890625, 'margin_dpo/margin_mean': 85.69292449951172, 'margin_dpo/margin_std': 134.3516387939453, 'logps/chosen': -163.03579711914062, 'logps/rejected': -280.21563720703125, 'logps/ref_chosen': -65.73088073730469, 'logps/ref_rejected': -97.21781921386719, 'KL/chosen_KL_mean': -97.3049087524414, 'KL/rejected_KL_mean': -182.99783325195312, 'KL/mean': -140.1513671875, 'KL/std': 107.30947875976562, 'logits/chosen': -0.38310354948043823, 'logits/rejected': -0.37118303775787354, 'epoch': 0.23} + 23%|██▎ | 160/681 [06:48<22:22, 2.58s/it] 24%|██▎ | 161/681 [06:51<21:28, 2.48s/it] {'loss': 1.0919, 'grad_norm': 17.573366165161133, 'learning_rate': 4.7321584725060594e-07, 'fcm_dpo/beta': 0.005624156445264816, 'fcm_dpo/q_t': 0.40480250120162964, 'fcm_dpo/delta': -0.028947748243808746, 'fcm_dpo/margin': 76.04591369628906, 'margin_dpo/margin_mean': 76.04591369628906, 'margin_dpo/margin_std': 109.95503234863281, 'logps/chosen': -136.06640625, 'logps/rejected': -243.1068115234375, 'logps/ref_chosen': -52.43647003173828, 'logps/ref_rejected': -83.43095397949219, 'KL/chosen_KL_mean': -83.62994384765625, 'KL/rejected_KL_mean': -159.6758575439453, 'KL/mean': -121.65289306640625, 'KL/std': 88.20482635498047, 'logits/chosen': -0.4291399121284485, 'logits/rejected': -0.4256962835788727, 'epoch': 0.24} + 24%|██▎ | 161/681 [06:51<21:28, 2.48s/it] 24%|██▍ | 162/681 [06:53<22:08, 2.56s/it] {'loss': 1.1189, 'grad_norm': 20.28093147277832, 'learning_rate': 4.7263498971727905e-07, 'fcm_dpo/beta': 0.005556900054216385, 'fcm_dpo/q_t': 0.40978431701660156, 'fcm_dpo/delta': -0.011652916669845581, 'fcm_dpo/margin': 73.67125701904297, 'margin_dpo/margin_mean': 73.67125701904297, 'margin_dpo/margin_std': 116.21994018554688, 'logps/chosen': -145.13763427734375, 'logps/rejected': -245.58888244628906, 'logps/ref_chosen': -62.6105842590332, 'logps/ref_rejected': -89.39057922363281, 'KL/chosen_KL_mean': -82.52705383300781, 'KL/rejected_KL_mean': -156.19830322265625, 'KL/mean': -119.3626708984375, 'KL/std': 96.73245239257812, 'logits/chosen': -0.44169116020202637, 'logits/rejected': -0.42343568801879883, 'epoch': 0.24} + 24%|██▍ | 162/681 [06:53<22:08, 2.56s/it] 24%|██▍ | 163/681 [06:56<22:00, 2.55s/it] {'loss': 1.1174, 'grad_norm': 19.475297927856445, 'learning_rate': 4.720482655449212e-07, 'fcm_dpo/beta': 0.005600422620773315, 'fcm_dpo/q_t': 0.4107089638710022, 'fcm_dpo/delta': -0.006450829096138477, 'fcm_dpo/margin': 72.5283203125, 'margin_dpo/margin_mean': 72.5283203125, 'margin_dpo/margin_std': 116.68798828125, 'logps/chosen': -146.51133728027344, 'logps/rejected': -239.43624877929688, 'logps/ref_chosen': -55.021629333496094, 'logps/ref_rejected': -75.418212890625, 'KL/chosen_KL_mean': -91.48970794677734, 'KL/rejected_KL_mean': -164.01803588867188, 'KL/mean': -127.75386810302734, 'KL/std': 96.89871215820312, 'logits/chosen': -0.38897454738616943, 'logits/rejected': -0.3698977828025818, 'epoch': 0.24} + 24%|██▍ | 163/681 [06:56<22:00, 2.55s/it] 24%|██▍ | 164/681 [06:58<21:54, 2.54s/it] {'loss': 1.0448, 'grad_norm': 19.807645797729492, 'learning_rate': 4.714556901942599e-07, 'fcm_dpo/beta': 0.00548876728862524, 'fcm_dpo/q_t': 0.39110738039016724, 'fcm_dpo/delta': -0.08558943122625351, 'fcm_dpo/margin': 87.4451904296875, 'margin_dpo/margin_mean': 87.44519805908203, 'margin_dpo/margin_std': 107.89261627197266, 'logps/chosen': -140.1947021484375, 'logps/rejected': -251.66384887695312, 'logps/ref_chosen': -55.64066696166992, 'logps/ref_rejected': -79.66463470458984, 'KL/chosen_KL_mean': -84.55402374267578, 'KL/rejected_KL_mean': -171.9992218017578, 'KL/mean': -128.27662658691406, 'KL/std': 93.24118041992188, 'logits/chosen': -0.36497557163238525, 'logits/rejected': -0.34768325090408325, 'epoch': 0.24} + 24%|██▍ | 164/681 [06:58<21:54, 2.54s/it] 24%|██▍ | 165/681 [07:01<21:51, 2.54s/it] {'loss': 1.1709, 'grad_norm': 22.88750457763672, 'learning_rate': 4.708572792802069e-07, 'fcm_dpo/beta': 0.005550094414502382, 'fcm_dpo/q_t': 0.4268398880958557, 'fcm_dpo/delta': 0.07617515325546265, 'fcm_dpo/margin': 58.802772521972656, 'margin_dpo/margin_mean': 58.802772521972656, 'margin_dpo/margin_std': 107.60116577148438, 'logps/chosen': -153.78549194335938, 'logps/rejected': -224.9481658935547, 'logps/ref_chosen': -61.310691833496094, 'logps/ref_rejected': -73.67060852050781, 'KL/chosen_KL_mean': -92.47479248046875, 'KL/rejected_KL_mean': -151.27755737304688, 'KL/mean': -121.87619018554688, 'KL/std': 80.65908813476562, 'logits/chosen': -0.39553022384643555, 'logits/rejected': -0.36636269092559814, 'epoch': 0.24} + 24%|██▍ | 165/681 [07:01<21:51, 2.54s/it] 24%|██▍ | 166/681 [07:03<20:59, 2.45s/it] {'loss': 1.0224, 'grad_norm': 17.94371223449707, 'learning_rate': 4.702530485714461e-07, 'fcm_dpo/beta': 0.005407451651990414, 'fcm_dpo/q_t': 0.38125768303871155, 'fcm_dpo/delta': -0.1903567910194397, 'fcm_dpo/margin': 106.9185562133789, 'margin_dpo/margin_mean': 106.91854858398438, 'margin_dpo/margin_std': 150.0036163330078, 'logps/chosen': -134.6000518798828, 'logps/rejected': -288.630126953125, 'logps/ref_chosen': -50.98360061645508, 'logps/ref_rejected': -98.09512329101562, 'KL/chosen_KL_mean': -83.616455078125, 'KL/rejected_KL_mean': -190.53501892089844, 'KL/mean': -137.0757293701172, 'KL/std': 119.21273803710938, 'logits/chosen': -0.3321695327758789, 'logits/rejected': -0.3424978256225586, 'epoch': 0.24} + 24%|██▍ | 166/681 [07:03<20:59, 2.45s/it] 25%|██▍ | 167/681 [07:06<21:22, 2.50s/it] {'loss': 0.9761, 'grad_norm': 17.214645385742188, 'learning_rate': 4.6964301399001877e-07, 'fcm_dpo/beta': 0.0052237361669540405, 'fcm_dpo/q_t': 0.36922937631607056, 'fcm_dpo/delta': -0.19318926334381104, 'fcm_dpo/margin': 111.38297271728516, 'margin_dpo/margin_mean': 111.38297271728516, 'margin_dpo/margin_std': 121.13790893554688, 'logps/chosen': -134.62753295898438, 'logps/rejected': -291.6168212890625, 'logps/ref_chosen': -50.424095153808594, 'logps/ref_rejected': -96.03042602539062, 'KL/chosen_KL_mean': -84.20343780517578, 'KL/rejected_KL_mean': -195.58641052246094, 'KL/mean': -139.89492797851562, 'KL/std': 104.5910873413086, 'logits/chosen': -0.3567941188812256, 'logits/rejected': -0.36129891872406006, 'epoch': 0.25} + 25%|██▍ | 167/681 [07:06<21:22, 2.50s/it] 25%|██▍ | 168/681 [07:08<21:34, 2.52s/it] {'loss': 1.0879, 'grad_norm': 20.31831932067871, 'learning_rate': 4.690271916109034e-07, 'fcm_dpo/beta': 0.005147742573171854, 'fcm_dpo/q_t': 0.40599554777145386, 'fcm_dpo/delta': -0.019897453486919403, 'fcm_dpo/margin': 81.37892150878906, 'margin_dpo/margin_mean': 81.37892150878906, 'margin_dpo/margin_std': 113.19637298583984, 'logps/chosen': -142.39743041992188, 'logps/rejected': -249.6220703125, 'logps/ref_chosen': -49.462825775146484, 'logps/ref_rejected': -75.30855560302734, 'KL/chosen_KL_mean': -92.93460083007812, 'KL/rejected_KL_mean': -174.31350708007812, 'KL/mean': -133.62405395507812, 'KL/std': 100.2225341796875, 'logits/chosen': -0.3324066996574402, 'logits/rejected': -0.32228702306747437, 'epoch': 0.25} + 25%|██▍ | 168/681 [07:08<21:34, 2.52s/it] 25%|██▍ | 169/681 [07:11<22:20, 2.62s/it] {'loss': 1.168, 'grad_norm': 20.161535263061523, 'learning_rate': 4.6840559766159235e-07, 'fcm_dpo/beta': 0.005072026047855616, 'fcm_dpo/q_t': 0.42030519247055054, 'fcm_dpo/delta': -0.08044641464948654, 'fcm_dpo/margin': 73.04991149902344, 'margin_dpo/margin_mean': 73.04991149902344, 'margin_dpo/margin_std': 141.12490844726562, 'logps/chosen': -156.9058837890625, 'logps/rejected': -253.49807739257812, 'logps/ref_chosen': -59.803443908691406, 'logps/ref_rejected': -83.34574890136719, 'KL/chosen_KL_mean': -97.10243225097656, 'KL/rejected_KL_mean': -170.15234375, 'KL/mean': -133.62738037109375, 'KL/std': 102.53475952148438, 'logits/chosen': -0.3249385356903076, 'logits/rejected': -0.3059506416320801, 'epoch': 0.25} + 25%|██▍ | 169/681 [07:11<22:20, 2.62s/it] 25%|██▍ | 170/681 [07:14<22:38, 2.66s/it] {'loss': 1.0942, 'grad_norm': 17.05228042602539, 'learning_rate': 4.6777824852166437e-07, 'fcm_dpo/beta': 0.005024762358516455, 'fcm_dpo/q_t': 0.40413689613342285, 'fcm_dpo/delta': -0.02076905593276024, 'fcm_dpo/margin': 83.38722229003906, 'margin_dpo/margin_mean': 83.38722229003906, 'margin_dpo/margin_std': 117.27194213867188, 'logps/chosen': -135.02371215820312, 'logps/rejected': -244.85650634765625, 'logps/ref_chosen': -49.471771240234375, 'logps/ref_rejected': -75.91734313964844, 'KL/chosen_KL_mean': -85.55194091796875, 'KL/rejected_KL_mean': -168.9391632080078, 'KL/mean': -127.24555206298828, 'KL/std': 97.04989624023438, 'logits/chosen': -0.3223455548286438, 'logits/rejected': -0.3122260272502899, 'epoch': 0.25} + 25%|██▍ | 170/681 [07:14<22:38, 2.66s/it] 25%|██▌ | 171/681 [07:16<21:44, 2.56s/it] {'loss': 1.1705, 'grad_norm': 28.202070236206055, 'learning_rate': 4.6714516072235273e-07, 'fcm_dpo/beta': 0.00508046243339777, 'fcm_dpo/q_t': 0.42305469512939453, 'fcm_dpo/delta': 0.039394039660692215, 'fcm_dpo/margin': 71.26011657714844, 'margin_dpo/margin_mean': 71.26011657714844, 'margin_dpo/margin_std': 142.1697235107422, 'logps/chosen': -205.76284790039062, 'logps/rejected': -301.9057312011719, 'logps/ref_chosen': -84.49931335449219, 'logps/ref_rejected': -109.38209533691406, 'KL/chosen_KL_mean': -121.26353454589844, 'KL/rejected_KL_mean': -192.5236358642578, 'KL/mean': -156.89358520507812, 'KL/std': 110.09349060058594, 'logits/chosen': -0.31354865431785583, 'logits/rejected': -0.29599112272262573, 'epoch': 0.25} + 25%|██▌ | 171/681 [07:16<21:44, 2.56s/it] 25%|██▌ | 172/681 [07:19<21:29, 2.53s/it] {'loss': 1.1391, 'grad_norm': 19.106325149536133, 'learning_rate': 4.6650635094610966e-07, 'fcm_dpo/beta': 0.005108260549604893, 'fcm_dpo/q_t': 0.41656991839408875, 'fcm_dpo/delta': 0.028583845123648643, 'fcm_dpo/margin': 72.91913604736328, 'margin_dpo/margin_mean': 72.91913604736328, 'margin_dpo/margin_std': 123.32398986816406, 'logps/chosen': -173.62689208984375, 'logps/rejected': -263.32879638671875, 'logps/ref_chosen': -68.65391540527344, 'logps/ref_rejected': -85.43667602539062, 'KL/chosen_KL_mean': -104.97297668457031, 'KL/rejected_KL_mean': -177.89212036132812, 'KL/mean': -141.43255615234375, 'KL/std': 106.18240356445312, 'logits/chosen': -0.38692790269851685, 'logits/rejected': -0.36780279874801636, 'epoch': 0.25} + 25%|██▌ | 172/681 [07:19<21:29, 2.53s/it] 25%|██▌ | 173/681 [07:21<21:18, 2.52s/it] {'loss': 1.1146, 'grad_norm': 20.805395126342773, 'learning_rate': 4.6586183602616687e-07, 'fcm_dpo/beta': 0.005157306790351868, 'fcm_dpo/q_t': 0.4147951602935791, 'fcm_dpo/delta': 0.0244424007833004, 'fcm_dpo/margin': 72.93345642089844, 'margin_dpo/margin_mean': 72.93345642089844, 'margin_dpo/margin_std': 106.28599548339844, 'logps/chosen': -158.9794464111328, 'logps/rejected': -247.5459442138672, 'logps/ref_chosen': -63.050880432128906, 'logps/ref_rejected': -78.68392181396484, 'KL/chosen_KL_mean': -95.9285659790039, 'KL/rejected_KL_mean': -168.86203002929688, 'KL/mean': -132.39529418945312, 'KL/std': 101.62166595458984, 'logits/chosen': -0.38531604409217834, 'logits/rejected': -0.3533366620540619, 'epoch': 0.25} + 25%|██▌ | 173/681 [07:21<21:18, 2.52s/it] 26%|██▌ | 174/681 [07:24<21:18, 2.52s/it] {'loss': 1.0809, 'grad_norm': 24.817533493041992, 'learning_rate': 4.652116329460919e-07, 'fcm_dpo/beta': 0.005131464451551437, 'fcm_dpo/q_t': 0.3998154103755951, 'fcm_dpo/delta': -0.05737413465976715, 'fcm_dpo/margin': 88.55628967285156, 'margin_dpo/margin_mean': 88.55628967285156, 'margin_dpo/margin_std': 126.68205261230469, 'logps/chosen': -143.3165283203125, 'logps/rejected': -280.42108154296875, 'logps/ref_chosen': -53.36296844482422, 'logps/ref_rejected': -101.91120910644531, 'KL/chosen_KL_mean': -89.95355224609375, 'KL/rejected_KL_mean': -178.50985717773438, 'KL/mean': -134.23170471191406, 'KL/std': 104.74340057373047, 'logits/chosen': -0.3375644087791443, 'logits/rejected': -0.35590213537216187, 'epoch': 0.26} + 26%|██▌ | 174/681 [07:24<21:18, 2.52s/it] 26%|██▌ | 175/681 [07:26<21:30, 2.55s/it] {'loss': 0.9676, 'grad_norm': 25.079553604125977, 'learning_rate': 4.645557588393406e-07, 'fcm_dpo/beta': 0.004940693732351065, 'fcm_dpo/q_t': 0.36849379539489746, 'fcm_dpo/delta': -0.18679270148277283, 'fcm_dpo/margin': 116.53382873535156, 'margin_dpo/margin_mean': 116.53382873535156, 'margin_dpo/margin_std': 118.0002212524414, 'logps/chosen': -129.42018127441406, 'logps/rejected': -290.04205322265625, 'logps/ref_chosen': -45.417762756347656, 'logps/ref_rejected': -89.50579833984375, 'KL/chosen_KL_mean': -84.00241088867188, 'KL/rejected_KL_mean': -200.5362548828125, 'KL/mean': -142.2693328857422, 'KL/std': 111.97515869140625, 'logits/chosen': -0.3228433430194855, 'logits/rejected': -0.3084716796875, 'epoch': 0.26} + 26%|██▌ | 175/681 [07:26<21:30, 2.55s/it] 26%|██▌ | 176/681 [07:29<20:48, 2.47s/it] {'loss': 1.0415, 'grad_norm': 18.32849884033203, 'learning_rate': 4.638942309888058e-07, 'fcm_dpo/beta': 0.004837565589696169, 'fcm_dpo/q_t': 0.392004132270813, 'fcm_dpo/delta': -0.09568466246128082, 'fcm_dpo/margin': 101.51274108886719, 'margin_dpo/margin_mean': 101.51274108886719, 'margin_dpo/margin_std': 129.95030212402344, 'logps/chosen': -139.19252014160156, 'logps/rejected': -285.8114013671875, 'logps/ref_chosen': -50.452842712402344, 'logps/ref_rejected': -95.5589599609375, 'KL/chosen_KL_mean': -88.73968505859375, 'KL/rejected_KL_mean': -190.25241088867188, 'KL/mean': -139.49606323242188, 'KL/std': 112.8240966796875, 'logits/chosen': -0.3147198557853699, 'logits/rejected': -0.332048237323761, 'epoch': 0.26} + 26%|██▌ | 176/681 [07:29<20:48, 2.47s/it] 26%|██▌ | 177/681 [07:31<21:07, 2.52s/it] {'loss': 1.0435, 'grad_norm': 19.02363395690918, 'learning_rate': 4.6322706682636137e-07, 'fcm_dpo/beta': 0.004758263938128948, 'fcm_dpo/q_t': 0.3923775553703308, 'fcm_dpo/delta': -0.0877579003572464, 'fcm_dpo/margin': 101.63257598876953, 'margin_dpo/margin_mean': 101.63257598876953, 'margin_dpo/margin_std': 129.4840545654297, 'logps/chosen': -163.310302734375, 'logps/rejected': -299.62017822265625, 'logps/ref_chosen': -61.216468811035156, 'logps/ref_rejected': -95.89378356933594, 'KL/chosen_KL_mean': -102.09382629394531, 'KL/rejected_KL_mean': -203.72640991210938, 'KL/mean': -152.91012573242188, 'KL/std': 122.18125915527344, 'logits/chosen': -0.3660031855106354, 'logits/rejected': -0.3579842448234558, 'epoch': 0.26} + 26%|██▌ | 177/681 [07:31<21:07, 2.52s/it] 26%|██▌ | 178/681 [07:34<21:00, 2.51s/it] {'loss': 1.0058, 'grad_norm': 22.4151554107666, 'learning_rate': 4.6255428393240354e-07, 'fcm_dpo/beta': 0.004583236761391163, 'fcm_dpo/q_t': 0.3769300878047943, 'fcm_dpo/delta': -0.17850404977798462, 'fcm_dpo/margin': 123.82139587402344, 'margin_dpo/margin_mean': 123.82139587402344, 'margin_dpo/margin_std': 155.78990173339844, 'logps/chosen': -172.28501892089844, 'logps/rejected': -343.20697021484375, 'logps/ref_chosen': -58.26478958129883, 'logps/ref_rejected': -105.3653335571289, 'KL/chosen_KL_mean': -114.02023315429688, 'KL/rejected_KL_mean': -237.8416290283203, 'KL/mean': -175.93092346191406, 'KL/std': 142.59483337402344, 'logits/chosen': -0.2533833086490631, 'logits/rejected': -0.24754983186721802, 'epoch': 0.26} + 26%|██▌ | 178/681 [07:34<21:00, 2.51s/it] 26%|██▋ | 179/681 [07:36<21:29, 2.57s/it] {'loss': 1.146, 'grad_norm': 29.362442016601562, 'learning_rate': 4.6187590003538724e-07, 'fcm_dpo/beta': 0.004542763344943523, 'fcm_dpo/q_t': 0.4141519069671631, 'fcm_dpo/delta': 0.008336875587701797, 'fcm_dpo/margin': 86.20682525634766, 'margin_dpo/margin_mean': 86.20682525634766, 'margin_dpo/margin_std': 153.45443725585938, 'logps/chosen': -180.2083740234375, 'logps/rejected': -295.88470458984375, 'logps/ref_chosen': -61.05832290649414, 'logps/ref_rejected': -90.52782440185547, 'KL/chosen_KL_mean': -119.15005493164062, 'KL/rejected_KL_mean': -205.35687255859375, 'KL/mean': -162.2534637451172, 'KL/std': 122.278076171875, 'logits/chosen': -0.287581205368042, 'logits/rejected': -0.29421767592430115, 'epoch': 0.26} + 26%|██▋ | 179/681 [07:37<21:29, 2.57s/it] 26%|██▋ | 180/681 [07:39<21:05, 2.53s/it] {'loss': 1.0244, 'grad_norm': 20.13519859313965, 'learning_rate': 4.611919330113591e-07, 'fcm_dpo/beta': 0.004480388015508652, 'fcm_dpo/q_t': 0.38382506370544434, 'fcm_dpo/delta': -0.11835239082574844, 'fcm_dpo/margin': 114.30378723144531, 'margin_dpo/margin_mean': 114.30378723144531, 'margin_dpo/margin_std': 138.1670379638672, 'logps/chosen': -156.6915283203125, 'logps/rejected': -314.86444091796875, 'logps/ref_chosen': -54.34272003173828, 'logps/ref_rejected': -98.21183776855469, 'KL/chosen_KL_mean': -102.34881591796875, 'KL/rejected_KL_mean': -216.65260314941406, 'KL/mean': -159.50070190429688, 'KL/std': 110.17591857910156, 'logits/chosen': -0.27647683024406433, 'logits/rejected': -0.27314233779907227, 'epoch': 0.26} + 26%|██▋ | 180/681 [07:39<21:05, 2.53s/it] 27%|██▋ | 181/681 [07:42<21:15, 2.55s/it] {'loss': 1.1817, 'grad_norm': 19.06928825378418, 'learning_rate': 4.605024008834863e-07, 'fcm_dpo/beta': 0.004530083388090134, 'fcm_dpo/q_t': 0.4301533102989197, 'fcm_dpo/delta': 0.0955948680639267, 'fcm_dpo/margin': 67.79975128173828, 'margin_dpo/margin_mean': 67.79975128173828, 'margin_dpo/margin_std': 127.82617950439453, 'logps/chosen': -143.83792114257812, 'logps/rejected': -218.29339599609375, 'logps/ref_chosen': -55.000457763671875, 'logps/ref_rejected': -61.656166076660156, 'KL/chosen_KL_mean': -88.83747100830078, 'KL/rejected_KL_mean': -156.63722229003906, 'KL/mean': -122.73734283447266, 'KL/std': 99.75308990478516, 'logits/chosen': -0.30789846181869507, 'logits/rejected': -0.2823750972747803, 'epoch': 0.27} + 27%|██▋ | 181/681 [07:42<21:15, 2.55s/it] 27%|██▋ | 182/681 [07:44<21:15, 2.56s/it] {'loss': 1.0052, 'grad_norm': 17.178207397460938, 'learning_rate': 4.598073218215817e-07, 'fcm_dpo/beta': 0.0044230264611542225, 'fcm_dpo/q_t': 0.37559401988983154, 'fcm_dpo/delta': -0.1580391675233841, 'fcm_dpo/margin': 124.02388000488281, 'margin_dpo/margin_mean': 124.02387237548828, 'margin_dpo/margin_std': 144.7704620361328, 'logps/chosen': -123.50967407226562, 'logps/rejected': -295.947265625, 'logps/ref_chosen': -41.107852935791016, 'logps/ref_rejected': -89.5215835571289, 'KL/chosen_KL_mean': -82.40182495117188, 'KL/rejected_KL_mean': -206.42568969726562, 'KL/mean': -144.41375732421875, 'KL/std': 120.55620574951172, 'logits/chosen': -0.27274084091186523, 'logits/rejected': -0.28051310777664185, 'epoch': 0.27} + 27%|██▋ | 182/681 [07:44<21:15, 2.56s/it] 27%|██▋ | 183/681 [07:46<20:40, 2.49s/it] {'loss': 1.1896, 'grad_norm': 21.047754287719727, 'learning_rate': 4.5910671414162484e-07, 'fcm_dpo/beta': 0.004367251414805651, 'fcm_dpo/q_t': 0.4345667362213135, 'fcm_dpo/delta': -0.04111050069332123, 'fcm_dpo/margin': 64.02972412109375, 'margin_dpo/margin_mean': 64.02973175048828, 'margin_dpo/margin_std': 109.3995132446289, 'logps/chosen': -182.46755981445312, 'logps/rejected': -264.9484558105469, 'logps/ref_chosen': -57.52456283569336, 'logps/ref_rejected': -75.97572326660156, 'KL/chosen_KL_mean': -124.9429931640625, 'KL/rejected_KL_mean': -188.97271728515625, 'KL/mean': -156.95785522460938, 'KL/std': 100.42144012451172, 'logits/chosen': -0.3011692762374878, 'logits/rejected': -0.2902328372001648, 'epoch': 0.27} + 27%|██▋ | 183/681 [07:46<20:40, 2.49s/it] 27%|██▋ | 184/681 [07:49<21:10, 2.56s/it] {'loss': 1.1784, 'grad_norm': 17.791168212890625, 'learning_rate': 4.5840059630527985e-07, 'fcm_dpo/beta': 0.00436544232070446, 'fcm_dpo/q_t': 0.4317898750305176, 'fcm_dpo/delta': -0.0041433474980294704, 'fcm_dpo/margin': 68.87787628173828, 'margin_dpo/margin_mean': 68.87787628173828, 'margin_dpo/margin_std': 122.99388122558594, 'logps/chosen': -165.41015625, 'logps/rejected': -252.37713623046875, 'logps/ref_chosen': -58.544952392578125, 'logps/ref_rejected': -76.63406372070312, 'KL/chosen_KL_mean': -106.86518859863281, 'KL/rejected_KL_mean': -175.74305725097656, 'KL/mean': -141.30413818359375, 'KL/std': 97.58000183105469, 'logits/chosen': -0.33284837007522583, 'logits/rejected': -0.32246139645576477, 'epoch': 0.27} + 27%|██▋ | 184/681 [07:49<21:10, 2.56s/it] 27%|██▋ | 185/681 [07:52<20:49, 2.52s/it] {'loss': 1.2441, 'grad_norm': 19.48412322998047, 'learning_rate': 4.5768898691940836e-07, 'fcm_dpo/beta': 0.004431641660630703, 'fcm_dpo/q_t': 0.44948315620422363, 'fcm_dpo/delta': 0.0773247703909874, 'fcm_dpo/margin': 52.95130157470703, 'margin_dpo/margin_mean': 52.9513053894043, 'margin_dpo/margin_std': 132.49623107910156, 'logps/chosen': -176.85086059570312, 'logps/rejected': -241.5389404296875, 'logps/ref_chosen': -62.025848388671875, 'logps/ref_rejected': -73.7625961303711, 'KL/chosen_KL_mean': -114.82501983642578, 'KL/rejected_KL_mean': -167.77633666992188, 'KL/mean': -141.30067443847656, 'KL/std': 109.46660614013672, 'logits/chosen': -0.2981659173965454, 'logits/rejected': -0.2731373608112335, 'epoch': 0.27} + 27%|██▋ | 185/681 [07:52<20:49, 2.52s/it] 27%|██▋ | 186/681 [07:54<20:44, 2.51s/it] {'loss': 1.0496, 'grad_norm': 20.40852928161621, 'learning_rate': 4.5697190473557947e-07, 'fcm_dpo/beta': 0.004385577980428934, 'fcm_dpo/q_t': 0.39578211307525635, 'fcm_dpo/delta': -0.06044544652104378, 'fcm_dpo/margin': 104.33164978027344, 'margin_dpo/margin_mean': 104.33164978027344, 'margin_dpo/margin_std': 125.84209442138672, 'logps/chosen': -174.4224853515625, 'logps/rejected': -297.47308349609375, 'logps/ref_chosen': -69.35346984863281, 'logps/ref_rejected': -88.07244873046875, 'KL/chosen_KL_mean': -105.06900787353516, 'KL/rejected_KL_mean': -209.400634765625, 'KL/mean': -157.23483276367188, 'KL/std': 109.59912109375, 'logits/chosen': -0.3577519655227661, 'logits/rejected': -0.3329923450946808, 'epoch': 0.27} + 27%|██▋ | 186/681 [07:54<20:44, 2.51s/it] 27%|██▋ | 187/681 [07:56<20:10, 2.45s/it] {'loss': 1.0945, 'grad_norm': 21.53246307373047, 'learning_rate': 4.5624936864957555e-07, 'fcm_dpo/beta': 0.004401649348437786, 'fcm_dpo/q_t': 0.4099273979663849, 'fcm_dpo/delta': 0.00935973972082138, 'fcm_dpo/margin': 88.75602722167969, 'margin_dpo/margin_mean': 88.75602722167969, 'margin_dpo/margin_std': 114.76980590820312, 'logps/chosen': -152.15496826171875, 'logps/rejected': -270.1236267089844, 'logps/ref_chosen': -52.7564582824707, 'logps/ref_rejected': -81.96910095214844, 'KL/chosen_KL_mean': -99.39851379394531, 'KL/rejected_KL_mean': -188.154541015625, 'KL/mean': -143.77651977539062, 'KL/std': 107.75646209716797, 'logits/chosen': -0.3320094645023346, 'logits/rejected': -0.32622426748275757, 'epoch': 0.27} + 27%|██▋ | 187/681 [07:56<20:10, 2.45s/it] 28%|██▊ | 188/681 [07:59<20:23, 2.48s/it] {'loss': 1.0482, 'grad_norm': 31.087356567382812, 'learning_rate': 4.5552139770089454e-07, 'fcm_dpo/beta': 0.004345991648733616, 'fcm_dpo/q_t': 0.39470282196998596, 'fcm_dpo/delta': -0.0692645013332367, 'fcm_dpo/margin': 107.2442626953125, 'margin_dpo/margin_mean': 107.2442626953125, 'margin_dpo/margin_std': 131.91983032226562, 'logps/chosen': -143.01242065429688, 'logps/rejected': -290.3816223144531, 'logps/ref_chosen': -49.415489196777344, 'logps/ref_rejected': -89.54043579101562, 'KL/chosen_KL_mean': -93.59693908691406, 'KL/rejected_KL_mean': -200.8411865234375, 'KL/mean': -147.21905517578125, 'KL/std': 118.67021179199219, 'logits/chosen': -0.328615665435791, 'logits/rejected': -0.3348105847835541, 'epoch': 0.28} + 28%|██▊ | 188/681 [07:59<20:23, 2.48s/it] 28%|██▊ | 189/681 [08:01<20:09, 2.46s/it] {'loss': 1.1315, 'grad_norm': 24.516483306884766, 'learning_rate': 4.5478801107224794e-07, 'fcm_dpo/beta': 0.0043410686776041985, 'fcm_dpo/q_t': 0.41580936312675476, 'fcm_dpo/delta': 0.01706843078136444, 'fcm_dpo/margin': 88.35537719726562, 'margin_dpo/margin_mean': 88.35537719726562, 'margin_dpo/margin_std': 149.4475860595703, 'logps/chosen': -162.36471557617188, 'logps/rejected': -270.4884948730469, 'logps/ref_chosen': -52.39896011352539, 'logps/ref_rejected': -72.16735076904297, 'KL/chosen_KL_mean': -109.96575927734375, 'KL/rejected_KL_mean': -198.32113647460938, 'KL/mean': -154.14344787597656, 'KL/std': 121.00015258789062, 'logits/chosen': -0.31679028272628784, 'logits/rejected': -0.29732340574264526, 'epoch': 0.28} + 28%|██▊ | 189/681 [08:01<20:09, 2.46s/it] 28%|██▊ | 190/681 [08:04<19:37, 2.40s/it] {'loss': 1.0841, 'grad_norm': 18.538650512695312, 'learning_rate': 4.5404922808905543e-07, 'fcm_dpo/beta': 0.004339671693742275, 'fcm_dpo/q_t': 0.39942899346351624, 'fcm_dpo/delta': -0.058640651404857635, 'fcm_dpo/margin': 104.86446380615234, 'margin_dpo/margin_mean': 104.86446380615234, 'margin_dpo/margin_std': 149.67710876464844, 'logps/chosen': -183.76559448242188, 'logps/rejected': -326.49749755859375, 'logps/ref_chosen': -64.68305969238281, 'logps/ref_rejected': -102.55052185058594, 'KL/chosen_KL_mean': -119.08251953125, 'KL/rejected_KL_mean': -223.94699096679688, 'KL/mean': -171.51473999023438, 'KL/std': 130.3875732421875, 'logits/chosen': -0.3367846608161926, 'logits/rejected': -0.32651811838150024, 'epoch': 0.28} + 28%|██▊ | 190/681 [08:04<19:37, 2.40s/it] 28%|██▊ | 191/681 [08:06<20:25, 2.50s/it] {'loss': 0.9577, 'grad_norm': 19.302614212036133, 'learning_rate': 4.5330506821893565e-07, 'fcm_dpo/beta': 0.004123254679143429, 'fcm_dpo/q_t': 0.36313188076019287, 'fcm_dpo/delta': -0.2364530861377716, 'fcm_dpo/margin': 150.44508361816406, 'margin_dpo/margin_mean': 150.44508361816406, 'margin_dpo/margin_std': 164.7996826171875, 'logps/chosen': -177.88558959960938, 'logps/rejected': -369.81146240234375, 'logps/ref_chosen': -68.65887451171875, 'logps/ref_rejected': -110.1396713256836, 'KL/chosen_KL_mean': -109.22671508789062, 'KL/rejected_KL_mean': -259.6717834472656, 'KL/mean': -184.44924926757812, 'KL/std': 148.47653198242188, 'logits/chosen': -0.31450676918029785, 'logits/rejected': -0.2896798849105835, 'epoch': 0.28} + 28%|██▊ | 191/681 [08:06<20:25, 2.50s/it] 28%|██▊ | 192/681 [08:09<20:19, 2.49s/it] {'loss': 1.1173, 'grad_norm': 21.972942352294922, 'learning_rate': 4.5255555107119336e-07, 'fcm_dpo/beta': 0.004099044483155012, 'fcm_dpo/q_t': 0.4107716679573059, 'fcm_dpo/delta': -0.0063680801540613174, 'fcm_dpo/margin': 99.04568481445312, 'margin_dpo/margin_mean': 99.04568481445312, 'margin_dpo/margin_std': 159.619140625, 'logps/chosen': -208.66738891601562, 'logps/rejected': -341.3074951171875, 'logps/ref_chosen': -69.72691345214844, 'logps/ref_rejected': -103.32135009765625, 'KL/chosen_KL_mean': -138.94046020507812, 'KL/rejected_KL_mean': -237.9861602783203, 'KL/mean': -188.46331787109375, 'KL/std': 130.4176025390625, 'logits/chosen': -0.28979045152664185, 'logits/rejected': -0.2879485487937927, 'epoch': 0.28} + 28%|██▊ | 192/681 [08:09<20:19, 2.49s/it] 28%|██▊ | 193/681 [08:11<20:01, 2.46s/it] {'loss': 1.2463, 'grad_norm': 28.599891662597656, 'learning_rate': 4.5180069639630236e-07, 'fcm_dpo/beta': 0.004099993035197258, 'fcm_dpo/q_t': 0.4411364793777466, 'fcm_dpo/delta': 0.03158862516283989, 'fcm_dpo/margin': 61.212486267089844, 'margin_dpo/margin_mean': 61.21249008178711, 'margin_dpo/margin_std': 154.33920288085938, 'logps/chosen': -196.1470947265625, 'logps/rejected': -273.5766296386719, 'logps/ref_chosen': -60.19049835205078, 'logps/ref_rejected': -76.40755462646484, 'KL/chosen_KL_mean': -135.95660400390625, 'KL/rejected_KL_mean': -197.16908264160156, 'KL/mean': -166.56283569335938, 'KL/std': 121.18572998046875, 'logits/chosen': -0.3054850101470947, 'logits/rejected': -0.2954953908920288, 'epoch': 0.28} + 28%|██▊ | 193/681 [08:11<20:01, 2.46s/it] 28%|██▊ | 194/681 [08:14<19:39, 2.42s/it] {'loss': 1.0929, 'grad_norm': 17.3194637298584, 'learning_rate': 4.510405240853854e-07, 'fcm_dpo/beta': 0.004112754482775927, 'fcm_dpo/q_t': 0.4111691117286682, 'fcm_dpo/delta': 0.02185986563563347, 'fcm_dpo/margin': 92.10918426513672, 'margin_dpo/margin_mean': 92.10918426513672, 'margin_dpo/margin_std': 109.87422943115234, 'logps/chosen': -124.33091735839844, 'logps/rejected': -239.28451538085938, 'logps/ref_chosen': -37.84037399291992, 'logps/ref_rejected': -60.684783935546875, 'KL/chosen_KL_mean': -86.49054718017578, 'KL/rejected_KL_mean': -178.5997314453125, 'KL/mean': -132.54513549804688, 'KL/std': 100.00372314453125, 'logits/chosen': -0.22373536229133606, 'logits/rejected': -0.20780496299266815, 'epoch': 0.28} + 28%|██▊ | 194/681 [08:14<19:39, 2.42s/it] 29%|██▊ | 195/681 [08:16<20:08, 2.49s/it] {'loss': 1.0699, 'grad_norm': 20.05968475341797, 'learning_rate': 4.5027505416968985e-07, 'fcm_dpo/beta': 0.004115342628210783, 'fcm_dpo/q_t': 0.4027373194694519, 'fcm_dpo/delta': -0.023933224380016327, 'fcm_dpo/margin': 102.76541900634766, 'margin_dpo/margin_mean': 102.76541137695312, 'margin_dpo/margin_std': 126.29493713378906, 'logps/chosen': -189.1000518798828, 'logps/rejected': -333.744873046875, 'logps/ref_chosen': -54.891571044921875, 'logps/ref_rejected': -96.77095794677734, 'KL/chosen_KL_mean': -134.20848083496094, 'KL/rejected_KL_mean': -236.97390747070312, 'KL/mean': -185.5911865234375, 'KL/std': 123.36296081542969, 'logits/chosen': -0.2823333740234375, 'logits/rejected': -0.29932117462158203, 'epoch': 0.29} + 29%|██▊ | 195/681 [08:16<20:08, 2.49s/it] 29%|██▉ | 196/681 [08:19<20:15, 2.51s/it] {'loss': 1.0639, 'grad_norm': 16.996543884277344, 'learning_rate': 4.495043068200599e-07, 'fcm_dpo/beta': 0.004045085981488228, 'fcm_dpo/q_t': 0.39634737372398376, 'fcm_dpo/delta': -0.06845034658908844, 'fcm_dpo/margin': 114.74829864501953, 'margin_dpo/margin_mean': 114.74829864501953, 'margin_dpo/margin_std': 149.60369873046875, 'logps/chosen': -159.487060546875, 'logps/rejected': -297.0430603027344, 'logps/ref_chosen': -53.245243072509766, 'logps/ref_rejected': -76.05294799804688, 'KL/chosen_KL_mean': -106.2418212890625, 'KL/rejected_KL_mean': -220.99012756347656, 'KL/mean': -163.61598205566406, 'KL/std': 124.76324462890625, 'logits/chosen': -0.3161476254463196, 'logits/rejected': -0.30336615443229675, 'epoch': 0.29} + 29%|██▉ | 196/681 [08:19<20:15, 2.51s/it] 29%|██▉ | 197/681 [08:21<20:26, 2.53s/it] {'loss': 1.1245, 'grad_norm': 18.929014205932617, 'learning_rate': 4.4872830234640493e-07, 'fcm_dpo/beta': 0.004100443329662085, 'fcm_dpo/q_t': 0.4195671081542969, 'fcm_dpo/delta': 0.048719555139541626, 'fcm_dpo/margin': 85.9752197265625, 'margin_dpo/margin_mean': 85.9752197265625, 'margin_dpo/margin_std': 122.52574920654297, 'logps/chosen': -169.8308868408203, 'logps/rejected': -272.5946960449219, 'logps/ref_chosen': -60.42033386230469, 'logps/ref_rejected': -77.20890808105469, 'KL/chosen_KL_mean': -109.41055297851562, 'KL/rejected_KL_mean': -195.3857879638672, 'KL/mean': -152.39816284179688, 'KL/std': 105.53937530517578, 'logits/chosen': -0.2705003023147583, 'logits/rejected': -0.264984130859375, 'epoch': 0.29} + 29%|██▉ | 197/681 [08:21<20:26, 2.53s/it] 29%|██▉ | 198/681 [08:24<20:32, 2.55s/it] {'loss': 1.0587, 'grad_norm': 20.2078914642334, 'learning_rate': 4.479470611971645e-07, 'fcm_dpo/beta': 0.0040567112155258656, 'fcm_dpo/q_t': 0.39734193682670593, 'fcm_dpo/delta': -0.06527149677276611, 'fcm_dpo/margin': 113.95219421386719, 'margin_dpo/margin_mean': 113.95220184326172, 'margin_dpo/margin_std': 150.6449737548828, 'logps/chosen': -176.78131103515625, 'logps/rejected': -332.9405822753906, 'logps/ref_chosen': -55.03618621826172, 'logps/ref_rejected': -97.24325561523438, 'KL/chosen_KL_mean': -121.74513244628906, 'KL/rejected_KL_mean': -235.69732666015625, 'KL/mean': -178.72122192382812, 'KL/std': 134.29644775390625, 'logits/chosen': -0.3216399848461151, 'logits/rejected': -0.3219534158706665, 'epoch': 0.29} + 29%|██▉ | 198/681 [08:24<20:32, 2.55s/it] 29%|██▉ | 199/681 [08:27<20:51, 2.60s/it] {'loss': 1.0711, 'grad_norm': 19.83755111694336, 'learning_rate': 4.471606039587695e-07, 'fcm_dpo/beta': 0.003988361917436123, 'fcm_dpo/q_t': 0.3982279896736145, 'fcm_dpo/delta': -0.05231431871652603, 'fcm_dpo/margin': 112.59829711914062, 'margin_dpo/margin_mean': 112.59829711914062, 'margin_dpo/margin_std': 149.75167846679688, 'logps/chosen': -174.24948120117188, 'logps/rejected': -314.66717529296875, 'logps/ref_chosen': -56.828826904296875, 'logps/ref_rejected': -84.64820861816406, 'KL/chosen_KL_mean': -117.42064666748047, 'KL/rejected_KL_mean': -230.0189666748047, 'KL/mean': -173.71978759765625, 'KL/std': 121.98558044433594, 'logits/chosen': -0.33298349380493164, 'logits/rejected': -0.3157057762145996, 'epoch': 0.29} + 29%|██▉ | 199/681 [08:27<20:51, 2.60s/it] 29%|██▉ | 200/681 [08:29<20:50, 2.60s/it] {'loss': 1.0904, 'grad_norm': 27.9632625579834, 'learning_rate': 4.4636895135509966e-07, 'fcm_dpo/beta': 0.003952971659600735, 'fcm_dpo/q_t': 0.4002299904823303, 'fcm_dpo/delta': -0.05722919851541519, 'fcm_dpo/margin': 114.91127014160156, 'margin_dpo/margin_mean': 114.9112548828125, 'margin_dpo/margin_std': 175.34649658203125, 'logps/chosen': -171.16647338867188, 'logps/rejected': -313.61907958984375, 'logps/ref_chosen': -53.06706237792969, 'logps/ref_rejected': -80.60843658447266, 'KL/chosen_KL_mean': -118.09941101074219, 'KL/rejected_KL_mean': -233.01065063476562, 'KL/mean': -175.55503845214844, 'KL/std': 135.21307373046875, 'logits/chosen': -0.2579571604728699, 'logits/rejected': -0.24183320999145508, 'epoch': 0.29} + 29%|██▉ | 200/681 [08:29<20:50, 2.60s/it] 30%|██▉ | 201/681 [08:32<20:44, 2.59s/it] {'loss': 1.079, 'grad_norm': 20.296661376953125, 'learning_rate': 4.455721242469372e-07, 'fcm_dpo/beta': 0.003938804380595684, 'fcm_dpo/q_t': 0.399710088968277, 'fcm_dpo/delta': -0.04832981526851654, 'fcm_dpo/margin': 113.26231384277344, 'margin_dpo/margin_mean': 113.26231384277344, 'margin_dpo/margin_std': 158.4004669189453, 'logps/chosen': -199.89532470703125, 'logps/rejected': -352.56365966796875, 'logps/ref_chosen': -75.4022216796875, 'logps/ref_rejected': -114.80821990966797, 'KL/chosen_KL_mean': -124.49310302734375, 'KL/rejected_KL_mean': -237.75543212890625, 'KL/mean': -181.124267578125, 'KL/std': 141.97161865234375, 'logits/chosen': -0.35240548849105835, 'logits/rejected': -0.35075196623802185, 'epoch': 0.3} + 30%|██▉ | 201/681 [08:32<20:44, 2.59s/it] 30%|██▉ | 202/681 [08:34<20:48, 2.61s/it] {'loss': 1.1859, 'grad_norm': 21.754104614257812, 'learning_rate': 4.4477014363141755e-07, 'fcm_dpo/beta': 0.003970341291278601, 'fcm_dpo/q_t': 0.4300415515899658, 'fcm_dpo/delta': 0.08675570785999298, 'fcm_dpo/margin': 79.567626953125, 'margin_dpo/margin_mean': 79.567626953125, 'margin_dpo/margin_std': 157.5809326171875, 'logps/chosen': -176.0801544189453, 'logps/rejected': -292.531494140625, 'logps/ref_chosen': -50.101318359375, 'logps/ref_rejected': -86.98503112792969, 'KL/chosen_KL_mean': -125.97883605957031, 'KL/rejected_KL_mean': -205.54647827148438, 'KL/mean': -165.76266479492188, 'KL/std': 121.41524505615234, 'logits/chosen': -0.2775609493255615, 'logits/rejected': -0.29300734400749207, 'epoch': 0.3} + 30%|██▉ | 202/681 [08:34<20:48, 2.61s/it] 30%|██▉ | 203/681 [08:37<21:00, 2.64s/it] {'loss': 1.105, 'grad_norm': 18.4498233795166, 'learning_rate': 4.439630306414758e-07, 'fcm_dpo/beta': 0.003991924226284027, 'fcm_dpo/q_t': 0.41198647022247314, 'fcm_dpo/delta': 0.01531082671135664, 'fcm_dpo/margin': 96.51522827148438, 'margin_dpo/margin_mean': 96.51522827148438, 'margin_dpo/margin_std': 135.06640625, 'logps/chosen': -183.56845092773438, 'logps/rejected': -305.36993408203125, 'logps/ref_chosen': -60.60969543457031, 'logps/ref_rejected': -85.89596557617188, 'KL/chosen_KL_mean': -122.95875549316406, 'KL/rejected_KL_mean': -219.47396850585938, 'KL/mean': -171.21636962890625, 'KL/std': 122.28076934814453, 'logits/chosen': -0.30969899892807007, 'logits/rejected': -0.2990788519382477, 'epoch': 0.3} + 30%|██▉ | 203/681 [08:37<21:00, 2.64s/it] 30%|██▉ | 204/681 [08:40<21:11, 2.67s/it] {'loss': 1.1632, 'grad_norm': 23.171846389770508, 'learning_rate': 4.431508065452897e-07, 'fcm_dpo/beta': 0.00402648001909256, 'fcm_dpo/q_t': 0.42272064089775085, 'fcm_dpo/delta': 0.04800150915980339, 'fcm_dpo/margin': 87.84043884277344, 'margin_dpo/margin_mean': 87.84043884277344, 'margin_dpo/margin_std': 163.03756713867188, 'logps/chosen': -219.60906982421875, 'logps/rejected': -314.9804382324219, 'logps/ref_chosen': -80.16496276855469, 'logps/ref_rejected': -87.69590759277344, 'KL/chosen_KL_mean': -139.44410705566406, 'KL/rejected_KL_mean': -227.28453063964844, 'KL/mean': -183.3643341064453, 'KL/std': 135.77395629882812, 'logits/chosen': -0.40281885862350464, 'logits/rejected': -0.3679213225841522, 'epoch': 0.3} + 30%|██▉ | 204/681 [08:40<21:11, 2.67s/it] 30%|███ | 205/681 [08:42<21:00, 2.65s/it] {'loss': 1.0691, 'grad_norm': 19.90717124938965, 'learning_rate': 4.4233349274571974e-07, 'fcm_dpo/beta': 0.003965743817389011, 'fcm_dpo/q_t': 0.3951718211174011, 'fcm_dpo/delta': -0.061633773148059845, 'fcm_dpo/margin': 115.23167419433594, 'margin_dpo/margin_mean': 115.23167419433594, 'margin_dpo/margin_std': 149.4026641845703, 'logps/chosen': -190.44741821289062, 'logps/rejected': -331.4194030761719, 'logps/ref_chosen': -59.384735107421875, 'logps/ref_rejected': -85.12505340576172, 'KL/chosen_KL_mean': -131.06268310546875, 'KL/rejected_KL_mean': -246.2943572998047, 'KL/mean': -188.67852783203125, 'KL/std': 136.5657958984375, 'logits/chosen': -0.30089646577835083, 'logits/rejected': -0.27246442437171936, 'epoch': 0.3} + 30%|███ | 205/681 [08:43<21:00, 2.65s/it] 30%|███ | 206/681 [08:45<20:21, 2.57s/it] {'loss': 1.0192, 'grad_norm': 26.1035099029541, 'learning_rate': 4.415111107797445e-07, 'fcm_dpo/beta': 0.0039128996431827545, 'fcm_dpo/q_t': 0.38541656732559204, 'fcm_dpo/delta': -0.0976862832903862, 'fcm_dpo/margin': 125.78593444824219, 'margin_dpo/margin_mean': 125.78593444824219, 'margin_dpo/margin_std': 134.7174072265625, 'logps/chosen': -163.27394104003906, 'logps/rejected': -341.04888916015625, 'logps/ref_chosen': -46.964500427246094, 'logps/ref_rejected': -98.9534912109375, 'KL/chosen_KL_mean': -116.30944061279297, 'KL/rejected_KL_mean': -242.0953826904297, 'KL/mean': -179.20240783691406, 'KL/std': 123.43933868408203, 'logits/chosen': -0.2364203929901123, 'logits/rejected': -0.2409205287694931, 'epoch': 0.3} + 30%|███ | 206/681 [08:45<20:21, 2.57s/it] 30%|███ | 207/681 [08:47<20:08, 2.55s/it] {'loss': 0.9999, 'grad_norm': 31.017250061035156, 'learning_rate': 4.4068368231789365e-07, 'fcm_dpo/beta': 0.0038270847871899605, 'fcm_dpo/q_t': 0.37947410345077515, 'fcm_dpo/delta': -0.14793969690799713, 'fcm_dpo/margin': 141.11134338378906, 'margin_dpo/margin_mean': 141.111328125, 'margin_dpo/margin_std': 160.7344970703125, 'logps/chosen': -162.37765502929688, 'logps/rejected': -331.8805236816406, 'logps/ref_chosen': -56.05625915527344, 'logps/ref_rejected': -84.44779968261719, 'KL/chosen_KL_mean': -106.32139587402344, 'KL/rejected_KL_mean': -247.43272399902344, 'KL/mean': -176.8770751953125, 'KL/std': 138.58750915527344, 'logits/chosen': -0.3106921911239624, 'logits/rejected': -0.2881418466567993, 'epoch': 0.3} + 30%|███ | 207/681 [08:47<20:08, 2.55s/it] 31%|███ | 208/681 [08:50<20:18, 2.58s/it] {'loss': 1.0934, 'grad_norm': 28.11204719543457, 'learning_rate': 4.398512291636768e-07, 'fcm_dpo/beta': 0.0037661269307136536, 'fcm_dpo/q_t': 0.4015238881111145, 'fcm_dpo/delta': -0.03632538765668869, 'fcm_dpo/margin': 115.3888168334961, 'margin_dpo/margin_mean': 115.3888168334961, 'margin_dpo/margin_std': 170.67803955078125, 'logps/chosen': -235.37908935546875, 'logps/rejected': -377.9872131347656, 'logps/ref_chosen': -67.06761169433594, 'logps/ref_rejected': -94.28689575195312, 'KL/chosen_KL_mean': -168.3114776611328, 'KL/rejected_KL_mean': -283.7003173828125, 'KL/mean': -226.00588989257812, 'KL/std': 139.61465454101562, 'logits/chosen': -0.34051740169525146, 'logits/rejected': -0.32537776231765747, 'epoch': 0.31} + 31%|███ | 208/681 [08:50<20:18, 2.58s/it] 31%|███ | 209/681 [08:52<19:41, 2.50s/it] {'loss': 1.1282, 'grad_norm': 24.225013732910156, 'learning_rate': 4.3901377325300857e-07, 'fcm_dpo/beta': 0.003780151717364788, 'fcm_dpo/q_t': 0.41364553570747375, 'fcm_dpo/delta': 0.020004911348223686, 'fcm_dpo/margin': 100.72296142578125, 'margin_dpo/margin_mean': 100.72296142578125, 'margin_dpo/margin_std': 158.71670532226562, 'logps/chosen': -194.16268920898438, 'logps/rejected': -319.6455078125, 'logps/ref_chosen': -56.18169403076172, 'logps/ref_rejected': -80.94152069091797, 'KL/chosen_KL_mean': -137.9810028076172, 'KL/rejected_KL_mean': -238.7039794921875, 'KL/mean': -188.3424835205078, 'KL/std': 125.35417175292969, 'logits/chosen': -0.23914602398872375, 'logits/rejected': -0.22716867923736572, 'epoch': 0.31} + 31%|███ | 209/681 [08:52<19:41, 2.50s/it] 31%|███ | 210/681 [08:55<19:31, 2.49s/it] {'loss': 1.0734, 'grad_norm': 22.758333206176758, 'learning_rate': 4.381713366536311e-07, 'fcm_dpo/beta': 0.003775266231968999, 'fcm_dpo/q_t': 0.40104708075523376, 'fcm_dpo/delta': -0.03721902519464493, 'fcm_dpo/margin': 115.35026550292969, 'margin_dpo/margin_mean': 115.35028076171875, 'margin_dpo/margin_std': 150.81455993652344, 'logps/chosen': -168.17947387695312, 'logps/rejected': -313.83953857421875, 'logps/ref_chosen': -46.371822357177734, 'logps/ref_rejected': -76.68162536621094, 'KL/chosen_KL_mean': -121.80764770507812, 'KL/rejected_KL_mean': -237.15792846679688, 'KL/mean': -179.4827880859375, 'KL/std': 125.80329895019531, 'logits/chosen': -0.27275052666664124, 'logits/rejected': -0.2652055025100708, 'epoch': 0.31} + 31%|███ | 210/681 [08:55<19:31, 2.49s/it] 31%|███ | 211/681 [08:57<19:04, 2.44s/it] {'loss': 1.1417, 'grad_norm': 27.190279006958008, 'learning_rate': 4.373239415645323e-07, 'fcm_dpo/beta': 0.00377118238247931, 'fcm_dpo/q_t': 0.41908180713653564, 'fcm_dpo/delta': 0.02294088713824749, 'fcm_dpo/margin': 100.20367431640625, 'margin_dpo/margin_mean': 100.20367431640625, 'margin_dpo/margin_std': 171.17764282226562, 'logps/chosen': -256.9510192871094, 'logps/rejected': -365.0433349609375, 'logps/ref_chosen': -78.93235778808594, 'logps/ref_rejected': -86.82098388671875, 'KL/chosen_KL_mean': -178.0186767578125, 'KL/rejected_KL_mean': -278.22235107421875, 'KL/mean': -228.12051391601562, 'KL/std': 146.26031494140625, 'logits/chosen': -0.3228422999382019, 'logits/rejected': -0.2874525487422943, 'epoch': 0.31} + 31%|███ | 211/681 [08:57<19:04, 2.44s/it] 31%|███ | 212/681 [09:00<19:18, 2.47s/it] {'loss': 1.0371, 'grad_norm': 24.610458374023438, 'learning_rate': 4.3647161031536086e-07, 'fcm_dpo/beta': 0.0036756170447915792, 'fcm_dpo/q_t': 0.3847534656524658, 'fcm_dpo/delta': -0.12374652922153473, 'fcm_dpo/margin': 140.2224884033203, 'margin_dpo/margin_mean': 140.22247314453125, 'margin_dpo/margin_std': 175.03460693359375, 'logps/chosen': -207.1398162841797, 'logps/rejected': -392.2231140136719, 'logps/ref_chosen': -58.19701385498047, 'logps/ref_rejected': -103.05785369873047, 'KL/chosen_KL_mean': -148.94281005859375, 'KL/rejected_KL_mean': -289.165283203125, 'KL/mean': -219.05404663085938, 'KL/std': 156.24212646484375, 'logits/chosen': -0.29684221744537354, 'logits/rejected': -0.29030919075012207, 'epoch': 0.31} + 31%|███ | 212/681 [09:00<19:18, 2.47s/it] 31%|███▏ | 213/681 [09:02<19:33, 2.51s/it] {'loss': 1.0262, 'grad_norm': 24.44261360168457, 'learning_rate': 4.3561436536583774e-07, 'fcm_dpo/beta': 0.003611032385379076, 'fcm_dpo/q_t': 0.3859563171863556, 'fcm_dpo/delta': -0.1061759814620018, 'fcm_dpo/margin': 138.56756591796875, 'margin_dpo/margin_mean': 138.56756591796875, 'margin_dpo/margin_std': 162.03538513183594, 'logps/chosen': -206.11019897460938, 'logps/rejected': -371.07977294921875, 'logps/ref_chosen': -67.51271057128906, 'logps/ref_rejected': -93.91471862792969, 'KL/chosen_KL_mean': -138.5974884033203, 'KL/rejected_KL_mean': -277.1650390625, 'KL/mean': -207.8812713623047, 'KL/std': 138.962158203125, 'logits/chosen': -0.3207147717475891, 'logits/rejected': -0.29644298553466797, 'epoch': 0.31} + 31%|███▏ | 213/681 [09:02<19:33, 2.51s/it] 31%|███▏ | 214/681 [09:04<18:54, 2.43s/it] {'loss': 1.0675, 'grad_norm': 21.233543395996094, 'learning_rate': 4.3475222930516473e-07, 'fcm_dpo/beta': 0.003579269163310528, 'fcm_dpo/q_t': 0.4001598358154297, 'fcm_dpo/delta': -0.04467523843050003, 'fcm_dpo/margin': 123.68592071533203, 'margin_dpo/margin_mean': 123.68592834472656, 'margin_dpo/margin_std': 161.08143615722656, 'logps/chosen': -159.85321044921875, 'logps/rejected': -319.45166015625, 'logps/ref_chosen': -41.604888916015625, 'logps/ref_rejected': -77.51741027832031, 'KL/chosen_KL_mean': -118.24832916259766, 'KL/rejected_KL_mean': -241.93423461914062, 'KL/mean': -180.09129333496094, 'KL/std': 136.1957550048828, 'logits/chosen': -0.27026987075805664, 'logits/rejected': -0.2760501205921173, 'epoch': 0.31} + 31%|███▏ | 214/681 [09:05<18:54, 2.43s/it] 32%|███▏ | 215/681 [09:07<19:20, 2.49s/it] {'loss': 1.0582, 'grad_norm': 22.27779769897461, 'learning_rate': 4.3388522485142885e-07, 'fcm_dpo/beta': 0.003539241384714842, 'fcm_dpo/q_t': 0.3984990119934082, 'fcm_dpo/delta': -0.043820615857839584, 'fcm_dpo/margin': 124.79898071289062, 'margin_dpo/margin_mean': 124.79898071289062, 'margin_dpo/margin_std': 149.58108520507812, 'logps/chosen': -197.81959533691406, 'logps/rejected': -359.303955078125, 'logps/ref_chosen': -53.279266357421875, 'logps/ref_rejected': -89.96464538574219, 'KL/chosen_KL_mean': -144.5403289794922, 'KL/rejected_KL_mean': -269.33929443359375, 'KL/mean': -206.9398193359375, 'KL/std': 141.94100952148438, 'logits/chosen': -0.30568164587020874, 'logits/rejected': -0.3003992736339569, 'epoch': 0.32} + 32%|███▏ | 215/681 [09:07<19:20, 2.49s/it] 32%|███▏ | 216/681 [09:10<20:02, 2.59s/it] {'loss': 1.0879, 'grad_norm': 22.973649978637695, 'learning_rate': 4.330133748510036e-07, 'fcm_dpo/beta': 0.003528446890413761, 'fcm_dpo/q_t': 0.40117913484573364, 'fcm_dpo/delta': -0.04068659618496895, 'fcm_dpo/margin': 124.37261199951172, 'margin_dpo/margin_mean': 124.37261199951172, 'margin_dpo/margin_std': 179.5612030029297, 'logps/chosen': -194.51840209960938, 'logps/rejected': -347.2021484375, 'logps/ref_chosen': -48.887794494628906, 'logps/ref_rejected': -77.19892883300781, 'KL/chosen_KL_mean': -145.63059997558594, 'KL/rejected_KL_mean': -270.00323486328125, 'KL/mean': -207.81692504882812, 'KL/std': 146.82232666015625, 'logits/chosen': -0.3099059760570526, 'logits/rejected': -0.29590481519699097, 'epoch': 0.32} + 32%|███▏ | 216/681 [09:10<20:02, 2.59s/it] 32%|███▏ | 217/681 [09:12<19:47, 2.56s/it] {'loss': 1.0126, 'grad_norm': 19.35890007019043, 'learning_rate': 4.3213670227794757e-07, 'fcm_dpo/beta': 0.0034370056819170713, 'fcm_dpo/q_t': 0.3825801610946655, 'fcm_dpo/delta': -0.13140688836574554, 'fcm_dpo/margin': 152.53384399414062, 'margin_dpo/margin_mean': 152.53384399414062, 'margin_dpo/margin_std': 178.2061767578125, 'logps/chosen': -195.79861450195312, 'logps/rejected': -398.56549072265625, 'logps/ref_chosen': -49.845306396484375, 'logps/ref_rejected': -100.07832336425781, 'KL/chosen_KL_mean': -145.95330810546875, 'KL/rejected_KL_mean': -298.4871826171875, 'KL/mean': -222.22024536132812, 'KL/std': 154.306396484375, 'logits/chosen': -0.2936730980873108, 'logits/rejected': -0.2913660407066345, 'epoch': 0.32} + 32%|███▏ | 217/681 [09:12<19:47, 2.56s/it] 32%|███▏ | 218/681 [09:15<19:53, 2.58s/it] {'loss': 1.1119, 'grad_norm': 20.17465591430664, 'learning_rate': 4.3125523023339815e-07, 'fcm_dpo/beta': 0.003414642531424761, 'fcm_dpo/q_t': 0.4108254313468933, 'fcm_dpo/delta': 0.00023527629673480988, 'fcm_dpo/margin': 117.06442260742188, 'margin_dpo/margin_mean': 117.06441497802734, 'margin_dpo/margin_std': 178.00685119628906, 'logps/chosen': -208.75689697265625, 'logps/rejected': -355.09100341796875, 'logps/ref_chosen': -58.576683044433594, 'logps/ref_rejected': -87.84639739990234, 'KL/chosen_KL_mean': -150.1802215576172, 'KL/rejected_KL_mean': -267.24462890625, 'KL/mean': -208.71240234375, 'KL/std': 148.29302978515625, 'logits/chosen': -0.304365873336792, 'logits/rejected': -0.3002937436103821, 'epoch': 0.32} + 32%|███▏ | 218/681 [09:15<19:53, 2.58s/it] 32%|███▏ | 219/681 [09:18<19:58, 2.59s/it] {'loss': 1.1775, 'grad_norm': 28.482004165649414, 'learning_rate': 4.303689819449636e-07, 'fcm_dpo/beta': 0.0034687574952840805, 'fcm_dpo/q_t': 0.42399221658706665, 'fcm_dpo/delta': 0.059782225638628006, 'fcm_dpo/margin': 98.41824340820312, 'margin_dpo/margin_mean': 98.4182357788086, 'margin_dpo/margin_std': 191.77099609375, 'logps/chosen': -222.39869689941406, 'logps/rejected': -345.5634765625, 'logps/ref_chosen': -61.083858489990234, 'logps/ref_rejected': -85.83042907714844, 'KL/chosen_KL_mean': -161.31483459472656, 'KL/rejected_KL_mean': -259.7330627441406, 'KL/mean': -210.52394104003906, 'KL/std': 153.1935577392578, 'logits/chosen': -0.3322892487049103, 'logits/rejected': -0.3280831575393677, 'epoch': 0.32} + 32%|███▏ | 219/681 [09:18<19:58, 2.59s/it] 32%|███▏ | 220/681 [09:20<19:55, 2.59s/it] {'loss': 1.1759, 'grad_norm': 23.411617279052734, 'learning_rate': 4.2947798076611047e-07, 'fcm_dpo/beta': 0.0035150342155247927, 'fcm_dpo/q_t': 0.43284928798675537, 'fcm_dpo/delta': 0.11705435812473297, 'fcm_dpo/margin': 81.49143981933594, 'margin_dpo/margin_mean': 81.49143981933594, 'margin_dpo/margin_std': 136.2982177734375, 'logps/chosen': -248.0543975830078, 'logps/rejected': -347.2000732421875, 'logps/ref_chosen': -70.03128051757812, 'logps/ref_rejected': -87.68551635742188, 'KL/chosen_KL_mean': -178.02310180664062, 'KL/rejected_KL_mean': -259.5145568847656, 'KL/mean': -218.76882934570312, 'KL/std': 128.1570587158203, 'logits/chosen': -0.34668925404548645, 'logits/rejected': -0.32663899660110474, 'epoch': 0.32} + 32%|███▏ | 220/681 [09:20<19:55, 2.59s/it] 32%|███▏ | 221/681 [09:23<19:40, 2.57s/it] {'loss': 0.9405, 'grad_norm': 30.417999267578125, 'learning_rate': 4.285822501755485e-07, 'fcm_dpo/beta': 0.0034304747823625803, 'fcm_dpo/q_t': 0.3586632013320923, 'fcm_dpo/delta': -0.23470783233642578, 'fcm_dpo/margin': 180.9005584716797, 'margin_dpo/margin_mean': 180.9005584716797, 'margin_dpo/margin_std': 174.17965698242188, 'logps/chosen': -200.29312133789062, 'logps/rejected': -435.50665283203125, 'logps/ref_chosen': -52.15470886230469, 'logps/ref_rejected': -106.46768188476562, 'KL/chosen_KL_mean': -148.138427734375, 'KL/rejected_KL_mean': -329.03900146484375, 'KL/mean': -238.5886993408203, 'KL/std': 168.93533325195312, 'logits/chosen': -0.3302137851715088, 'logits/rejected': -0.3399258852005005, 'epoch': 0.32} + 32%|███▏ | 221/681 [09:23<19:40, 2.57s/it] 33%|███▎ | 222/681 [09:25<19:36, 2.56s/it] {'loss': 1.0513, 'grad_norm': 26.266162872314453, 'learning_rate': 4.276818137766118e-07, 'fcm_dpo/beta': 0.0033583808690309525, 'fcm_dpo/q_t': 0.39468562602996826, 'fcm_dpo/delta': -0.06650819629430771, 'fcm_dpo/margin': 137.9884796142578, 'margin_dpo/margin_mean': 137.9884796142578, 'margin_dpo/margin_std': 170.7541046142578, 'logps/chosen': -211.3773193359375, 'logps/rejected': -388.3958740234375, 'logps/ref_chosen': -60.971099853515625, 'logps/ref_rejected': -100.00115203857422, 'KL/chosen_KL_mean': -150.40621948242188, 'KL/rejected_KL_mean': -288.39471435546875, 'KL/mean': -219.40045166015625, 'KL/std': 150.12142944335938, 'logits/chosen': -0.3900914788246155, 'logits/rejected': -0.39828717708587646, 'epoch': 0.33} + 33%|███▎ | 222/681 [09:25<19:36, 2.56s/it] 33%|███▎ | 223/681 [09:28<18:41, 2.45s/it] {'loss': 1.1331, 'grad_norm': 27.220115661621094, 'learning_rate': 4.2677669529663686e-07, 'fcm_dpo/beta': 0.0033374596387147903, 'fcm_dpo/q_t': 0.413519948720932, 'fcm_dpo/delta': 0.014067416079342365, 'fcm_dpo/margin': 115.79170227050781, 'margin_dpo/margin_mean': 115.79170227050781, 'margin_dpo/margin_std': 194.0624542236328, 'logps/chosen': -219.76812744140625, 'logps/rejected': -365.7442626953125, 'logps/ref_chosen': -52.64057540893555, 'logps/ref_rejected': -82.82502746582031, 'KL/chosen_KL_mean': -167.1275634765625, 'KL/rejected_KL_mean': -282.91925048828125, 'KL/mean': -225.02340698242188, 'KL/std': 149.61036682128906, 'logits/chosen': -0.28457504510879517, 'logits/rejected': -0.28229665756225586, 'epoch': 0.33} + 33%|███▎ | 223/681 [09:28<18:41, 2.45s/it] 33%|███▎ | 224/681 [09:30<17:53, 2.35s/it] {'loss': 1.0896, 'grad_norm': 22.146337509155273, 'learning_rate': 4.2586691858633747e-07, 'fcm_dpo/beta': 0.003296963172033429, 'fcm_dpo/q_t': 0.4030272364616394, 'fcm_dpo/delta': -0.046691399067640305, 'fcm_dpo/margin': 134.4194793701172, 'margin_dpo/margin_mean': 134.4194793701172, 'margin_dpo/margin_std': 198.03443908691406, 'logps/chosen': -200.1857147216797, 'logps/rejected': -363.12628173828125, 'logps/ref_chosen': -48.59541320800781, 'logps/ref_rejected': -77.11648559570312, 'KL/chosen_KL_mean': -151.59030151367188, 'KL/rejected_KL_mean': -286.0097961425781, 'KL/mean': -218.800048828125, 'KL/std': 170.38592529296875, 'logits/chosen': -0.3159272074699402, 'logits/rejected': -0.29905927181243896, 'epoch': 0.33} + 33%|███▎ | 224/681 [09:30<17:53, 2.35s/it] 33%|███▎ | 225/681 [09:32<17:39, 2.32s/it] {'loss': 1.0399, 'grad_norm': 20.99376678466797, 'learning_rate': 4.249525076191759e-07, 'fcm_dpo/beta': 0.00325207132846117, 'fcm_dpo/q_t': 0.3860987424850464, 'fcm_dpo/delta': -0.12169913947582245, 'fcm_dpo/margin': 158.46151733398438, 'margin_dpo/margin_mean': 158.4615020751953, 'margin_dpo/margin_std': 211.03097534179688, 'logps/chosen': -236.26797485351562, 'logps/rejected': -436.6319274902344, 'logps/ref_chosen': -58.000465393066406, 'logps/ref_rejected': -99.90291595458984, 'KL/chosen_KL_mean': -178.26751708984375, 'KL/rejected_KL_mean': -336.72900390625, 'KL/mean': -257.4982604980469, 'KL/std': 167.69284057617188, 'logits/chosen': -0.31401747465133667, 'logits/rejected': -0.3065524101257324, 'epoch': 0.33} + 33%|███▎ | 225/681 [09:32<17:39, 2.32s/it] 33%|███▎ | 226/681 [09:35<18:17, 2.41s/it] {'loss': 1.1111, 'grad_norm': 24.45271873474121, 'learning_rate': 4.2403348649073167e-07, 'fcm_dpo/beta': 0.003208290785551071, 'fcm_dpo/q_t': 0.41000163555145264, 'fcm_dpo/delta': -0.01261284202337265, 'fcm_dpo/margin': 128.18907165527344, 'margin_dpo/margin_mean': 128.18907165527344, 'margin_dpo/margin_std': 198.9245147705078, 'logps/chosen': -213.14964294433594, 'logps/rejected': -361.127685546875, 'logps/ref_chosen': -58.898799896240234, 'logps/ref_rejected': -78.68775939941406, 'KL/chosen_KL_mean': -154.25083923339844, 'KL/rejected_KL_mean': -282.43994140625, 'KL/mean': -218.34536743164062, 'KL/std': 166.46963500976562, 'logits/chosen': -0.3766024708747864, 'logits/rejected': -0.3424544930458069, 'epoch': 0.33} + 33%|███▎ | 226/681 [09:35<18:17, 2.41s/it] 33%|███▎ | 227/681 [09:37<18:04, 2.39s/it] {'loss': 1.0312, 'grad_norm': 20.904441833496094, 'learning_rate': 4.2310987941806615e-07, 'fcm_dpo/beta': 0.003157012164592743, 'fcm_dpo/q_t': 0.38600099086761475, 'fcm_dpo/delta': -0.11616270244121552, 'fcm_dpo/margin': 161.3520050048828, 'margin_dpo/margin_mean': 161.35202026367188, 'margin_dpo/margin_std': 202.92633056640625, 'logps/chosen': -240.2106170654297, 'logps/rejected': -441.9028015136719, 'logps/ref_chosen': -59.072181701660156, 'logps/ref_rejected': -99.41236877441406, 'KL/chosen_KL_mean': -181.13844299316406, 'KL/rejected_KL_mean': -342.49041748046875, 'KL/mean': -261.814453125, 'KL/std': 189.39837646484375, 'logits/chosen': -0.34949034452438354, 'logits/rejected': -0.3386501669883728, 'epoch': 0.33} + 33%|███▎ | 227/681 [09:37<18:04, 2.39s/it] 33%|███▎ | 228/681 [09:40<18:43, 2.48s/it] {'loss': 1.1621, 'grad_norm': 23.708755493164062, 'learning_rate': 4.2218171073908463e-07, 'fcm_dpo/beta': 0.0031847129575908184, 'fcm_dpo/q_t': 0.42142853140830994, 'fcm_dpo/delta': 0.05459333956241608, 'fcm_dpo/margin': 108.98725128173828, 'margin_dpo/margin_mean': 108.98725128173828, 'margin_dpo/margin_std': 198.34036254882812, 'logps/chosen': -247.97756958007812, 'logps/rejected': -382.12225341796875, 'logps/ref_chosen': -65.89128875732422, 'logps/ref_rejected': -91.04875183105469, 'KL/chosen_KL_mean': -182.08627319335938, 'KL/rejected_KL_mean': -291.0735168457031, 'KL/mean': -236.57989501953125, 'KL/std': 154.74215698242188, 'logits/chosen': -0.33513695001602173, 'logits/rejected': -0.31932687759399414, 'epoch': 0.33} + 33%|███▎ | 228/681 [09:40<18:43, 2.48s/it] 34%|███▎ | 229/681 [09:42<18:40, 2.48s/it] {'loss': 1.1379, 'grad_norm': 32.29511642456055, 'learning_rate': 4.212490049118951e-07, 'fcm_dpo/beta': 0.003205793909728527, 'fcm_dpo/q_t': 0.41705501079559326, 'fcm_dpo/delta': 0.0365116223692894, 'fcm_dpo/margin': 113.79228210449219, 'margin_dpo/margin_mean': 113.79228210449219, 'margin_dpo/margin_std': 187.65060424804688, 'logps/chosen': -259.8101501464844, 'logps/rejected': -387.4234619140625, 'logps/ref_chosen': -70.70637512207031, 'logps/ref_rejected': -84.52741241455078, 'KL/chosen_KL_mean': -189.10377502441406, 'KL/rejected_KL_mean': -302.89605712890625, 'KL/mean': -245.99990844726562, 'KL/std': 178.9686279296875, 'logits/chosen': -0.4073641300201416, 'logits/rejected': -0.37577980756759644, 'epoch': 0.34} + 34%|███▎ | 229/681 [09:42<18:40, 2.48s/it] 34%|███▍ | 230/681 [09:44<18:23, 2.45s/it] {'loss': 0.9731, 'grad_norm': 29.884607315063477, 'learning_rate': 4.203117865141635e-07, 'fcm_dpo/beta': 0.0031274245120584965, 'fcm_dpo/q_t': 0.37051764130592346, 'fcm_dpo/delta': -0.171125590801239, 'fcm_dpo/margin': 179.509765625, 'margin_dpo/margin_mean': 179.509765625, 'margin_dpo/margin_std': 179.34201049804688, 'logps/chosen': -174.06509399414062, 'logps/rejected': -399.9147644042969, 'logps/ref_chosen': -39.282005310058594, 'logps/ref_rejected': -85.62191009521484, 'KL/chosen_KL_mean': -134.7830810546875, 'KL/rejected_KL_mean': -314.2928466796875, 'KL/mean': -224.5379638671875, 'KL/std': 161.85427856445312, 'logits/chosen': -0.33008766174316406, 'logits/rejected': -0.3357307016849518, 'epoch': 0.34} + 34%|███▍ | 230/681 [09:44<18:23, 2.45s/it] 34%|███▍ | 231/681 [09:47<18:44, 2.50s/it] {'loss': 1.1104, 'grad_norm': 23.125385284423828, 'learning_rate': 4.1937008024246625e-07, 'fcm_dpo/beta': 0.0031172512099146843, 'fcm_dpo/q_t': 0.4157181978225708, 'fcm_dpo/delta': 0.027481382712721825, 'fcm_dpo/margin': 119.82038116455078, 'margin_dpo/margin_mean': 119.82037353515625, 'margin_dpo/margin_std': 167.3756866455078, 'logps/chosen': -216.2100830078125, 'logps/rejected': -346.8779296875, 'logps/ref_chosen': -63.27644348144531, 'logps/ref_rejected': -74.1239013671875, 'KL/chosen_KL_mean': -152.9336395263672, 'KL/rejected_KL_mean': -272.7540283203125, 'KL/mean': -212.84384155273438, 'KL/std': 138.79013061523438, 'logits/chosen': -0.38110148906707764, 'logits/rejected': -0.35203638672828674, 'epoch': 0.34} + 34%|███▍ | 231/681 [09:47<18:44, 2.50s/it] 34%|███▍ | 232/681 [09:50<19:10, 2.56s/it] {'loss': 1.1763, 'grad_norm': 21.933931350708008, 'learning_rate': 4.1842391091163933e-07, 'fcm_dpo/beta': 0.003163769142702222, 'fcm_dpo/q_t': 0.4341059923171997, 'fcm_dpo/delta': 0.10632483661174774, 'fcm_dpo/margin': 93.8593978881836, 'margin_dpo/margin_mean': 93.8593978881836, 'margin_dpo/margin_std': 168.48648071289062, 'logps/chosen': -254.9521484375, 'logps/rejected': -362.03985595703125, 'logps/ref_chosen': -70.74876403808594, 'logps/ref_rejected': -83.97706604003906, 'KL/chosen_KL_mean': -184.20339965820312, 'KL/rejected_KL_mean': -278.06280517578125, 'KL/mean': -231.1331024169922, 'KL/std': 157.26010131835938, 'logits/chosen': -0.397582471370697, 'logits/rejected': -0.37435561418533325, 'epoch': 0.34} + 34%|███▍ | 232/681 [09:50<19:10, 2.56s/it] 34%|███▍ | 233/681 [09:52<19:30, 2.61s/it] {'loss': 1.0642, 'grad_norm': 22.029088973999023, 'learning_rate': 4.174733034541245e-07, 'fcm_dpo/beta': 0.0031418418511748314, 'fcm_dpo/q_t': 0.39174312353134155, 'fcm_dpo/delta': -0.1073966920375824, 'fcm_dpo/margin': 159.8199462890625, 'margin_dpo/margin_mean': 159.8199462890625, 'margin_dpo/margin_std': 231.9136199951172, 'logps/chosen': -215.11419677734375, 'logps/rejected': -427.5312805175781, 'logps/ref_chosen': -54.8829345703125, 'logps/ref_rejected': -107.4800796508789, 'KL/chosen_KL_mean': -160.23126220703125, 'KL/rejected_KL_mean': -320.05120849609375, 'KL/mean': -240.1412353515625, 'KL/std': 174.00173950195312, 'logits/chosen': -0.3995114266872406, 'logits/rejected': -0.40224993228912354, 'epoch': 0.34} + 34%|███▍ | 233/681 [09:52<19:30, 2.61s/it] 34%|███▍ | 234/681 [09:55<19:35, 2.63s/it] {'loss': 1.0221, 'grad_norm': 25.708904266357422, 'learning_rate': 4.165182829193126e-07, 'fcm_dpo/beta': 0.0030462380964308977, 'fcm_dpo/q_t': 0.38654690980911255, 'fcm_dpo/delta': -0.10102301090955734, 'fcm_dpo/margin': 162.1198272705078, 'margin_dpo/margin_mean': 162.1198272705078, 'margin_dpo/margin_std': 174.42822265625, 'logps/chosen': -193.39898681640625, 'logps/rejected': -411.430908203125, 'logps/ref_chosen': -44.094520568847656, 'logps/ref_rejected': -100.00663757324219, 'KL/chosen_KL_mean': -149.30445861816406, 'KL/rejected_KL_mean': -311.4242858886719, 'KL/mean': -230.3643798828125, 'KL/std': 154.63232421875, 'logits/chosen': -0.3387085795402527, 'logits/rejected': -0.36640793085098267, 'epoch': 0.34} + 34%|███▍ | 234/681 [09:55<19:35, 2.63s/it] 35%|███▍ | 235/681 [09:58<19:06, 2.57s/it] {'loss': 1.1737, 'grad_norm': 29.176280975341797, 'learning_rate': 4.1555887447288255e-07, 'fcm_dpo/beta': 0.0031026601791381836, 'fcm_dpo/q_t': 0.42741718888282776, 'fcm_dpo/delta': 0.08341732621192932, 'fcm_dpo/margin': 102.77667236328125, 'margin_dpo/margin_mean': 102.77667236328125, 'margin_dpo/margin_std': 188.78956604003906, 'logps/chosen': -259.1474609375, 'logps/rejected': -390.0812683105469, 'logps/ref_chosen': -62.237911224365234, 'logps/ref_rejected': -90.39506530761719, 'KL/chosen_KL_mean': -196.90953063964844, 'KL/rejected_KL_mean': -299.68621826171875, 'KL/mean': -248.29786682128906, 'KL/std': 150.12139892578125, 'logits/chosen': -0.4230844974517822, 'logits/rejected': -0.4051060080528259, 'epoch': 0.35} + 35%|███▍ | 235/681 [09:58<19:06, 2.57s/it] 35%|███▍ | 236/681 [10:00<19:18, 2.60s/it] {'loss': 0.9972, 'grad_norm': 40.81807327270508, 'learning_rate': 4.1459510339613946e-07, 'fcm_dpo/beta': 0.0030574114061892033, 'fcm_dpo/q_t': 0.3818013072013855, 'fcm_dpo/delta': -0.11282503604888916, 'fcm_dpo/margin': 165.8770294189453, 'margin_dpo/margin_mean': 165.87701416015625, 'margin_dpo/margin_std': 158.9945526123047, 'logps/chosen': -194.72613525390625, 'logps/rejected': -414.7734375, 'logps/ref_chosen': -49.34136199951172, 'logps/ref_rejected': -103.51162719726562, 'KL/chosen_KL_mean': -145.384765625, 'KL/rejected_KL_mean': -311.2618103027344, 'KL/mean': -228.32327270507812, 'KL/std': 162.75277709960938, 'logits/chosen': -0.37076273560523987, 'logits/rejected': -0.37188154458999634, 'epoch': 0.35} + 35%|███▍ | 236/681 [10:00<19:18, 2.60s/it] 35%|███▍ | 237/681 [10:03<19:18, 2.61s/it] {'loss': 1.1268, 'grad_norm': 31.793489456176758, 'learning_rate': 4.136269950853473e-07, 'fcm_dpo/beta': 0.0030537089332938194, 'fcm_dpo/q_t': 0.41523268818855286, 'fcm_dpo/delta': 0.02740669995546341, 'fcm_dpo/margin': 122.33269500732422, 'margin_dpo/margin_mean': 122.33268737792969, 'margin_dpo/margin_std': 192.3961639404297, 'logps/chosen': -258.8587646484375, 'logps/rejected': -421.8037109375, 'logps/ref_chosen': -54.168121337890625, 'logps/ref_rejected': -94.78036499023438, 'KL/chosen_KL_mean': -204.69064331054688, 'KL/rejected_KL_mean': -327.0233459472656, 'KL/mean': -265.85699462890625, 'KL/std': 163.26248168945312, 'logits/chosen': -0.3925231993198395, 'logits/rejected': -0.38862764835357666, 'epoch': 0.35} + 35%|███▍ | 237/681 [10:03<19:18, 2.61s/it] 35%|███▍ | 238/681 [10:06<19:30, 2.64s/it] {'loss': 1.1026, 'grad_norm': 23.8784236907959, 'learning_rate': 4.126545750510605e-07, 'fcm_dpo/beta': 0.0030407910235226154, 'fcm_dpo/q_t': 0.4116755723953247, 'fcm_dpo/delta': 0.004025213420391083, 'fcm_dpo/margin': 130.10498046875, 'margin_dpo/margin_mean': 130.10496520996094, 'margin_dpo/margin_std': 184.20098876953125, 'logps/chosen': -239.7442626953125, 'logps/rejected': -405.2940673828125, 'logps/ref_chosen': -53.973121643066406, 'logps/ref_rejected': -89.41795349121094, 'KL/chosen_KL_mean': -185.77113342285156, 'KL/rejected_KL_mean': -315.8760986328125, 'KL/mean': -250.8236083984375, 'KL/std': 168.3731689453125, 'logits/chosen': -0.3695657551288605, 'logits/rejected': -0.3817945122718811, 'epoch': 0.35} + 35%|███▍ | 238/681 [10:06<19:30, 2.64s/it] 35%|███▌ | 239/681 [10:08<18:44, 2.54s/it] {'loss': 1.0808, 'grad_norm': 22.221601486206055, 'learning_rate': 4.116778689174514e-07, 'fcm_dpo/beta': 0.003023794386535883, 'fcm_dpo/q_t': 0.40190714597702026, 'fcm_dpo/delta': -0.028100494295358658, 'fcm_dpo/margin': 140.81825256347656, 'margin_dpo/margin_mean': 140.81825256347656, 'margin_dpo/margin_std': 182.08518981933594, 'logps/chosen': -268.81298828125, 'logps/rejected': -445.1263732910156, 'logps/ref_chosen': -58.09782409667969, 'logps/ref_rejected': -93.59294128417969, 'KL/chosen_KL_mean': -210.7151641845703, 'KL/rejected_KL_mean': -351.5334167480469, 'KL/mean': -281.1242980957031, 'KL/std': 166.0899200439453, 'logits/chosen': -0.368292897939682, 'logits/rejected': -0.36200201511383057, 'epoch': 0.35} + 35%|███▌ | 239/681 [10:08<18:44, 2.54s/it] 35%|███▌ | 240/681 [10:11<18:57, 2.58s/it] {'loss': 1.1633, 'grad_norm': 35.80266571044922, 'learning_rate': 4.106969024216348e-07, 'fcm_dpo/beta': 0.0030523231253027916, 'fcm_dpo/q_t': 0.42092815041542053, 'fcm_dpo/delta': 0.05162970349192619, 'fcm_dpo/margin': 114.69682312011719, 'margin_dpo/margin_mean': 114.69682312011719, 'margin_dpo/margin_std': 208.11935424804688, 'logps/chosen': -277.6118469238281, 'logps/rejected': -405.81268310546875, 'logps/ref_chosen': -60.6144905090332, 'logps/ref_rejected': -74.1185302734375, 'KL/chosen_KL_mean': -216.9973602294922, 'KL/rejected_KL_mean': -331.69415283203125, 'KL/mean': -274.34576416015625, 'KL/std': 175.84886169433594, 'logits/chosen': -0.38743269443511963, 'logits/rejected': -0.3659111261367798, 'epoch': 0.35} + 35%|███▌ | 240/681 [10:11<18:57, 2.58s/it] 35%|███▌ | 241/681 [10:13<18:36, 2.54s/it] {'loss': 0.9919, 'grad_norm': 22.811471939086914, 'learning_rate': 4.097117014129903e-07, 'fcm_dpo/beta': 0.002998801413923502, 'fcm_dpo/q_t': 0.37420031428337097, 'fcm_dpo/delta': -0.17671522498130798, 'fcm_dpo/margin': 189.06454467773438, 'margin_dpo/margin_mean': 189.06454467773438, 'margin_dpo/margin_std': 214.1299591064453, 'logps/chosen': -247.47486877441406, 'logps/rejected': -458.50921630859375, 'logps/ref_chosen': -66.091064453125, 'logps/ref_rejected': -88.06088256835938, 'KL/chosen_KL_mean': -181.38380432128906, 'KL/rejected_KL_mean': -370.4483337402344, 'KL/mean': -275.91607666015625, 'KL/std': 190.72769165039062, 'logits/chosen': -0.37639686465263367, 'logits/rejected': -0.3528909683227539, 'epoch': 0.35} + 35%|███▌ | 241/681 [10:13<18:36, 2.54s/it] 36%|███▌ | 242/681 [10:15<18:21, 2.51s/it] {'loss': 1.1239, 'grad_norm': 26.62445068359375, 'learning_rate': 4.087222918524807e-07, 'fcm_dpo/beta': 0.0029919487424194813, 'fcm_dpo/q_t': 0.41582486033439636, 'fcm_dpo/delta': 0.023674048483371735, 'fcm_dpo/margin': 125.83666229248047, 'margin_dpo/margin_mean': 125.83665466308594, 'margin_dpo/margin_std': 194.09152221679688, 'logps/chosen': -261.4253234863281, 'logps/rejected': -402.75836181640625, 'logps/ref_chosen': -67.86392974853516, 'logps/ref_rejected': -83.36033630371094, 'KL/chosen_KL_mean': -193.56138610839844, 'KL/rejected_KL_mean': -319.3980407714844, 'KL/mean': -256.479736328125, 'KL/std': 160.45431518554688, 'logits/chosen': -0.3719189763069153, 'logits/rejected': -0.35311925411224365, 'epoch': 0.36} + 36%|███▌ | 242/681 [10:15<18:21, 2.51s/it] 36%|███▌ | 243/681 [10:18<18:21, 2.51s/it] {'loss': 1.0455, 'grad_norm': 31.950265884399414, 'learning_rate': 4.07728699811968e-07, 'fcm_dpo/beta': 0.0029446138069033623, 'fcm_dpo/q_t': 0.39357781410217285, 'fcm_dpo/delta': -0.07035504281520844, 'fcm_dpo/margin': 158.58251953125, 'margin_dpo/margin_mean': 158.58251953125, 'margin_dpo/margin_std': 192.53515625, 'logps/chosen': -245.1937255859375, 'logps/rejected': -417.02764892578125, 'logps/ref_chosen': -63.0842399597168, 'logps/ref_rejected': -76.33563232421875, 'KL/chosen_KL_mean': -182.10948181152344, 'KL/rejected_KL_mean': -340.6920166015625, 'KL/mean': -261.4007568359375, 'KL/std': 171.34756469726562, 'logits/chosen': -0.374033123254776, 'logits/rejected': -0.34913793206214905, 'epoch': 0.36} + 36%|███▌ | 243/681 [10:18<18:21, 2.51s/it] 36%|███▌ | 244/681 [10:20<18:17, 2.51s/it] {'loss': 1.0296, 'grad_norm': 34.01569747924805, 'learning_rate': 4.067309514735267e-07, 'fcm_dpo/beta': 0.0029027406126260757, 'fcm_dpo/q_t': 0.392170250415802, 'fcm_dpo/delta': -0.06980758905410767, 'fcm_dpo/margin': 160.67831420898438, 'margin_dpo/margin_mean': 160.67831420898438, 'margin_dpo/margin_std': 168.09088134765625, 'logps/chosen': -214.076171875, 'logps/rejected': -408.5057067871094, 'logps/ref_chosen': -61.140689849853516, 'logps/ref_rejected': -94.89193725585938, 'KL/chosen_KL_mean': -152.93548583984375, 'KL/rejected_KL_mean': -313.61376953125, 'KL/mean': -233.27462768554688, 'KL/std': 160.87197875976562, 'logits/chosen': -0.4261099100112915, 'logits/rejected': -0.42256179451942444, 'epoch': 0.36} + 36%|███▌ | 244/681 [10:21<18:17, 2.51s/it] 36%|███▌ | 245/681 [10:23<18:43, 2.58s/it] {'loss': 1.1342, 'grad_norm': 24.750608444213867, 'learning_rate': 4.057290731287531e-07, 'fcm_dpo/beta': 0.0028605135157704353, 'fcm_dpo/q_t': 0.4174633324146271, 'fcm_dpo/delta': -0.06291086226701736, 'fcm_dpo/margin': 123.89741516113281, 'margin_dpo/margin_mean': 123.89741516113281, 'margin_dpo/margin_std': 185.7630615234375, 'logps/chosen': -249.43032836914062, 'logps/rejected': -393.70556640625, 'logps/ref_chosen': -67.26228332519531, 'logps/ref_rejected': -87.64010620117188, 'KL/chosen_KL_mean': -182.1680450439453, 'KL/rejected_KL_mean': -306.0654602050781, 'KL/mean': -244.11676025390625, 'KL/std': 164.1088104248047, 'logits/chosen': -0.4355185627937317, 'logits/rejected': -0.4130878448486328, 'epoch': 0.36} + 36%|███▌ | 245/681 [10:23<18:43, 2.58s/it] 36%|███▌ | 246/681 [10:26<18:44, 2.59s/it] {'loss': 1.1283, 'grad_norm': 21.476974487304688, 'learning_rate': 4.047230911780736e-07, 'fcm_dpo/beta': 0.0028710057958960533, 'fcm_dpo/q_t': 0.4186369776725769, 'fcm_dpo/delta': 0.03875650092959404, 'fcm_dpo/margin': 126.31101989746094, 'margin_dpo/margin_mean': 126.31101989746094, 'margin_dpo/margin_std': 195.61752319335938, 'logps/chosen': -244.34207153320312, 'logps/rejected': -388.302490234375, 'logps/ref_chosen': -66.69696807861328, 'logps/ref_rejected': -84.34634399414062, 'KL/chosen_KL_mean': -177.64511108398438, 'KL/rejected_KL_mean': -303.9561462402344, 'KL/mean': -240.8006134033203, 'KL/std': 180.6883544921875, 'logits/chosen': -0.45945310592651367, 'logits/rejected': -0.42501145601272583, 'epoch': 0.36} + 36%|███▌ | 246/681 [10:26<18:44, 2.59s/it] 36%|███▋ | 247/681 [10:28<18:31, 2.56s/it] {'loss': 1.002, 'grad_norm': 30.762731552124023, 'learning_rate': 4.0371303213004814e-07, 'fcm_dpo/beta': 0.002821533940732479, 'fcm_dpo/q_t': 0.3798220753669739, 'fcm_dpo/delta': -0.13242369890213013, 'fcm_dpo/margin': 186.1329345703125, 'margin_dpo/margin_mean': 186.1329345703125, 'margin_dpo/margin_std': 199.94444274902344, 'logps/chosen': -255.9122314453125, 'logps/rejected': -491.73309326171875, 'logps/ref_chosen': -56.6053466796875, 'logps/ref_rejected': -106.29326629638672, 'KL/chosen_KL_mean': -199.306884765625, 'KL/rejected_KL_mean': -385.4398193359375, 'KL/mean': -292.37335205078125, 'KL/std': 187.60211181640625, 'logits/chosen': -0.3767867386341095, 'logits/rejected': -0.37550073862075806, 'epoch': 0.36} + 36%|███▋ | 247/681 [10:28<18:31, 2.56s/it] 36%|███▋ | 248/681 [10:31<18:28, 2.56s/it] {'loss': 1.0304, 'grad_norm': 29.54060935974121, 'learning_rate': 4.0269892260067197e-07, 'fcm_dpo/beta': 0.002776243956759572, 'fcm_dpo/q_t': 0.3941725492477417, 'fcm_dpo/delta': -0.04965835064649582, 'fcm_dpo/margin': 161.00189208984375, 'margin_dpo/margin_mean': 161.00189208984375, 'margin_dpo/margin_std': 149.979248046875, 'logps/chosen': -212.67184448242188, 'logps/rejected': -421.4873962402344, 'logps/ref_chosen': -44.043216705322266, 'logps/ref_rejected': -91.85687255859375, 'KL/chosen_KL_mean': -168.62863159179688, 'KL/rejected_KL_mean': -329.6305236816406, 'KL/mean': -249.12957763671875, 'KL/std': 152.61151123046875, 'logits/chosen': -0.39201515913009644, 'logits/rejected': -0.411907821893692, 'epoch': 0.36} + 36%|███▋ | 248/681 [10:31<18:28, 2.56s/it] 37%|███▋ | 249/681 [10:33<18:03, 2.51s/it] {'loss': 1.2357, 'grad_norm': 31.686059951782227, 'learning_rate': 4.0168078931267426e-07, 'fcm_dpo/beta': 0.002819925779476762, 'fcm_dpo/q_t': 0.44473278522491455, 'fcm_dpo/delta': 0.07187280803918839, 'fcm_dpo/margin': 86.68631744384766, 'margin_dpo/margin_mean': 86.68630981445312, 'margin_dpo/margin_std': 204.52139282226562, 'logps/chosen': -299.9652099609375, 'logps/rejected': -404.6772155761719, 'logps/ref_chosen': -62.442352294921875, 'logps/ref_rejected': -80.46806335449219, 'KL/chosen_KL_mean': -237.52285766601562, 'KL/rejected_KL_mean': -324.20916748046875, 'KL/mean': -280.86602783203125, 'KL/std': 169.23114013671875, 'logits/chosen': -0.4031098484992981, 'logits/rejected': -0.3796127140522003, 'epoch': 0.37} + 37%|███▋ | 249/681 [10:33<18:03, 2.51s/it] 37%|███▋ | 250/681 [10:36<17:44, 2.47s/it] {'loss': 1.048, 'grad_norm': 39.18155288696289, 'learning_rate': 4.006586590948141e-07, 'fcm_dpo/beta': 0.002813429571688175, 'fcm_dpo/q_t': 0.396010160446167, 'fcm_dpo/delta': -0.050442732870578766, 'fcm_dpo/margin': 159.2041015625, 'margin_dpo/margin_mean': 159.2041015625, 'margin_dpo/margin_std': 176.7471923828125, 'logps/chosen': -282.136962890625, 'logps/rejected': -449.57623291015625, 'logps/ref_chosen': -65.63668823242188, 'logps/ref_rejected': -73.87184143066406, 'KL/chosen_KL_mean': -216.50027465820312, 'KL/rejected_KL_mean': -375.7043762207031, 'KL/mean': -296.10235595703125, 'KL/std': 172.25009155273438, 'logits/chosen': -0.4070359468460083, 'logits/rejected': -0.35379326343536377, 'epoch': 0.37} + 37%|███▋ | 250/681 [10:36<17:44, 2.47s/it] 37%|███▋ | 251/681 [10:38<17:37, 2.46s/it] {'loss': 1.1861, 'grad_norm': 36.757362365722656, 'learning_rate': 3.9963255888117325e-07, 'fcm_dpo/beta': 0.002828112803399563, 'fcm_dpo/q_t': 0.4322122633457184, 'fcm_dpo/delta': 0.10367438197135925, 'fcm_dpo/margin': 105.92207336425781, 'margin_dpo/margin_mean': 105.92207336425781, 'margin_dpo/margin_std': 196.50746154785156, 'logps/chosen': -289.7899475097656, 'logps/rejected': -416.1927490234375, 'logps/ref_chosen': -57.182716369628906, 'logps/ref_rejected': -77.66343688964844, 'KL/chosen_KL_mean': -232.60723876953125, 'KL/rejected_KL_mean': -338.529296875, 'KL/mean': -285.5682678222656, 'KL/std': 180.64505004882812, 'logits/chosen': -0.4116126298904419, 'logits/rejected': -0.37989452481269836, 'epoch': 0.37} + 37%|███▋ | 251/681 [10:38<17:37, 2.46s/it] 37%|███▋ | 252/681 [10:41<17:45, 2.48s/it] {'loss': 1.0723, 'grad_norm': 32.75727081298828, 'learning_rate': 3.9860251571044666e-07, 'fcm_dpo/beta': 0.0028432621620595455, 'fcm_dpo/q_t': 0.4044671356678009, 'fcm_dpo/delta': -0.0013277605175971985, 'fcm_dpo/margin': 141.09707641601562, 'margin_dpo/margin_mean': 141.0970916748047, 'margin_dpo/margin_std': 154.25804138183594, 'logps/chosen': -306.90313720703125, 'logps/rejected': -461.0725402832031, 'logps/ref_chosen': -71.68563842773438, 'logps/ref_rejected': -84.75799560546875, 'KL/chosen_KL_mean': -235.21749877929688, 'KL/rejected_KL_mean': -376.3145446777344, 'KL/mean': -305.7660217285156, 'KL/std': 156.46197509765625, 'logits/chosen': -0.4887322187423706, 'logits/rejected': -0.4495335817337036, 'epoch': 0.37} + 37%|███▋ | 252/681 [10:41<17:45, 2.48s/it] 37%|███▋ | 253/681 [10:43<18:16, 2.56s/it] {'loss': 1.0725, 'grad_norm': 30.010822296142578, 'learning_rate': 3.9756855672522986e-07, 'fcm_dpo/beta': 0.002854670397937298, 'fcm_dpo/q_t': 0.40049469470977783, 'fcm_dpo/delta': -0.03276565670967102, 'fcm_dpo/margin': 150.87002563476562, 'margin_dpo/margin_mean': 150.87002563476562, 'margin_dpo/margin_std': 187.1278533935547, 'logps/chosen': -282.01446533203125, 'logps/rejected': -462.4530944824219, 'logps/ref_chosen': -69.1339340209961, 'logps/ref_rejected': -98.70252990722656, 'KL/chosen_KL_mean': -212.88052368164062, 'KL/rejected_KL_mean': -363.75054931640625, 'KL/mean': -288.3155517578125, 'KL/std': 172.37203979492188, 'logits/chosen': -0.4635479152202606, 'logits/rejected': -0.4555034637451172, 'epoch': 0.37} + 37%|███▋ | 253/681 [10:43<18:16, 2.56s/it] 37%|███▋ | 254/681 [10:46<18:24, 2.59s/it] {'loss': 1.1518, 'grad_norm': 27.448017120361328, 'learning_rate': 3.965307091713037e-07, 'fcm_dpo/beta': 0.0028438782319426537, 'fcm_dpo/q_t': 0.42061156034469604, 'fcm_dpo/delta': 0.03458229452371597, 'fcm_dpo/margin': 128.94290161132812, 'margin_dpo/margin_mean': 128.94290161132812, 'margin_dpo/margin_std': 234.24417114257812, 'logps/chosen': -273.0848388671875, 'logps/rejected': -438.18035888671875, 'logps/ref_chosen': -54.154998779296875, 'logps/ref_rejected': -90.30764770507812, 'KL/chosen_KL_mean': -218.92982482910156, 'KL/rejected_KL_mean': -347.8727111816406, 'KL/mean': -283.40130615234375, 'KL/std': 189.0523223876953, 'logits/chosen': -0.42898088693618774, 'logits/rejected': -0.412333220243454, 'epoch': 0.37} + 37%|███▋ | 254/681 [10:46<18:24, 2.59s/it] 37%|███▋ | 255/681 [10:48<17:58, 2.53s/it] {'loss': 1.1249, 'grad_norm': 25.36671257019043, 'learning_rate': 3.954890003969163e-07, 'fcm_dpo/beta': 0.0028422600589692593, 'fcm_dpo/q_t': 0.4121626913547516, 'fcm_dpo/delta': 0.014506950974464417, 'fcm_dpo/margin': 135.67572021484375, 'margin_dpo/margin_mean': 135.67572021484375, 'margin_dpo/margin_std': 212.42404174804688, 'logps/chosen': -278.57037353515625, 'logps/rejected': -447.31298828125, 'logps/ref_chosen': -57.14167022705078, 'logps/ref_rejected': -90.2085952758789, 'KL/chosen_KL_mean': -221.4287109375, 'KL/rejected_KL_mean': -357.1044006347656, 'KL/mean': -289.2665710449219, 'KL/std': 166.446533203125, 'logits/chosen': -0.39531874656677246, 'logits/rejected': -0.3849487900733948, 'epoch': 0.37} + 37%|███▋ | 255/681 [10:48<17:58, 2.53s/it] 38%|███▊ | 256/681 [10:51<17:58, 2.54s/it] {'loss': 1.0753, 'grad_norm': 22.90846061706543, 'learning_rate': 3.944434578520628e-07, 'fcm_dpo/beta': 0.0028450002428144217, 'fcm_dpo/q_t': 0.40111541748046875, 'fcm_dpo/delta': -0.03324428200721741, 'fcm_dpo/margin': 151.77967834472656, 'margin_dpo/margin_mean': 151.77967834472656, 'margin_dpo/margin_std': 200.09576416015625, 'logps/chosen': -237.39138793945312, 'logps/rejected': -426.57049560546875, 'logps/ref_chosen': -55.163490295410156, 'logps/ref_rejected': -92.56291961669922, 'KL/chosen_KL_mean': -182.2279052734375, 'KL/rejected_KL_mean': -334.0075988769531, 'KL/mean': -258.11773681640625, 'KL/std': 175.14785766601562, 'logits/chosen': -0.37317514419555664, 'logits/rejected': -0.38296449184417725, 'epoch': 0.38} + 38%|███▊ | 256/681 [10:51<17:58, 2.54s/it] 38%|███▊ | 257/681 [10:54<18:11, 2.57s/it] {'loss': 1.079, 'grad_norm': 23.42417335510254, 'learning_rate': 3.933941090877615e-07, 'fcm_dpo/beta': 0.0028111585415899754, 'fcm_dpo/q_t': 0.40255630016326904, 'fcm_dpo/delta': -0.03062574565410614, 'fcm_dpo/margin': 152.44454956054688, 'margin_dpo/margin_mean': 152.4445343017578, 'margin_dpo/margin_std': 199.14427185058594, 'logps/chosen': -221.02597045898438, 'logps/rejected': -403.5847473144531, 'logps/ref_chosen': -49.42369842529297, 'logps/ref_rejected': -79.53791809082031, 'KL/chosen_KL_mean': -171.60227966308594, 'KL/rejected_KL_mean': -324.04681396484375, 'KL/mean': -247.82455444335938, 'KL/std': 171.75497436523438, 'logits/chosen': -0.37235432863235474, 'logits/rejected': -0.3574731945991516, 'epoch': 0.38} + 38%|███▊ | 257/681 [10:54<18:11, 2.57s/it] 38%|███▊ | 258/681 [10:56<17:27, 2.48s/it] {'loss': 1.0923, 'grad_norm': 21.441373825073242, 'learning_rate': 3.923409817553284e-07, 'fcm_dpo/beta': 0.0028177620843052864, 'fcm_dpo/q_t': 0.40266942977905273, 'fcm_dpo/delta': -0.022539909929037094, 'fcm_dpo/margin': 149.61135864257812, 'margin_dpo/margin_mean': 149.61135864257812, 'margin_dpo/margin_std': 213.09808349609375, 'logps/chosen': -267.9707336425781, 'logps/rejected': -454.18804931640625, 'logps/ref_chosen': -59.384124755859375, 'logps/ref_rejected': -95.99010467529297, 'KL/chosen_KL_mean': -208.58660888671875, 'KL/rejected_KL_mean': -358.19793701171875, 'KL/mean': -283.39227294921875, 'KL/std': 177.68453979492188, 'logits/chosen': -0.38592028617858887, 'logits/rejected': -0.38714897632598877, 'epoch': 0.38} + 38%|███▊ | 258/681 [10:56<17:27, 2.48s/it] 38%|███▊ | 259/681 [10:58<17:29, 2.49s/it] {'loss': 1.1409, 'grad_norm': 20.104839324951172, 'learning_rate': 3.9128410360564793e-07, 'fcm_dpo/beta': 0.0028243116103112698, 'fcm_dpo/q_t': 0.4213777184486389, 'fcm_dpo/delta': 0.056878622621297836, 'fcm_dpo/margin': 122.1714859008789, 'margin_dpo/margin_mean': 122.17149353027344, 'margin_dpo/margin_std': 192.09332275390625, 'logps/chosen': -243.62847900390625, 'logps/rejected': -402.1632995605469, 'logps/ref_chosen': -52.828346252441406, 'logps/ref_rejected': -89.191650390625, 'KL/chosen_KL_mean': -190.80014038085938, 'KL/rejected_KL_mean': -312.9716491699219, 'KL/mean': -251.88589477539062, 'KL/std': 164.8191375732422, 'logits/chosen': -0.43218350410461426, 'logits/rejected': -0.43397071957588196, 'epoch': 0.38} + 38%|███▊ | 259/681 [10:58<17:29, 2.49s/it] 38%|███▊ | 260/681 [11:01<17:51, 2.54s/it] {'loss': 1.0368, 'grad_norm': 28.360965728759766, 'learning_rate': 3.9022350248844246e-07, 'fcm_dpo/beta': 0.0028252771589905024, 'fcm_dpo/q_t': 0.39523473381996155, 'fcm_dpo/delta': -0.06110315024852753, 'fcm_dpo/margin': 162.20968627929688, 'margin_dpo/margin_mean': 162.20970153808594, 'margin_dpo/margin_std': 178.4059295654297, 'logps/chosen': -227.02001953125, 'logps/rejected': -436.90185546875, 'logps/ref_chosen': -47.41767501831055, 'logps/ref_rejected': -95.08978271484375, 'KL/chosen_KL_mean': -179.60235595703125, 'KL/rejected_KL_mean': -341.81207275390625, 'KL/mean': -260.7071838378906, 'KL/std': 161.13522338867188, 'logits/chosen': -0.3964114189147949, 'logits/rejected': -0.41389453411102295, 'epoch': 0.38} + 38%|███▊ | 260/681 [11:01<17:51, 2.54s/it] 38%|███▊ | 261/681 [11:03<17:08, 2.45s/it] {'loss': 1.0807, 'grad_norm': 19.33860969543457, 'learning_rate': 3.891592063515376e-07, 'fcm_dpo/beta': 0.0027923104353249073, 'fcm_dpo/q_t': 0.404338002204895, 'fcm_dpo/delta': -0.022975105792284012, 'fcm_dpo/margin': 151.08978271484375, 'margin_dpo/margin_mean': 151.08978271484375, 'margin_dpo/margin_std': 202.36895751953125, 'logps/chosen': -230.94192504882812, 'logps/rejected': -417.5152893066406, 'logps/ref_chosen': -53.03137969970703, 'logps/ref_rejected': -88.51494598388672, 'KL/chosen_KL_mean': -177.91055297851562, 'KL/rejected_KL_mean': -329.0003356933594, 'KL/mean': -253.4554443359375, 'KL/std': 176.36572265625, 'logits/chosen': -0.344787061214447, 'logits/rejected': -0.3448353409767151, 'epoch': 0.38} + 38%|███▊ | 261/681 [11:03<17:08, 2.45s/it] 38%|███▊ | 262/681 [11:06<16:55, 2.42s/it] {'loss': 1.1113, 'grad_norm': 22.37173843383789, 'learning_rate': 3.880912432401264e-07, 'fcm_dpo/beta': 0.002820716006681323, 'fcm_dpo/q_t': 0.41728508472442627, 'fcm_dpo/delta': 0.04834378883242607, 'fcm_dpo/margin': 125.2067642211914, 'margin_dpo/margin_mean': 125.2067642211914, 'margin_dpo/margin_std': 159.09869384765625, 'logps/chosen': -268.8022766113281, 'logps/rejected': -420.80743408203125, 'logps/ref_chosen': -59.620140075683594, 'logps/ref_rejected': -86.41853332519531, 'KL/chosen_KL_mean': -209.18212890625, 'KL/rejected_KL_mean': -334.3888854980469, 'KL/mean': -271.7855224609375, 'KL/std': 161.00527954101562, 'logits/chosen': -0.37253305315971375, 'logits/rejected': -0.349983811378479, 'epoch': 0.38} + 38%|███▊ | 262/681 [11:06<16:55, 2.42s/it] 39%|███▊ | 263/681 [11:08<16:54, 2.43s/it] {'loss': 1.0312, 'grad_norm': 20.149484634399414, 'learning_rate': 3.870196412960302e-07, 'fcm_dpo/beta': 0.0027740350924432278, 'fcm_dpo/q_t': 0.3883536458015442, 'fcm_dpo/delta': -0.09155195951461792, 'fcm_dpo/margin': 175.40493774414062, 'margin_dpo/margin_mean': 175.40493774414062, 'margin_dpo/margin_std': 203.16934204101562, 'logps/chosen': -255.53387451171875, 'logps/rejected': -468.3750915527344, 'logps/ref_chosen': -59.42094421386719, 'logps/ref_rejected': -96.85720825195312, 'KL/chosen_KL_mean': -196.1129150390625, 'KL/rejected_KL_mean': -371.51788330078125, 'KL/mean': -283.8154296875, 'KL/std': 189.73565673828125, 'logits/chosen': -0.38992154598236084, 'logits/rejected': -0.36442500352859497, 'epoch': 0.39} + 39%|███▊ | 263/681 [11:08<16:54, 2.43s/it] 39%|███▉ | 264/681 [11:11<17:30, 2.52s/it] {'loss': 1.0828, 'grad_norm': 21.269851684570312, 'learning_rate': 3.8594442875695665e-07, 'fcm_dpo/beta': 0.0027424870058894157, 'fcm_dpo/q_t': 0.4036809206008911, 'fcm_dpo/delta': -0.022643636912107468, 'fcm_dpo/margin': 153.38145446777344, 'margin_dpo/margin_mean': 153.38145446777344, 'margin_dpo/margin_std': 199.32289123535156, 'logps/chosen': -277.86090087890625, 'logps/rejected': -462.37646484375, 'logps/ref_chosen': -62.722084045410156, 'logps/ref_rejected': -93.85620880126953, 'KL/chosen_KL_mean': -215.1387939453125, 'KL/rejected_KL_mean': -368.520263671875, 'KL/mean': -291.82952880859375, 'KL/std': 174.2871856689453, 'logits/chosen': -0.4510270953178406, 'logits/rejected': -0.44331079721450806, 'epoch': 0.39} + 39%|███▉ | 264/681 [11:11<17:30, 2.52s/it] 39%|███▉ | 265/681 [11:13<17:28, 2.52s/it] {'loss': 1.1456, 'grad_norm': 26.573469161987305, 'learning_rate': 3.848656339557562e-07, 'fcm_dpo/beta': 0.0027675144374370575, 'fcm_dpo/q_t': 0.4150318503379822, 'fcm_dpo/delta': 0.01740371063351631, 'fcm_dpo/margin': 138.47540283203125, 'margin_dpo/margin_mean': 138.47540283203125, 'margin_dpo/margin_std': 247.60830688476562, 'logps/chosen': -310.6446533203125, 'logps/rejected': -475.169189453125, 'logps/ref_chosen': -61.971466064453125, 'logps/ref_rejected': -88.02059936523438, 'KL/chosen_KL_mean': -248.67318725585938, 'KL/rejected_KL_mean': -387.1485900878906, 'KL/mean': -317.910888671875, 'KL/std': 200.890380859375, 'logits/chosen': -0.3928653597831726, 'logits/rejected': -0.37691766023635864, 'epoch': 0.39} + 39%|███▉ | 265/681 [11:13<17:28, 2.52s/it] 39%|███▉ | 266/681 [11:16<17:21, 2.51s/it] {'loss': 1.1671, 'grad_norm': 47.3087272644043, 'learning_rate': 3.8378328531967507e-07, 'fcm_dpo/beta': 0.0028024273924529552, 'fcm_dpo/q_t': 0.4282206892967224, 'fcm_dpo/delta': 0.08327002823352814, 'fcm_dpo/margin': 113.96560668945312, 'margin_dpo/margin_mean': 113.96561431884766, 'margin_dpo/margin_std': 202.73809814453125, 'logps/chosen': -319.6100769042969, 'logps/rejected': -434.44720458984375, 'logps/ref_chosen': -67.09967041015625, 'logps/ref_rejected': -67.97122192382812, 'KL/chosen_KL_mean': -252.51040649414062, 'KL/rejected_KL_mean': -366.4759826660156, 'KL/mean': -309.49322509765625, 'KL/std': 169.7954864501953, 'logits/chosen': -0.4444202184677124, 'logits/rejected': -0.40275368094444275, 'epoch': 0.39} + 39%|███▉ | 266/681 [11:16<17:21, 2.51s/it] 39%|███▉ | 267/681 [11:18<17:23, 2.52s/it] {'loss': 1.1036, 'grad_norm': 42.9015007019043, 'learning_rate': 3.8269741136960646e-07, 'fcm_dpo/beta': 0.002801567316055298, 'fcm_dpo/q_t': 0.4081183075904846, 'fcm_dpo/delta': -0.007594583556056023, 'fcm_dpo/margin': 145.2933349609375, 'margin_dpo/margin_mean': 145.2933349609375, 'margin_dpo/margin_std': 216.03964233398438, 'logps/chosen': -299.02911376953125, 'logps/rejected': -465.5201416015625, 'logps/ref_chosen': -68.97075653076172, 'logps/ref_rejected': -90.16844940185547, 'KL/chosen_KL_mean': -230.05838012695312, 'KL/rejected_KL_mean': -375.3517150878906, 'KL/mean': -302.7050476074219, 'KL/std': 186.0106964111328, 'logits/chosen': -0.45946192741394043, 'logits/rejected': -0.4249149560928345, 'epoch': 0.39} + 39%|███▉ | 267/681 [11:18<17:23, 2.52s/it] 39%|███▉ | 268/681 [11:21<17:24, 2.53s/it] {'loss': 1.0956, 'grad_norm': 40.570281982421875, 'learning_rate': 3.8160804071933894e-07, 'fcm_dpo/beta': 0.002796788001433015, 'fcm_dpo/q_t': 0.40683937072753906, 'fcm_dpo/delta': -0.014621859416365623, 'fcm_dpo/margin': 147.9793243408203, 'margin_dpo/margin_mean': 147.9793243408203, 'margin_dpo/margin_std': 214.23509216308594, 'logps/chosen': -296.368408203125, 'logps/rejected': -490.0950927734375, 'logps/ref_chosen': -55.90031051635742, 'logps/ref_rejected': -101.64763641357422, 'KL/chosen_KL_mean': -240.4680938720703, 'KL/rejected_KL_mean': -388.44744873046875, 'KL/mean': -314.457763671875, 'KL/std': 178.60980224609375, 'logits/chosen': -0.4454725980758667, 'logits/rejected': -0.4506447911262512, 'epoch': 0.39} + 39%|███▉ | 268/681 [11:21<17:24, 2.53s/it] 40%|███▉ | 269/681 [11:23<17:12, 2.51s/it] {'loss': 1.0666, 'grad_norm': 46.9615364074707, 'learning_rate': 3.8051520207480204e-07, 'fcm_dpo/beta': 0.0027706455439329147, 'fcm_dpo/q_t': 0.39228904247283936, 'fcm_dpo/delta': -0.07959494739770889, 'fcm_dpo/margin': 171.74826049804688, 'margin_dpo/margin_mean': 171.74826049804688, 'margin_dpo/margin_std': 238.62860107421875, 'logps/chosen': -337.2974853515625, 'logps/rejected': -546.3555297851562, 'logps/ref_chosen': -70.03955841064453, 'logps/ref_rejected': -107.34937286376953, 'KL/chosen_KL_mean': -267.2579345703125, 'KL/rejected_KL_mean': -439.00616455078125, 'KL/mean': -353.1320495605469, 'KL/std': 187.59320068359375, 'logits/chosen': -0.4752381443977356, 'logits/rejected': -0.4565969407558441, 'epoch': 0.4} + 40%|███▉ | 269/681 [11:23<17:12, 2.51s/it] 40%|███▉ | 270/681 [11:26<17:31, 2.56s/it] {'loss': 1.1284, 'grad_norm': 37.03840255737305, 'learning_rate': 3.794189242333106e-07, 'fcm_dpo/beta': 0.0027725521940737963, 'fcm_dpo/q_t': 0.41288816928863525, 'fcm_dpo/delta': 0.015092555433511734, 'fcm_dpo/margin': 138.9442901611328, 'margin_dpo/margin_mean': 138.94430541992188, 'margin_dpo/margin_std': 224.98004150390625, 'logps/chosen': -316.7245178222656, 'logps/rejected': -496.06396484375, 'logps/ref_chosen': -69.53347778320312, 'logps/ref_rejected': -109.92864990234375, 'KL/chosen_KL_mean': -247.1910400390625, 'KL/rejected_KL_mean': -386.13531494140625, 'KL/mean': -316.6631774902344, 'KL/std': 175.74114990234375, 'logits/chosen': -0.49404847621917725, 'logits/rejected': -0.48589587211608887, 'epoch': 0.4} + 40%|███▉ | 270/681 [11:26<17:31, 2.56s/it] 40%|███▉ | 271/681 [11:28<17:10, 2.51s/it] {'loss': 1.036, 'grad_norm': 28.625146865844727, 'learning_rate': 3.7831923608280514e-07, 'fcm_dpo/beta': 0.002722542965784669, 'fcm_dpo/q_t': 0.3910220265388489, 'fcm_dpo/delta': -0.08434660732746124, 'fcm_dpo/margin': 176.25228881835938, 'margin_dpo/margin_mean': 176.25230407714844, 'margin_dpo/margin_std': 207.7333526611328, 'logps/chosen': -289.5682067871094, 'logps/rejected': -501.56976318359375, 'logps/ref_chosen': -56.76456832885742, 'logps/ref_rejected': -92.51383972167969, 'KL/chosen_KL_mean': -232.8036346435547, 'KL/rejected_KL_mean': -409.055908203125, 'KL/mean': -320.9297790527344, 'KL/std': 184.5279541015625, 'logits/chosen': -0.412489652633667, 'logits/rejected': -0.39746084809303284, 'epoch': 0.4} + 40%|███▉ | 271/681 [11:28<17:10, 2.51s/it] 40%|███▉ | 272/681 [11:31<17:32, 2.57s/it] {'loss': 0.9588, 'grad_norm': 28.886754989624023, 'learning_rate': 3.772161666010912e-07, 'fcm_dpo/beta': 0.002650283742696047, 'fcm_dpo/q_t': 0.3656819462776184, 'fcm_dpo/delta': -0.1989383101463318, 'fcm_dpo/margin': 221.74072265625, 'margin_dpo/margin_mean': 221.74072265625, 'margin_dpo/margin_std': 217.42919921875, 'logps/chosen': -277.6108703613281, 'logps/rejected': -555.397216796875, 'logps/ref_chosen': -49.497154235839844, 'logps/ref_rejected': -105.54279327392578, 'KL/chosen_KL_mean': -228.11370849609375, 'KL/rejected_KL_mean': -449.85443115234375, 'KL/mean': -338.98406982421875, 'KL/std': 208.9947052001953, 'logits/chosen': -0.3776736557483673, 'logits/rejected': -0.39134037494659424, 'epoch': 0.4} + 40%|███▉ | 272/681 [11:31<17:32, 2.57s/it] 40%|████ | 273/681 [11:34<17:09, 2.52s/it] {'loss': 1.0197, 'grad_norm': 22.654264450073242, 'learning_rate': 3.761097448550755e-07, 'fcm_dpo/beta': 0.0025578399654477835, 'fcm_dpo/q_t': 0.38167691230773926, 'fcm_dpo/delta': -0.130665123462677, 'fcm_dpo/margin': 204.66293334960938, 'margin_dpo/margin_mean': 204.6629638671875, 'margin_dpo/margin_std': 246.62765502929688, 'logps/chosen': -322.691650390625, 'logps/rejected': -556.8778076171875, 'logps/ref_chosen': -62.97539520263672, 'logps/ref_rejected': -92.49858093261719, 'KL/chosen_KL_mean': -259.71624755859375, 'KL/rejected_KL_mean': -464.37921142578125, 'KL/mean': -362.0477294921875, 'KL/std': 212.40911865234375, 'logits/chosen': -0.3508095145225525, 'logits/rejected': -0.33492064476013184, 'epoch': 0.4} + 40%|████ | 273/681 [11:34<17:09, 2.52s/it] 40%|████ | 274/681 [11:36<16:48, 2.48s/it] {'loss': 1.1192, 'grad_norm': 26.790353775024414, 'learning_rate': 3.75e-07, 'fcm_dpo/beta': 0.002562238136306405, 'fcm_dpo/q_t': 0.4136476218700409, 'fcm_dpo/delta': 0.018024658784270287, 'fcm_dpo/margin': 149.21621704101562, 'margin_dpo/margin_mean': 149.21621704101562, 'margin_dpo/margin_std': 225.90634155273438, 'logps/chosen': -344.27886962890625, 'logps/rejected': -515.1604614257812, 'logps/ref_chosen': -55.66770935058594, 'logps/ref_rejected': -77.33308410644531, 'KL/chosen_KL_mean': -288.61114501953125, 'KL/rejected_KL_mean': -437.827392578125, 'KL/mean': -363.21929931640625, 'KL/std': 188.80880737304688, 'logits/chosen': -0.31436455249786377, 'logits/rejected': -0.299676775932312, 'epoch': 0.4} + 40%|████ | 274/681 [11:36<16:48, 2.48s/it] 40%|████ | 275/681 [11:39<17:09, 2.54s/it] {'loss': 1.0756, 'grad_norm': 23.10381507873535, 'learning_rate': 3.738869612786737e-07, 'fcm_dpo/beta': 0.0025526927784085274, 'fcm_dpo/q_t': 0.40220552682876587, 'fcm_dpo/delta': -0.026383230462670326, 'fcm_dpo/margin': 166.55404663085938, 'margin_dpo/margin_mean': 166.5540313720703, 'margin_dpo/margin_std': 212.90811157226562, 'logps/chosen': -266.65557861328125, 'logps/rejected': -477.91864013671875, 'logps/ref_chosen': -48.594703674316406, 'logps/ref_rejected': -93.30369567871094, 'KL/chosen_KL_mean': -218.06088256835938, 'KL/rejected_KL_mean': -384.61492919921875, 'KL/mean': -301.337890625, 'KL/std': 183.7950439453125, 'logits/chosen': -0.3793821334838867, 'logits/rejected': -0.38762110471725464, 'epoch': 0.4} + 40%|████ | 275/681 [11:39<17:09, 2.54s/it] 41%|████ | 276/681 [11:41<17:01, 2.52s/it] {'loss': 1.1294, 'grad_norm': 25.779754638671875, 'learning_rate': 3.7277065802070204e-07, 'fcm_dpo/beta': 0.00254382798448205, 'fcm_dpo/q_t': 0.4147305190563202, 'fcm_dpo/delta': 0.015904389321804047, 'fcm_dpo/margin': 151.22344970703125, 'margin_dpo/margin_mean': 151.22344970703125, 'margin_dpo/margin_std': 247.93980407714844, 'logps/chosen': -284.5297546386719, 'logps/rejected': -449.5414733886719, 'logps/ref_chosen': -56.57740783691406, 'logps/ref_rejected': -70.36566925048828, 'KL/chosen_KL_mean': -227.9523468017578, 'KL/rejected_KL_mean': -379.1758117675781, 'KL/mean': -303.5640869140625, 'KL/std': 192.03842163085938, 'logits/chosen': -0.369443416595459, 'logits/rejected': -0.34958142042160034, 'epoch': 0.41} + 41%|████ | 276/681 [11:41<17:01, 2.52s/it] 41%|████ | 277/681 [11:43<16:28, 2.45s/it] {'loss': 1.0789, 'grad_norm': 28.24149513244629, 'learning_rate': 3.71651119641714e-07, 'fcm_dpo/beta': 0.002545831026509404, 'fcm_dpo/q_t': 0.4027344584465027, 'fcm_dpo/delta': -0.024078505113720894, 'fcm_dpo/margin': 166.16348266601562, 'margin_dpo/margin_mean': 166.16348266601562, 'margin_dpo/margin_std': 216.53277587890625, 'logps/chosen': -291.17364501953125, 'logps/rejected': -493.94683837890625, 'logps/ref_chosen': -56.27156066894531, 'logps/ref_rejected': -92.88127136230469, 'KL/chosen_KL_mean': -234.90208435058594, 'KL/rejected_KL_mean': -401.0655517578125, 'KL/mean': -317.98382568359375, 'KL/std': 185.35687255859375, 'logits/chosen': -0.3797181248664856, 'logits/rejected': -0.36499595642089844, 'epoch': 0.41} + 41%|████ | 277/681 [11:43<16:28, 2.45s/it] 41%|████ | 278/681 [11:46<16:53, 2.51s/it] {'loss': 1.0374, 'grad_norm': 28.204530715942383, 'learning_rate': 3.705283756425872e-07, 'fcm_dpo/beta': 0.0024995177518576384, 'fcm_dpo/q_t': 0.3918403387069702, 'fcm_dpo/delta': -0.07845177501440048, 'fcm_dpo/margin': 189.73814392089844, 'margin_dpo/margin_mean': 189.73814392089844, 'margin_dpo/margin_std': 220.7503662109375, 'logps/chosen': -253.83644104003906, 'logps/rejected': -481.8862609863281, 'logps/ref_chosen': -52.94194030761719, 'logps/ref_rejected': -91.25357818603516, 'KL/chosen_KL_mean': -200.89450073242188, 'KL/rejected_KL_mean': -390.6326904296875, 'KL/mean': -295.7635803222656, 'KL/std': 192.54470825195312, 'logits/chosen': -0.3621584475040436, 'logits/rejected': -0.36707815527915955, 'epoch': 0.41} + 41%|████ | 278/681 [11:46<16:53, 2.51s/it] 41%|████ | 279/681 [11:49<16:53, 2.52s/it] {'loss': 1.0546, 'grad_norm': 28.575786590576172, 'learning_rate': 3.6940245560867e-07, 'fcm_dpo/beta': 0.002455736044794321, 'fcm_dpo/q_t': 0.3932103216648102, 'fcm_dpo/delta': -0.07850091904401779, 'fcm_dpo/margin': 192.94378662109375, 'margin_dpo/margin_mean': 192.94378662109375, 'margin_dpo/margin_std': 249.06761169433594, 'logps/chosen': -283.1942138671875, 'logps/rejected': -515.34814453125, 'logps/ref_chosen': -48.641319274902344, 'logps/ref_rejected': -87.8514404296875, 'KL/chosen_KL_mean': -234.5529022216797, 'KL/rejected_KL_mean': -427.4967041015625, 'KL/mean': -331.0247802734375, 'KL/std': 201.0699462890625, 'logits/chosen': -0.30353468656539917, 'logits/rejected': -0.3015468418598175, 'epoch': 0.41} + 41%|████ | 279/681 [11:49<16:53, 2.52s/it] 41%|████ | 280/681 [11:51<17:07, 2.56s/it] {'loss': 1.0293, 'grad_norm': 25.460493087768555, 'learning_rate': 3.6827338920900253e-07, 'fcm_dpo/beta': 0.002430729568004608, 'fcm_dpo/q_t': 0.39016252756118774, 'fcm_dpo/delta': -0.07191157341003418, 'fcm_dpo/margin': 192.72705078125, 'margin_dpo/margin_mean': 192.72706604003906, 'margin_dpo/margin_std': 203.84701538085938, 'logps/chosen': -295.58209228515625, 'logps/rejected': -528.130859375, 'logps/ref_chosen': -58.797122955322266, 'logps/ref_rejected': -98.61885070800781, 'KL/chosen_KL_mean': -236.78497314453125, 'KL/rejected_KL_mean': -429.51202392578125, 'KL/mean': -333.14849853515625, 'KL/std': 184.6090087890625, 'logits/chosen': -0.33522510528564453, 'logits/rejected': -0.34023696184158325, 'epoch': 0.41} + 41%|████ | 280/681 [11:51<17:07, 2.56s/it] 41%|████▏ | 281/681 [11:54<17:05, 2.56s/it] {'loss': 1.0649, 'grad_norm': 25.438335418701172, 'learning_rate': 3.6714120619553435e-07, 'fcm_dpo/beta': 0.002424264792352915, 'fcm_dpo/q_t': 0.3996432423591614, 'fcm_dpo/delta': -0.024164361879229546, 'fcm_dpo/margin': 174.46792602539062, 'margin_dpo/margin_mean': 174.46792602539062, 'margin_dpo/margin_std': 200.58334350585938, 'logps/chosen': -282.3760070800781, 'logps/rejected': -482.23797607421875, 'logps/ref_chosen': -55.488521575927734, 'logps/ref_rejected': -80.88258361816406, 'KL/chosen_KL_mean': -226.8874969482422, 'KL/rejected_KL_mean': -401.35540771484375, 'KL/mean': -314.1214599609375, 'KL/std': 185.2099151611328, 'logits/chosen': -0.3693084716796875, 'logits/rejected': -0.34565502405166626, 'epoch': 0.41} + 41%|████▏ | 281/681 [11:54<17:05, 2.56s/it] 41%|████▏ | 282/681 [11:56<16:57, 2.55s/it] {'loss': 1.1493, 'grad_norm': 21.817731857299805, 'learning_rate': 3.660059364023408e-07, 'fcm_dpo/beta': 0.0024102902971208096, 'fcm_dpo/q_t': 0.42627450823783875, 'fcm_dpo/delta': -0.00805886834859848, 'fcm_dpo/margin': 134.1021728515625, 'margin_dpo/margin_mean': 134.1021728515625, 'margin_dpo/margin_std': 210.12014770507812, 'logps/chosen': -327.8204040527344, 'logps/rejected': -484.20343017578125, 'logps/ref_chosen': -73.07014465332031, 'logps/ref_rejected': -95.35098266601562, 'KL/chosen_KL_mean': -254.75025939941406, 'KL/rejected_KL_mean': -388.85247802734375, 'KL/mean': -321.8013610839844, 'KL/std': 179.27874755859375, 'logits/chosen': -0.4306085705757141, 'logits/rejected': -0.4117346405982971, 'epoch': 0.41} + 41%|████▏ | 282/681 [11:56<16:57, 2.55s/it] 42%|████▏ | 283/681 [11:59<16:51, 2.54s/it] {'loss': 1.025, 'grad_norm': 38.63431167602539, 'learning_rate': 3.6486760974483685e-07, 'fcm_dpo/beta': 0.002389241009950638, 'fcm_dpo/q_t': 0.3880097270011902, 'fcm_dpo/delta': -0.08176899701356888, 'fcm_dpo/margin': 199.99966430664062, 'margin_dpo/margin_mean': 199.99964904785156, 'margin_dpo/margin_std': 207.48794555664062, 'logps/chosen': -317.56396484375, 'logps/rejected': -552.6517333984375, 'logps/ref_chosen': -61.89844512939453, 'logps/ref_rejected': -96.98655700683594, 'KL/chosen_KL_mean': -255.66551208496094, 'KL/rejected_KL_mean': -455.6651611328125, 'KL/mean': -355.66534423828125, 'KL/std': 223.33087158203125, 'logits/chosen': -0.38895383477211, 'logits/rejected': -0.3880043029785156, 'epoch': 0.42} + 42%|████▏ | 283/681 [11:59<16:51, 2.54s/it] 42%|████▏ | 284/681 [12:02<17:11, 2.60s/it] {'loss': 1.0387, 'grad_norm': 31.127235412597656, 'learning_rate': 3.6372625621898863e-07, 'fcm_dpo/beta': 0.00234918761998415, 'fcm_dpo/q_t': 0.3954547643661499, 'fcm_dpo/delta': -0.050124749541282654, 'fcm_dpo/margin': 190.62234497070312, 'margin_dpo/margin_mean': 190.62232971191406, 'margin_dpo/margin_std': 199.58392333984375, 'logps/chosen': -310.62255859375, 'logps/rejected': -536.278564453125, 'logps/ref_chosen': -58.4355354309082, 'logps/ref_rejected': -93.46926879882812, 'KL/chosen_KL_mean': -252.18701171875, 'KL/rejected_KL_mean': -442.809326171875, 'KL/mean': -347.4981689453125, 'KL/std': 203.94338989257812, 'logits/chosen': -0.4325849413871765, 'logits/rejected': -0.4140619933605194, 'epoch': 0.42} + 42%|████▏ | 284/681 [12:02<17:11, 2.60s/it] 42%|████▏ | 285/681 [12:04<17:08, 2.60s/it] {'loss': 1.0734, 'grad_norm': 24.421079635620117, 'learning_rate': 3.625819059005228e-07, 'fcm_dpo/beta': 0.0023565019946545362, 'fcm_dpo/q_t': 0.40418434143066406, 'fcm_dpo/delta': -0.005000069737434387, 'fcm_dpo/margin': 171.5524444580078, 'margin_dpo/margin_mean': 171.5524444580078, 'margin_dpo/margin_std': 191.03448486328125, 'logps/chosen': -341.8755187988281, 'logps/rejected': -546.3225708007812, 'logps/ref_chosen': -66.23219299316406, 'logps/ref_rejected': -99.1268310546875, 'KL/chosen_KL_mean': -275.643310546875, 'KL/rejected_KL_mean': -447.19573974609375, 'KL/mean': -361.4195556640625, 'KL/std': 183.0438232421875, 'logits/chosen': -0.40086328983306885, 'logits/rejected': -0.38890522718429565, 'epoch': 0.42} + 42%|████▏ | 285/681 [12:04<17:08, 2.60s/it] 42%|████▏ | 286/681 [12:07<16:56, 2.57s/it] {'loss': 1.0568, 'grad_norm': 21.488588333129883, 'learning_rate': 3.614345889441346e-07, 'fcm_dpo/beta': 0.002331117633730173, 'fcm_dpo/q_t': 0.397432416677475, 'fcm_dpo/delta': -0.042728979140520096, 'fcm_dpo/margin': 189.12229919433594, 'margin_dpo/margin_mean': 189.12228393554688, 'margin_dpo/margin_std': 223.15341186523438, 'logps/chosen': -352.1898193359375, 'logps/rejected': -556.9495849609375, 'logps/ref_chosen': -72.95100402832031, 'logps/ref_rejected': -88.58845520019531, 'KL/chosen_KL_mean': -279.2388000488281, 'KL/rejected_KL_mean': -468.3611145019531, 'KL/mean': -373.7999572753906, 'KL/std': 205.68328857421875, 'logits/chosen': -0.39442330598831177, 'logits/rejected': -0.37771064043045044, 'epoch': 0.42} + 42%|████▏ | 286/681 [12:07<16:56, 2.57s/it] 42%|████▏ | 287/681 [12:09<16:18, 2.48s/it] {'loss': 1.1282, 'grad_norm': 31.77181625366211, 'learning_rate': 3.6028433558269275e-07, 'fcm_dpo/beta': 0.0023477966897189617, 'fcm_dpo/q_t': 0.42086952924728394, 'fcm_dpo/delta': 0.060965895652770996, 'fcm_dpo/margin': 145.24075317382812, 'margin_dpo/margin_mean': 145.24075317382812, 'margin_dpo/margin_std': 204.16952514648438, 'logps/chosen': -329.3837890625, 'logps/rejected': -490.7794494628906, 'logps/ref_chosen': -61.54115295410156, 'logps/ref_rejected': -77.69607543945312, 'KL/chosen_KL_mean': -267.8426513671875, 'KL/rejected_KL_mean': -413.0833740234375, 'KL/mean': -340.4630126953125, 'KL/std': 175.79714965820312, 'logits/chosen': -0.3862367272377014, 'logits/rejected': -0.3612860441207886, 'epoch': 0.42} + 42%|████▏ | 287/681 [12:09<16:18, 2.48s/it] 42%|████▏ | 288/681 [12:12<16:24, 2.51s/it] {'loss': 1.0438, 'grad_norm': 21.244749069213867, 'learning_rate': 3.5913117612644327e-07, 'fcm_dpo/beta': 0.0023200467694550753, 'fcm_dpo/q_t': 0.39606454968452454, 'fcm_dpo/delta': -0.04565563425421715, 'fcm_dpo/margin': 190.696044921875, 'margin_dpo/margin_mean': 190.696044921875, 'margin_dpo/margin_std': 194.03762817382812, 'logps/chosen': -315.1396484375, 'logps/rejected': -536.5101928710938, 'logps/ref_chosen': -56.661224365234375, 'logps/ref_rejected': -87.33570098876953, 'KL/chosen_KL_mean': -258.47845458984375, 'KL/rejected_KL_mean': -449.17449951171875, 'KL/mean': -353.82647705078125, 'KL/std': 181.2964630126953, 'logits/chosen': -0.4210980534553528, 'logits/rejected': -0.4073001742362976, 'epoch': 0.42} + 42%|████▏ | 288/681 [12:12<16:24, 2.51s/it] 42%|████▏ | 289/681 [12:14<16:22, 2.51s/it] {'loss': 1.0151, 'grad_norm': 25.482728958129883, 'learning_rate': 3.5797514096221024e-07, 'fcm_dpo/beta': 0.0023045637644827366, 'fcm_dpo/q_t': 0.38599973917007446, 'fcm_dpo/delta': -0.09854762256145477, 'fcm_dpo/margin': 214.25892639160156, 'margin_dpo/margin_mean': 214.25892639160156, 'margin_dpo/margin_std': 223.70123291015625, 'logps/chosen': -283.3717346191406, 'logps/rejected': -540.0429077148438, 'logps/ref_chosen': -45.23039245605469, 'logps/ref_rejected': -87.64266967773438, 'KL/chosen_KL_mean': -238.14132690429688, 'KL/rejected_KL_mean': -452.4002380371094, 'KL/mean': -345.27081298828125, 'KL/std': 204.56678771972656, 'logits/chosen': -0.34231024980545044, 'logits/rejected': -0.3436782956123352, 'epoch': 0.42} + 42%|████▏ | 289/681 [12:14<16:22, 2.51s/it] 43%|████▎ | 290/681 [12:17<16:22, 2.51s/it] {'loss': 1.0375, 'grad_norm': 21.68331527709961, 'learning_rate': 3.568162605525952e-07, 'fcm_dpo/beta': 0.002251718658953905, 'fcm_dpo/q_t': 0.39020198583602905, 'fcm_dpo/delta': -0.08903662860393524, 'fcm_dpo/margin': 215.1958465576172, 'margin_dpo/margin_mean': 215.1958465576172, 'margin_dpo/margin_std': 261.9568786621094, 'logps/chosen': -317.1899108886719, 'logps/rejected': -593.622802734375, 'logps/ref_chosen': -55.47149658203125, 'logps/ref_rejected': -116.70857238769531, 'KL/chosen_KL_mean': -261.7184143066406, 'KL/rejected_KL_mean': -476.91424560546875, 'KL/mean': -369.31634521484375, 'KL/std': 220.097900390625, 'logits/chosen': -0.3297966718673706, 'logits/rejected': -0.35255828499794006, 'epoch': 0.43} + 43%|████▎ | 290/681 [12:17<16:22, 2.51s/it] 43%|████▎ | 291/681 [12:19<16:39, 2.56s/it] {'loss': 1.0652, 'grad_norm': 27.926626205444336, 'learning_rate': 3.5565456543517485e-07, 'fcm_dpo/beta': 0.002243693685159087, 'fcm_dpo/q_t': 0.400044322013855, 'fcm_dpo/delta': -0.02954481914639473, 'fcm_dpo/margin': 190.7955780029297, 'margin_dpo/margin_mean': 190.79556274414062, 'margin_dpo/margin_std': 225.35430908203125, 'logps/chosen': -292.1304931640625, 'logps/rejected': -508.9627685546875, 'logps/ref_chosen': -63.26036834716797, 'logps/ref_rejected': -89.29708862304688, 'KL/chosen_KL_mean': -228.8701171875, 'KL/rejected_KL_mean': -419.6656799316406, 'KL/mean': -324.2679138183594, 'KL/std': 185.85340881347656, 'logits/chosen': -0.3716287612915039, 'logits/rejected': -0.3628491163253784, 'epoch': 0.43} + 43%|████▎ | 291/681 [12:19<16:39, 2.56s/it] 43%|████▎ | 292/681 [12:22<16:14, 2.51s/it] {'loss': 1.0614, 'grad_norm': 22.149768829345703, 'learning_rate': 3.5449008622169583e-07, 'fcm_dpo/beta': 0.002220253925770521, 'fcm_dpo/q_t': 0.39462825655937195, 'fcm_dpo/delta': -0.057921458035707474, 'fcm_dpo/margin': 205.0364990234375, 'margin_dpo/margin_mean': 205.0364990234375, 'margin_dpo/margin_std': 262.11480712890625, 'logps/chosen': -310.5567626953125, 'logps/rejected': -551.6361083984375, 'logps/ref_chosen': -53.91852951049805, 'logps/ref_rejected': -89.96138000488281, 'KL/chosen_KL_mean': -256.63824462890625, 'KL/rejected_KL_mean': -461.67474365234375, 'KL/mean': -359.156494140625, 'KL/std': 220.2518768310547, 'logits/chosen': -0.381770521402359, 'logits/rejected': -0.3677070736885071, 'epoch': 0.43} + 43%|████▎ | 292/681 [12:22<16:14, 2.51s/it] 43%|████▎ | 293/681 [12:24<16:26, 2.54s/it] {'loss': 1.1311, 'grad_norm': 25.350297927856445, 'learning_rate': 3.5332285359726846e-07, 'fcm_dpo/beta': 0.002223607152700424, 'fcm_dpo/q_t': 0.4189508557319641, 'fcm_dpo/delta': 0.04278174415230751, 'fcm_dpo/margin': 161.2817840576172, 'margin_dpo/margin_mean': 161.2817840576172, 'margin_dpo/margin_std': 250.83364868164062, 'logps/chosen': -321.36907958984375, 'logps/rejected': -500.12725830078125, 'logps/ref_chosen': -60.376033782958984, 'logps/ref_rejected': -77.85244750976562, 'KL/chosen_KL_mean': -260.9930419921875, 'KL/rejected_KL_mean': -422.27484130859375, 'KL/mean': -341.6339416503906, 'KL/std': 220.97372436523438, 'logits/chosen': -0.37213313579559326, 'logits/rejected': -0.3637707829475403, 'epoch': 0.43} + 43%|████▎ | 293/681 [12:24<16:26, 2.54s/it] 43%|████▎ | 294/681 [12:27<16:09, 2.51s/it] {'loss': 1.0956, 'grad_norm': 21.2161808013916, 'learning_rate': 3.5215289831955786e-07, 'fcm_dpo/beta': 0.002221715170890093, 'fcm_dpo/q_t': 0.4095104932785034, 'fcm_dpo/delta': 0.002862263470888138, 'fcm_dpo/margin': 178.77838134765625, 'margin_dpo/margin_mean': 178.77838134765625, 'margin_dpo/margin_std': 243.22738647460938, 'logps/chosen': -288.1473083496094, 'logps/rejected': -500.73516845703125, 'logps/ref_chosen': -48.0875358581543, 'logps/ref_rejected': -81.89698791503906, 'KL/chosen_KL_mean': -240.05978393554688, 'KL/rejected_KL_mean': -418.83819580078125, 'KL/mean': -329.448974609375, 'KL/std': 193.92105102539062, 'logits/chosen': -0.4091721773147583, 'logits/rejected': -0.4162572920322418, 'epoch': 0.43} + 43%|████▎ | 294/681 [12:27<16:09, 2.51s/it] 43%|████▎ | 295/681 [12:29<16:00, 2.49s/it] {'loss': 1.0697, 'grad_norm': 28.84412384033203, 'learning_rate': 3.509802512179737e-07, 'fcm_dpo/beta': 0.0022136676125228405, 'fcm_dpo/q_t': 0.39920759201049805, 'fcm_dpo/delta': -0.04604334011673927, 'fcm_dpo/margin': 200.58251953125, 'margin_dpo/margin_mean': 200.58251953125, 'margin_dpo/margin_std': 263.1807861328125, 'logps/chosen': -339.7589111328125, 'logps/rejected': -577.873046875, 'logps/ref_chosen': -49.92467498779297, 'logps/ref_rejected': -87.45632934570312, 'KL/chosen_KL_mean': -289.834228515625, 'KL/rejected_KL_mean': -490.416748046875, 'KL/mean': -390.12548828125, 'KL/std': 226.3201904296875, 'logits/chosen': -0.4074261784553528, 'logits/rejected': -0.409212589263916, 'epoch': 0.43} + 43%|████▎ | 295/681 [12:29<16:00, 2.49s/it] 43%|████▎ | 296/681 [12:32<15:54, 2.48s/it] {'loss': 1.1886, 'grad_norm': 32.84729766845703, 'learning_rate': 3.498049431928577e-07, 'fcm_dpo/beta': 0.002201622352004051, 'fcm_dpo/q_t': 0.4300941824913025, 'fcm_dpo/delta': -0.008969346061348915, 'fcm_dpo/margin': 137.62461853027344, 'margin_dpo/margin_mean': 137.62461853027344, 'margin_dpo/margin_std': 264.7969665527344, 'logps/chosen': -424.3894958496094, 'logps/rejected': -589.6119384765625, 'logps/ref_chosen': -65.49124145507812, 'logps/ref_rejected': -93.08908081054688, 'KL/chosen_KL_mean': -358.89825439453125, 'KL/rejected_KL_mean': -496.5228576660156, 'KL/mean': -427.7105712890625, 'KL/std': 208.72183227539062, 'logits/chosen': -0.4741077721118927, 'logits/rejected': -0.45637625455856323, 'epoch': 0.43} + 43%|████▎ | 296/681 [12:32<15:54, 2.48s/it] 44%|████▎ | 297/681 [12:34<16:03, 2.51s/it] {'loss': 1.1084, 'grad_norm': 34.864501953125, 'learning_rate': 3.486270052146694e-07, 'fcm_dpo/beta': 0.002210353035479784, 'fcm_dpo/q_t': 0.4156332015991211, 'fcm_dpo/delta': 0.03913431614637375, 'fcm_dpo/margin': 163.90359497070312, 'margin_dpo/margin_mean': 163.90359497070312, 'margin_dpo/margin_std': 211.23016357421875, 'logps/chosen': -363.9125061035156, 'logps/rejected': -566.4776611328125, 'logps/ref_chosen': -56.476951599121094, 'logps/ref_rejected': -95.1385498046875, 'KL/chosen_KL_mean': -307.435546875, 'KL/rejected_KL_mean': -471.3391418457031, 'KL/mean': -389.3873291015625, 'KL/std': 198.08912658691406, 'logits/chosen': -0.4226665794849396, 'logits/rejected': -0.42391157150268555, 'epoch': 0.44} + 44%|████▎ | 297/681 [12:34<16:03, 2.51s/it] 44%|████▍ | 298/681 [12:37<16:08, 2.53s/it] {'loss': 1.0729, 'grad_norm': 34.56257247924805, 'learning_rate': 3.474464683231698e-07, 'fcm_dpo/beta': 0.002200640505179763, 'fcm_dpo/q_t': 0.3964436948299408, 'fcm_dpo/delta': -0.07795488834381104, 'fcm_dpo/margin': 215.52365112304688, 'margin_dpo/margin_mean': 215.52365112304688, 'margin_dpo/margin_std': 318.0645751953125, 'logps/chosen': -387.97625732421875, 'logps/rejected': -652.8369140625, 'logps/ref_chosen': -67.32516479492188, 'logps/ref_rejected': -116.66217041015625, 'KL/chosen_KL_mean': -320.65106201171875, 'KL/rejected_KL_mean': -536.1747436523438, 'KL/mean': -428.41290283203125, 'KL/std': 268.907470703125, 'logits/chosen': -0.4743010401725769, 'logits/rejected': -0.49710047245025635, 'epoch': 0.44} + 44%|████▍ | 298/681 [12:37<16:08, 2.53s/it] 44%|████▍ | 299/681 [12:39<16:06, 2.53s/it] {'loss': 1.0958, 'grad_norm': 38.42890930175781, 'learning_rate': 3.462633636266041e-07, 'fcm_dpo/beta': 0.00219709612429142, 'fcm_dpo/q_t': 0.40845823287963867, 'fcm_dpo/delta': -0.004710428416728973, 'fcm_dpo/margin': 183.76527404785156, 'margin_dpo/margin_mean': 183.76528930664062, 'margin_dpo/margin_std': 249.51800537109375, 'logps/chosen': -315.95849609375, 'logps/rejected': -535.0899658203125, 'logps/ref_chosen': -48.96209716796875, 'logps/ref_rejected': -84.32823944091797, 'KL/chosen_KL_mean': -266.99639892578125, 'KL/rejected_KL_mean': -450.76171875, 'KL/mean': -358.8790283203125, 'KL/std': 197.55548095703125, 'logits/chosen': -0.42935478687286377, 'logits/rejected': -0.4295846223831177, 'epoch': 0.44} + 44%|████▍ | 299/681 [12:39<16:06, 2.53s/it] 44%|████▍ | 300/681 [12:42<16:17, 2.57s/it] {'loss': 1.0619, 'grad_norm': 31.024282455444336, 'learning_rate': 3.4507772230088147e-07, 'fcm_dpo/beta': 0.0021653189323842525, 'fcm_dpo/q_t': 0.3930676281452179, 'fcm_dpo/delta': -0.07065486907958984, 'fcm_dpo/margin': 215.855712890625, 'margin_dpo/margin_mean': 215.855712890625, 'margin_dpo/margin_std': 283.98614501953125, 'logps/chosen': -389.18536376953125, 'logps/rejected': -641.933837890625, 'logps/ref_chosen': -59.07371139526367, 'logps/ref_rejected': -95.9664535522461, 'KL/chosen_KL_mean': -330.11163330078125, 'KL/rejected_KL_mean': -545.9674072265625, 'KL/mean': -438.03948974609375, 'KL/std': 232.30531311035156, 'logits/chosen': -0.4451923668384552, 'logits/rejected': -0.4505726099014282, 'epoch': 0.44} + 44%|████▍ | 300/681 [12:42<16:17, 2.57s/it] 44%|████▍ | 301/681 [12:44<15:43, 2.48s/it] {'loss': 1.048, 'grad_norm': 25.04326629638672, 'learning_rate': 3.4388957558875316e-07, 'fcm_dpo/beta': 0.002126394771039486, 'fcm_dpo/q_t': 0.3957647681236267, 'fcm_dpo/delta': -0.0630912333726883, 'fcm_dpo/margin': 216.28659057617188, 'margin_dpo/margin_mean': 216.28659057617188, 'margin_dpo/margin_std': 261.11767578125, 'logps/chosen': -333.4117431640625, 'logps/rejected': -584.802490234375, 'logps/ref_chosen': -57.249366760253906, 'logps/ref_rejected': -92.35354614257812, 'KL/chosen_KL_mean': -276.162353515625, 'KL/rejected_KL_mean': -492.4489440917969, 'KL/mean': -384.3056640625, 'KL/std': 221.1756591796875, 'logits/chosen': -0.45365726947784424, 'logits/rejected': -0.45016711950302124, 'epoch': 0.44} + 44%|████▍ | 301/681 [12:44<15:43, 2.48s/it] 44%|████▍ | 302/681 [12:47<15:56, 2.52s/it] {'loss': 1.095, 'grad_norm': 35.94118118286133, 'learning_rate': 3.426989547989902e-07, 'fcm_dpo/beta': 0.002119125798344612, 'fcm_dpo/q_t': 0.41012802720069885, 'fcm_dpo/delta': 0.006838139146566391, 'fcm_dpo/margin': 185.53775024414062, 'margin_dpo/margin_mean': 185.53775024414062, 'margin_dpo/margin_std': 245.56341552734375, 'logps/chosen': -312.0766296386719, 'logps/rejected': -543.6427612304688, 'logps/ref_chosen': -51.197994232177734, 'logps/ref_rejected': -97.22636413574219, 'KL/chosen_KL_mean': -260.8786315917969, 'KL/rejected_KL_mean': -446.4163818359375, 'KL/mean': -353.64752197265625, 'KL/std': 209.54000854492188, 'logits/chosen': -0.4112318456172943, 'logits/rejected': -0.41728314757347107, 'epoch': 0.44} + 44%|████▍ | 302/681 [12:47<15:56, 2.52s/it] 44%|████▍ | 303/681 [12:50<16:29, 2.62s/it] {'loss': 1.1261, 'grad_norm': 29.625904083251953, 'learning_rate': 3.4150589130555773e-07, 'fcm_dpo/beta': 0.0021344092674553394, 'fcm_dpo/q_t': 0.4166523218154907, 'fcm_dpo/delta': 0.03814225643873215, 'fcm_dpo/margin': 170.1665496826172, 'margin_dpo/margin_mean': 170.1665496826172, 'margin_dpo/margin_std': 254.2838897705078, 'logps/chosen': -333.0869445800781, 'logps/rejected': -523.4849853515625, 'logps/ref_chosen': -66.71394348144531, 'logps/ref_rejected': -86.94542694091797, 'KL/chosen_KL_mean': -266.37298583984375, 'KL/rejected_KL_mean': -436.53955078125, 'KL/mean': -351.4562683105469, 'KL/std': 212.11721801757812, 'logits/chosen': -0.4059848487377167, 'logits/rejected': -0.3904208242893219, 'epoch': 0.44} + 44%|████▍ | 303/681 [12:50<16:29, 2.62s/it] 45%|████▍ | 304/681 [12:52<16:38, 2.65s/it] {'loss': 1.0533, 'grad_norm': 24.7471981048584, 'learning_rate': 3.403104165467883e-07, 'fcm_dpo/beta': 0.0021487209014594555, 'fcm_dpo/q_t': 0.40207067131996155, 'fcm_dpo/delta': -0.011212758719921112, 'fcm_dpo/margin': 191.04885864257812, 'margin_dpo/margin_mean': 191.04885864257812, 'margin_dpo/margin_std': 175.52703857421875, 'logps/chosen': -314.6591491699219, 'logps/rejected': -524.2293701171875, 'logps/ref_chosen': -71.95069885253906, 'logps/ref_rejected': -90.47203063964844, 'KL/chosen_KL_mean': -242.7084503173828, 'KL/rejected_KL_mean': -433.75732421875, 'KL/mean': -338.2328796386719, 'KL/std': 192.66885375976562, 'logits/chosen': -0.44146013259887695, 'logits/rejected': -0.4333987236022949, 'epoch': 0.45} + 45%|████▍ | 304/681 [12:52<16:38, 2.65s/it] 45%|████▍ | 305/681 [12:55<16:34, 2.64s/it] {'loss': 1.1129, 'grad_norm': 25.216215133666992, 'learning_rate': 3.391125620245535e-07, 'fcm_dpo/beta': 0.0021360788960009813, 'fcm_dpo/q_t': 0.41488200426101685, 'fcm_dpo/delta': 0.02424509823322296, 'fcm_dpo/margin': 176.14801025390625, 'margin_dpo/margin_mean': 176.1479949951172, 'margin_dpo/margin_std': 249.10455322265625, 'logps/chosen': -328.4869079589844, 'logps/rejected': -530.5942993164062, 'logps/ref_chosen': -66.79523468017578, 'logps/ref_rejected': -92.75459289550781, 'KL/chosen_KL_mean': -261.6916809082031, 'KL/rejected_KL_mean': -437.8396911621094, 'KL/mean': -349.76568603515625, 'KL/std': 222.32864379882812, 'logits/chosen': -0.4338444471359253, 'logits/rejected': -0.4185817837715149, 'epoch': 0.45} + 45%|████▍ | 305/681 [12:55<16:34, 2.64s/it] 45%|████▍ | 306/681 [12:58<16:33, 2.65s/it] {'loss': 1.1056, 'grad_norm': 25.18739891052246, 'learning_rate': 3.3791235930343417e-07, 'fcm_dpo/beta': 0.0021633305586874485, 'fcm_dpo/q_t': 0.415255069732666, 'fcm_dpo/delta': 0.03478704392910004, 'fcm_dpo/margin': 169.38018798828125, 'margin_dpo/margin_mean': 169.38018798828125, 'margin_dpo/margin_std': 220.4286651611328, 'logps/chosen': -337.17926025390625, 'logps/rejected': -522.0347900390625, 'logps/ref_chosen': -69.68389892578125, 'logps/ref_rejected': -85.15919494628906, 'KL/chosen_KL_mean': -267.495361328125, 'KL/rejected_KL_mean': -436.8755798339844, 'KL/mean': -352.1854248046875, 'KL/std': 201.3372802734375, 'logits/chosen': -0.40468522906303406, 'logits/rejected': -0.37944549322128296, 'epoch': 0.45} + 45%|████▍ | 306/681 [12:58<16:33, 2.65s/it] 45%|████▌ | 307/681 [13:00<16:41, 2.68s/it] {'loss': 1.0951, 'grad_norm': 25.99106788635254, 'learning_rate': 3.367098400098881e-07, 'fcm_dpo/beta': 0.002166555728763342, 'fcm_dpo/q_t': 0.4101484417915344, 'fcm_dpo/delta': 0.01314252894371748, 'fcm_dpo/margin': 178.78890991210938, 'margin_dpo/margin_mean': 178.78890991210938, 'margin_dpo/margin_std': 231.8785400390625, 'logps/chosen': -312.35321044921875, 'logps/rejected': -507.94903564453125, 'logps/ref_chosen': -70.16542053222656, 'logps/ref_rejected': -86.97230529785156, 'KL/chosen_KL_mean': -242.18780517578125, 'KL/rejected_KL_mean': -420.9767150878906, 'KL/mean': -331.582275390625, 'KL/std': 187.04690551757812, 'logits/chosen': -0.4158622920513153, 'logits/rejected': -0.40115827322006226, 'epoch': 0.45} + 45%|████▌ | 307/681 [13:00<16:41, 2.68s/it] 45%|████▌ | 308/681 [13:03<16:27, 2.65s/it] {'loss': 1.0528, 'grad_norm': 31.040889739990234, 'learning_rate': 3.355050358314172e-07, 'fcm_dpo/beta': 0.0021638874895870686, 'fcm_dpo/q_t': 0.39992159605026245, 'fcm_dpo/delta': -0.02547409199178219, 'fcm_dpo/margin': 196.12857055664062, 'margin_dpo/margin_mean': 196.12855529785156, 'margin_dpo/margin_std': 203.5819091796875, 'logps/chosen': -308.3779296875, 'logps/rejected': -528.6337280273438, 'logps/ref_chosen': -55.2449951171875, 'logps/ref_rejected': -79.37226104736328, 'KL/chosen_KL_mean': -253.13291931152344, 'KL/rejected_KL_mean': -449.2614440917969, 'KL/mean': -351.1971740722656, 'KL/std': 208.60134887695312, 'logits/chosen': -0.35027140378952026, 'logits/rejected': -0.3394496440887451, 'epoch': 0.45} + 45%|████▌ | 308/681 [13:03<16:27, 2.65s/it] 45%|████▌ | 309/681 [13:06<16:30, 2.66s/it] {'loss': 1.0705, 'grad_norm': 28.08131217956543, 'learning_rate': 3.3429797851573183e-07, 'fcm_dpo/beta': 0.002161671407520771, 'fcm_dpo/q_t': 0.40092766284942627, 'fcm_dpo/delta': -0.02382533997297287, 'fcm_dpo/margin': 195.45639038085938, 'margin_dpo/margin_mean': 195.45639038085938, 'margin_dpo/margin_std': 234.07315063476562, 'logps/chosen': -308.08917236328125, 'logps/rejected': -536.92724609375, 'logps/ref_chosen': -48.959083557128906, 'logps/ref_rejected': -82.34072875976562, 'KL/chosen_KL_mean': -259.13006591796875, 'KL/rejected_KL_mean': -454.58648681640625, 'KL/mean': -356.8582763671875, 'KL/std': 223.6021270751953, 'logits/chosen': -0.36709824204444885, 'logits/rejected': -0.3614857792854309, 'epoch': 0.45} + 45%|████▌ | 309/681 [13:06<16:30, 2.66s/it] 46%|████▌ | 310/681 [13:08<16:27, 2.66s/it] {'loss': 1.0811, 'grad_norm': 24.29796600341797, 'learning_rate': 3.3308869986991487e-07, 'fcm_dpo/beta': 0.0021615237928926945, 'fcm_dpo/q_t': 0.40847909450531006, 'fcm_dpo/delta': 0.011311601847410202, 'fcm_dpo/margin': 179.8553924560547, 'margin_dpo/margin_mean': 179.85537719726562, 'margin_dpo/margin_std': 198.85214233398438, 'logps/chosen': -363.46875, 'logps/rejected': -560.5126342773438, 'logps/ref_chosen': -62.74177932739258, 'logps/ref_rejected': -79.9302978515625, 'KL/chosen_KL_mean': -300.7269287109375, 'KL/rejected_KL_mean': -480.58233642578125, 'KL/mean': -390.6546630859375, 'KL/std': 196.88262939453125, 'logits/chosen': -0.41836071014404297, 'logits/rejected': -0.4063529372215271, 'epoch': 0.46} + 46%|████▌ | 310/681 [13:08<16:27, 2.66s/it] 46%|████▌ | 311/681 [13:11<15:56, 2.59s/it] {'loss': 1.0811, 'grad_norm': 25.550621032714844, 'learning_rate': 3.3187723175958346e-07, 'fcm_dpo/beta': 0.0021447776816785336, 'fcm_dpo/q_t': 0.40380626916885376, 'fcm_dpo/delta': -0.025147156789898872, 'fcm_dpo/margin': 197.71823120117188, 'margin_dpo/margin_mean': 197.71823120117188, 'margin_dpo/margin_std': 265.0999450683594, 'logps/chosen': -367.0386657714844, 'logps/rejected': -589.1671142578125, 'logps/ref_chosen': -53.02798080444336, 'logps/ref_rejected': -77.43820190429688, 'KL/chosen_KL_mean': -314.01068115234375, 'KL/rejected_KL_mean': -511.7289123535156, 'KL/mean': -412.86981201171875, 'KL/std': 246.66839599609375, 'logits/chosen': -0.38108277320861816, 'logits/rejected': -0.356780469417572, 'epoch': 0.46} + 46%|████▌ | 311/681 [13:11<15:56, 2.59s/it] 46%|████▌ | 312/681 [13:13<15:23, 2.50s/it] {'loss': 1.1007, 'grad_norm': 22.847942352294922, 'learning_rate': 3.306636061080487e-07, 'fcm_dpo/beta': 0.002153100911527872, 'fcm_dpo/q_t': 0.4111559987068176, 'fcm_dpo/delta': 0.00674719363451004, 'fcm_dpo/margin': 182.63937377929688, 'margin_dpo/margin_mean': 182.63937377929688, 'margin_dpo/margin_std': 256.60113525390625, 'logps/chosen': -340.41552734375, 'logps/rejected': -549.4555053710938, 'logps/ref_chosen': -49.39221954345703, 'logps/ref_rejected': -75.79280853271484, 'KL/chosen_KL_mean': -291.0233154296875, 'KL/rejected_KL_mean': -473.6627197265625, 'KL/mean': -382.343017578125, 'KL/std': 216.25013732910156, 'logits/chosen': -0.38960134983062744, 'logits/rejected': -0.3816367983818054, 'epoch': 0.46} + 46%|████▌ | 312/681 [13:13<15:23, 2.50s/it] 46%|████▌ | 313/681 [13:15<15:02, 2.45s/it] {'loss': 1.0858, 'grad_norm': 31.167348861694336, 'learning_rate': 3.2944785489547537e-07, 'fcm_dpo/beta': 0.002157143084332347, 'fcm_dpo/q_t': 0.4041683077812195, 'fcm_dpo/delta': -0.016167178750038147, 'fcm_dpo/margin': 192.15664672851562, 'margin_dpo/margin_mean': 192.15664672851562, 'margin_dpo/margin_std': 247.33575439453125, 'logps/chosen': -324.88189697265625, 'logps/rejected': -553.2919921875, 'logps/ref_chosen': -50.152740478515625, 'logps/ref_rejected': -86.40620422363281, 'KL/chosen_KL_mean': -274.72918701171875, 'KL/rejected_KL_mean': -466.88580322265625, 'KL/mean': -370.8074951171875, 'KL/std': 231.34361267089844, 'logits/chosen': -0.46091365814208984, 'logits/rejected': -0.4598471522331238, 'epoch': 0.46} + 46%|████▌ | 313/681 [13:15<15:02, 2.45s/it] 46%|████▌ | 314/681 [13:18<15:27, 2.53s/it] {'loss': 1.1244, 'grad_norm': 20.98644256591797, 'learning_rate': 3.2823001015803857e-07, 'fcm_dpo/beta': 0.0021345026325434446, 'fcm_dpo/q_t': 0.4131431579589844, 'fcm_dpo/delta': 0.010156366974115372, 'fcm_dpo/margin': 182.71548461914062, 'margin_dpo/margin_mean': 182.71548461914062, 'margin_dpo/margin_std': 292.40838623046875, 'logps/chosen': -360.32037353515625, 'logps/rejected': -583.3948974609375, 'logps/ref_chosen': -57.237579345703125, 'logps/ref_rejected': -97.5965347290039, 'KL/chosen_KL_mean': -303.0827941894531, 'KL/rejected_KL_mean': -485.79833984375, 'KL/mean': -394.4405517578125, 'KL/std': 226.08700561523438, 'logits/chosen': -0.4531956911087036, 'logits/rejected': -0.45664170384407043, 'epoch': 0.46} + 46%|████▌ | 314/681 [13:18<15:27, 2.53s/it] 46%|████▋ | 315/681 [13:21<15:34, 2.55s/it] {'loss': 1.1123, 'grad_norm': 22.165285110473633, 'learning_rate': 3.270101039870797e-07, 'fcm_dpo/beta': 0.002153775654733181, 'fcm_dpo/q_t': 0.41625896096229553, 'fcm_dpo/delta': 0.038061730563640594, 'fcm_dpo/margin': 168.69830322265625, 'margin_dpo/margin_mean': 168.69830322265625, 'margin_dpo/margin_std': 228.2129364013672, 'logps/chosen': -325.64300537109375, 'logps/rejected': -530.95263671875, 'logps/ref_chosen': -49.06958770751953, 'logps/ref_rejected': -85.68087768554688, 'KL/chosen_KL_mean': -276.57342529296875, 'KL/rejected_KL_mean': -445.271728515625, 'KL/mean': -360.9225769042969, 'KL/std': 206.61561584472656, 'logits/chosen': -0.36769017577171326, 'logits/rejected': -0.37278226017951965, 'epoch': 0.46} + 46%|████▋ | 315/681 [13:21<15:34, 2.55s/it] 46%|████▋ | 316/681 [13:23<15:31, 2.55s/it] {'loss': 1.0169, 'grad_norm': 26.478769302368164, 'learning_rate': 3.2578816852826086e-07, 'fcm_dpo/beta': 0.0021110116504132748, 'fcm_dpo/q_t': 0.38528791069984436, 'fcm_dpo/delta': -0.11421408504247665, 'fcm_dpo/margin': 240.21713256835938, 'margin_dpo/margin_mean': 240.21714782714844, 'margin_dpo/margin_std': 269.4544677734375, 'logps/chosen': -323.58038330078125, 'logps/rejected': -610.8182373046875, 'logps/ref_chosen': -54.26074981689453, 'logps/ref_rejected': -101.2814712524414, 'KL/chosen_KL_mean': -269.3196105957031, 'KL/rejected_KL_mean': -509.5367736816406, 'KL/mean': -389.4281921386719, 'KL/std': 240.0266571044922, 'logits/chosen': -0.4204370975494385, 'logits/rejected': -0.42546719312667847, 'epoch': 0.46} + 46%|████▋ | 316/681 [13:23<15:31, 2.55s/it] 47%|████▋ | 317/681 [13:26<15:34, 2.57s/it] {'loss': 0.9783, 'grad_norm': 29.60879898071289, 'learning_rate': 3.2456423598071783e-07, 'fcm_dpo/beta': 0.0020721801556646824, 'fcm_dpo/q_t': 0.37356036901474, 'fcm_dpo/delta': -0.153153657913208, 'fcm_dpo/margin': 263.04327392578125, 'margin_dpo/margin_mean': 263.04327392578125, 'margin_dpo/margin_std': 248.7153778076172, 'logps/chosen': -331.3876953125, 'logps/rejected': -639.0357666015625, 'logps/ref_chosen': -56.094207763671875, 'logps/ref_rejected': -100.69905090332031, 'KL/chosen_KL_mean': -275.29345703125, 'KL/rejected_KL_mean': -538.336669921875, 'KL/mean': -406.8150939941406, 'KL/std': 220.98388671875, 'logits/chosen': -0.4123254120349884, 'logits/rejected': -0.4049742817878723, 'epoch': 0.47} + 47%|████▋ | 317/681 [13:26<15:34, 2.57s/it] 47%|████▋ | 318/681 [13:28<15:14, 2.52s/it] {'loss': 1.1006, 'grad_norm': 24.619739532470703, 'learning_rate': 3.233383385962115e-07, 'fcm_dpo/beta': 0.0020595774985849857, 'fcm_dpo/q_t': 0.4124523401260376, 'fcm_dpo/delta': 0.021566076204180717, 'fcm_dpo/margin': 184.04380798339844, 'margin_dpo/margin_mean': 184.0438232421875, 'margin_dpo/margin_std': 241.61331176757812, 'logps/chosen': -372.21044921875, 'logps/rejected': -574.372802734375, 'logps/ref_chosen': -64.64569854736328, 'logps/ref_rejected': -82.76425170898438, 'KL/chosen_KL_mean': -307.56475830078125, 'KL/rejected_KL_mean': -491.6085205078125, 'KL/mean': -399.586669921875, 'KL/std': 220.44793701171875, 'logits/chosen': -0.458835244178772, 'logits/rejected': -0.4265810549259186, 'epoch': 0.47} + 47%|████▋ | 318/681 [13:28<15:14, 2.52s/it] 47%|████▋ | 319/681 [13:31<15:31, 2.57s/it] {'loss': 1.035, 'grad_norm': 23.96953010559082, 'learning_rate': 3.2211050867827805e-07, 'fcm_dpo/beta': 0.0020323502831161022, 'fcm_dpo/q_t': 0.3905225992202759, 'fcm_dpo/delta': -0.07725685834884644, 'fcm_dpo/margin': 233.02137756347656, 'margin_dpo/margin_mean': 233.0213623046875, 'margin_dpo/margin_std': 266.6269226074219, 'logps/chosen': -318.3958435058594, 'logps/rejected': -615.93994140625, 'logps/ref_chosen': -49.383758544921875, 'logps/ref_rejected': -113.90650939941406, 'KL/chosen_KL_mean': -269.0120849609375, 'KL/rejected_KL_mean': -502.0334777832031, 'KL/mean': -385.52276611328125, 'KL/std': 247.28811645507812, 'logits/chosen': -0.3532301187515259, 'logits/rejected': -0.3711482882499695, 'epoch': 0.47} + 47%|████▋ | 319/681 [13:31<15:31, 2.57s/it] 47%|████▋ | 320/681 [13:34<15:37, 2.60s/it] {'loss': 1.0227, 'grad_norm': 23.290699005126953, 'learning_rate': 3.208807785813777e-07, 'fcm_dpo/beta': 0.0019884873181581497, 'fcm_dpo/q_t': 0.3870677351951599, 'fcm_dpo/delta': -0.09909307956695557, 'fcm_dpo/margin': 248.263916015625, 'margin_dpo/margin_mean': 248.263916015625, 'margin_dpo/margin_std': 279.31939697265625, 'logps/chosen': -343.86187744140625, 'logps/rejected': -630.2880859375, 'logps/ref_chosen': -59.50489044189453, 'logps/ref_rejected': -97.66717529296875, 'KL/chosen_KL_mean': -284.35699462890625, 'KL/rejected_KL_mean': -532.6209106445312, 'KL/mean': -408.48895263671875, 'KL/std': 253.5668487548828, 'logits/chosen': -0.41272127628326416, 'logits/rejected': -0.4184020161628723, 'epoch': 0.47} + 47%|████▋ | 320/681 [13:34<15:37, 2.60s/it] 47%|████▋ | 321/681 [13:36<15:22, 2.56s/it] {'loss': 1.082, 'grad_norm': 23.15062713623047, 'learning_rate': 3.1964918071004217e-07, 'fcm_dpo/beta': 0.0019679851830005646, 'fcm_dpo/q_t': 0.4036102294921875, 'fcm_dpo/delta': -0.024129830300807953, 'fcm_dpo/margin': 214.68557739257812, 'margin_dpo/margin_mean': 214.68557739257812, 'margin_dpo/margin_std': 282.3053283691406, 'logps/chosen': -409.8883972167969, 'logps/rejected': -654.6663818359375, 'logps/ref_chosen': -61.548683166503906, 'logps/ref_rejected': -91.64103698730469, 'KL/chosen_KL_mean': -348.3397216796875, 'KL/rejected_KL_mean': -563.0252685546875, 'KL/mean': -455.6825256347656, 'KL/std': 267.32122802734375, 'logits/chosen': -0.40483784675598145, 'logits/rejected': -0.3935700058937073, 'epoch': 0.47} + 47%|████▋ | 321/681 [13:36<15:22, 2.56s/it] 47%|████▋ | 322/681 [13:39<15:26, 2.58s/it] {'loss': 1.0394, 'grad_norm': 24.584089279174805, 'learning_rate': 3.184157475180207e-07, 'fcm_dpo/beta': 0.0019517629407346249, 'fcm_dpo/q_t': 0.39376458525657654, 'fcm_dpo/delta': -0.05857790261507034, 'fcm_dpo/margin': 233.38006591796875, 'margin_dpo/margin_mean': 233.3800811767578, 'margin_dpo/margin_std': 251.07766723632812, 'logps/chosen': -363.4832763671875, 'logps/rejected': -635.3232421875, 'logps/ref_chosen': -57.29003143310547, 'logps/ref_rejected': -95.74992370605469, 'KL/chosen_KL_mean': -306.1932373046875, 'KL/rejected_KL_mean': -539.5733642578125, 'KL/mean': -422.8832702636719, 'KL/std': 236.69369506835938, 'logits/chosen': -0.3697229027748108, 'logits/rejected': -0.36216434836387634, 'epoch': 0.47} + 47%|████▋ | 322/681 [13:39<15:26, 2.58s/it] 47%|████▋ | 323/681 [13:41<15:30, 2.60s/it] {'loss': 1.1079, 'grad_norm': 30.523874282836914, 'learning_rate': 3.171805115074251e-07, 'fcm_dpo/beta': 0.0019555268809199333, 'fcm_dpo/q_t': 0.41228896379470825, 'fcm_dpo/delta': 0.02367909625172615, 'fcm_dpo/margin': 192.8805694580078, 'margin_dpo/margin_mean': 192.88055419921875, 'margin_dpo/margin_std': 258.48101806640625, 'logps/chosen': -377.5281677246094, 'logps/rejected': -594.2366943359375, 'logps/ref_chosen': -51.23395919799805, 'logps/ref_rejected': -75.06192016601562, 'KL/chosen_KL_mean': -326.294189453125, 'KL/rejected_KL_mean': -519.1747436523438, 'KL/mean': -422.7344970703125, 'KL/std': 234.59567260742188, 'logits/chosen': -0.3859459161758423, 'logits/rejected': -0.37858086824417114, 'epoch': 0.47} + 47%|████▋ | 323/681 [13:41<15:30, 2.60s/it] 48%|████▊ | 324/681 [13:44<15:10, 2.55s/it] {'loss': 1.1457, 'grad_norm': 36.11357498168945, 'learning_rate': 3.1594350522787295e-07, 'fcm_dpo/beta': 0.001949124038219452, 'fcm_dpo/q_t': 0.41932082176208496, 'fcm_dpo/delta': -0.06465040892362595, 'fcm_dpo/margin': 184.39852905273438, 'margin_dpo/margin_mean': 184.39854431152344, 'margin_dpo/margin_std': 294.26849365234375, 'logps/chosen': -439.48834228515625, 'logps/rejected': -645.229248046875, 'logps/ref_chosen': -65.13516998291016, 'logps/ref_rejected': -86.47750854492188, 'KL/chosen_KL_mean': -374.3531494140625, 'KL/rejected_KL_mean': -558.751708984375, 'KL/mean': -466.55242919921875, 'KL/std': 251.7170867919922, 'logits/chosen': -0.4411655068397522, 'logits/rejected': -0.42205148935317993, 'epoch': 0.48} + 48%|████▊ | 324/681 [13:44<15:10, 2.55s/it] 48%|████▊ | 325/681 [13:47<15:51, 2.67s/it] {'loss': 1.1393, 'grad_norm': 28.9737548828125, 'learning_rate': 3.147047612756302e-07, 'fcm_dpo/beta': 0.0019613862968981266, 'fcm_dpo/q_t': 0.42428165674209595, 'fcm_dpo/delta': 0.07689196616411209, 'fcm_dpo/margin': 165.94529724121094, 'margin_dpo/margin_mean': 165.9453125, 'margin_dpo/margin_std': 232.77090454101562, 'logps/chosen': -351.0293884277344, 'logps/rejected': -530.844970703125, 'logps/ref_chosen': -56.215599060058594, 'logps/ref_rejected': -70.08592987060547, 'KL/chosen_KL_mean': -294.81378173828125, 'KL/rejected_KL_mean': -460.75909423828125, 'KL/mean': -377.78643798828125, 'KL/std': 227.09103393554688, 'logits/chosen': -0.4612424969673157, 'logits/rejected': -0.4356522858142853, 'epoch': 0.48} + 48%|████▊ | 325/681 [13:47<15:51, 2.67s/it] 48%|████▊ | 326/681 [13:49<15:52, 2.68s/it] {'loss': 1.1104, 'grad_norm': 27.75748634338379, 'learning_rate': 3.134643122927519e-07, 'fcm_dpo/beta': 0.001981673063710332, 'fcm_dpo/q_t': 0.4185304045677185, 'fcm_dpo/delta': 0.05636116489768028, 'fcm_dpo/margin': 174.4063262939453, 'margin_dpo/margin_mean': 174.4063262939453, 'margin_dpo/margin_std': 205.46969604492188, 'logps/chosen': -400.960693359375, 'logps/rejected': -582.4888916015625, 'logps/ref_chosen': -72.72496032714844, 'logps/ref_rejected': -79.8467788696289, 'KL/chosen_KL_mean': -328.23577880859375, 'KL/rejected_KL_mean': -502.64208984375, 'KL/mean': -415.43890380859375, 'KL/std': 211.7913818359375, 'logits/chosen': -0.4789705276489258, 'logits/rejected': -0.4462633728981018, 'epoch': 0.48} + 48%|████▊ | 326/681 [13:49<15:52, 2.68s/it] 48%|████▊ | 327/681 [13:52<15:52, 2.69s/it] {'loss': 1.0326, 'grad_norm': 28.559314727783203, 'learning_rate': 3.1222219096622264e-07, 'fcm_dpo/beta': 0.001960520865395665, 'fcm_dpo/q_t': 0.390238493680954, 'fcm_dpo/delta': -0.07271062582731247, 'fcm_dpo/margin': 239.12875366210938, 'margin_dpo/margin_mean': 239.12875366210938, 'margin_dpo/margin_std': 257.76385498046875, 'logps/chosen': -357.3126525878906, 'logps/rejected': -639.2407836914062, 'logps/ref_chosen': -69.13441467285156, 'logps/ref_rejected': -111.93377685546875, 'KL/chosen_KL_mean': -288.17822265625, 'KL/rejected_KL_mean': -527.3070068359375, 'KL/mean': -407.74261474609375, 'KL/std': 220.7349090576172, 'logits/chosen': -0.42903268337249756, 'logits/rejected': -0.41195765137672424, 'epoch': 0.48} + 48%|████▊ | 327/681 [13:52<15:52, 2.69s/it] 48%|████▊ | 328/681 [13:55<15:40, 2.66s/it] {'loss': 1.0738, 'grad_norm': 22.35950469970703, 'learning_rate': 3.1097843002709427e-07, 'fcm_dpo/beta': 0.001954542938619852, 'fcm_dpo/q_t': 0.40333792567253113, 'fcm_dpo/delta': -0.024569327011704445, 'fcm_dpo/margin': 216.6925811767578, 'margin_dpo/margin_mean': 216.69256591796875, 'margin_dpo/margin_std': 276.7907409667969, 'logps/chosen': -352.30181884765625, 'logps/rejected': -600.1621704101562, 'logps/ref_chosen': -59.68719482421875, 'logps/ref_rejected': -90.85499572753906, 'KL/chosen_KL_mean': -292.6146240234375, 'KL/rejected_KL_mean': -509.3071594238281, 'KL/mean': -400.96087646484375, 'KL/std': 242.0338134765625, 'logits/chosen': -0.4434688091278076, 'logits/rejected': -0.4467797577381134, 'epoch': 0.48} + 48%|████▊ | 328/681 [13:55<15:40, 2.66s/it] 48%|████▊ | 329/681 [13:57<15:16, 2.60s/it] {'loss': 1.0718, 'grad_norm': 30.763805389404297, 'learning_rate': 3.0973306224962437e-07, 'fcm_dpo/beta': 0.0019329939968883991, 'fcm_dpo/q_t': 0.4004287123680115, 'fcm_dpo/delta': -0.030582299456000328, 'fcm_dpo/margin': 221.80587768554688, 'margin_dpo/margin_mean': 221.80587768554688, 'margin_dpo/margin_std': 272.5767517089844, 'logps/chosen': -388.12957763671875, 'logps/rejected': -645.386962890625, 'logps/ref_chosen': -65.2461929321289, 'logps/ref_rejected': -100.69770812988281, 'KL/chosen_KL_mean': -322.8833923339844, 'KL/rejected_KL_mean': -544.6892700195312, 'KL/mean': -433.7863464355469, 'KL/std': 254.3605194091797, 'logits/chosen': -0.42895740270614624, 'logits/rejected': -0.42029309272766113, 'epoch': 0.48} + 48%|████▊ | 329/681 [13:57<15:16, 2.60s/it] 48%|████▊ | 330/681 [14:00<15:18, 2.62s/it] {'loss': 1.0643, 'grad_norm': 25.122922897338867, 'learning_rate': 3.084861204504122e-07, 'fcm_dpo/beta': 0.001930012134835124, 'fcm_dpo/q_t': 0.40108194947242737, 'fcm_dpo/delta': -0.026210233569145203, 'fcm_dpo/margin': 220.24560546875, 'margin_dpo/margin_mean': 220.24560546875, 'margin_dpo/margin_std': 259.4215087890625, 'logps/chosen': -317.3743591308594, 'logps/rejected': -577.4984741210938, 'logps/ref_chosen': -46.998348236083984, 'logps/ref_rejected': -86.87684631347656, 'KL/chosen_KL_mean': -270.37603759765625, 'KL/rejected_KL_mean': -490.62164306640625, 'KL/mean': -380.49884033203125, 'KL/std': 230.03123474121094, 'logits/chosen': -0.3836897611618042, 'logits/rejected': -0.38447412848472595, 'epoch': 0.48} + 48%|████▊ | 330/681 [14:00<15:18, 2.62s/it] 49%|████▊ | 331/681 [14:02<15:02, 2.58s/it] {'loss': 1.0157, 'grad_norm': 23.949661254882812, 'learning_rate': 3.072376374875335e-07, 'fcm_dpo/beta': 0.0019126099068671465, 'fcm_dpo/q_t': 0.39248865842819214, 'fcm_dpo/delta': -0.05275537818670273, 'fcm_dpo/margin': 235.46646118164062, 'margin_dpo/margin_mean': 235.46646118164062, 'margin_dpo/margin_std': 187.62429809570312, 'logps/chosen': -342.123291015625, 'logps/rejected': -616.0809326171875, 'logps/ref_chosen': -50.52424621582031, 'logps/ref_rejected': -89.01544189453125, 'KL/chosen_KL_mean': -291.59906005859375, 'KL/rejected_KL_mean': -527.0654907226562, 'KL/mean': -409.332275390625, 'KL/std': 194.7655029296875, 'logits/chosen': -0.44039618968963623, 'logits/rejected': -0.4362325668334961, 'epoch': 0.49} + 49%|████▊ | 331/681 [14:02<15:02, 2.58s/it] 49%|████▉ | 332/681 [14:05<14:42, 2.53s/it] {'loss': 1.1374, 'grad_norm': 22.072265625, 'learning_rate': 3.059876462596758e-07, 'fcm_dpo/beta': 0.0019352274248376489, 'fcm_dpo/q_t': 0.4261719584465027, 'fcm_dpo/delta': 0.0861460417509079, 'fcm_dpo/margin': 163.52247619628906, 'margin_dpo/margin_mean': 163.52249145507812, 'margin_dpo/margin_std': 219.849853515625, 'logps/chosen': -345.9086608886719, 'logps/rejected': -536.7359619140625, 'logps/ref_chosen': -49.18028259277344, 'logps/ref_rejected': -76.48515319824219, 'KL/chosen_KL_mean': -296.7283935546875, 'KL/rejected_KL_mean': -460.2508544921875, 'KL/mean': -378.4895935058594, 'KL/std': 197.91973876953125, 'logits/chosen': -0.43224036693573, 'logits/rejected': -0.41115298867225647, 'epoch': 0.49} + 49%|████▉ | 332/681 [14:05<14:42, 2.53s/it] 49%|████▉ | 333/681 [14:07<14:24, 2.48s/it] {'loss': 1.0795, 'grad_norm': 22.271997451782227, 'learning_rate': 3.0473617970527015e-07, 'fcm_dpo/beta': 0.0019211724866181612, 'fcm_dpo/q_t': 0.40155458450317383, 'fcm_dpo/delta': -0.03121526539325714, 'fcm_dpo/margin': 223.31976318359375, 'margin_dpo/margin_mean': 223.31976318359375, 'margin_dpo/margin_std': 292.02545166015625, 'logps/chosen': -395.4904479980469, 'logps/rejected': -650.0985717773438, 'logps/ref_chosen': -63.75574493408203, 'logps/ref_rejected': -95.04411315917969, 'KL/chosen_KL_mean': -331.73468017578125, 'KL/rejected_KL_mean': -555.054443359375, 'KL/mean': -443.3945617675781, 'KL/std': 243.6097412109375, 'logits/chosen': -0.44901585578918457, 'logits/rejected': -0.4433661699295044, 'epoch': 0.49} + 49%|████▉ | 333/681 [14:07<14:24, 2.48s/it] 49%|████▉ | 334/681 [14:10<14:22, 2.48s/it] {'loss': 1.1182, 'grad_norm': 26.76993179321289, 'learning_rate': 3.034832708016243e-07, 'fcm_dpo/beta': 0.001930908882059157, 'fcm_dpo/q_t': 0.4123017191886902, 'fcm_dpo/delta': 0.015167435631155968, 'fcm_dpo/margin': 199.5938720703125, 'margin_dpo/margin_mean': 199.5938720703125, 'margin_dpo/margin_std': 301.0901794433594, 'logps/chosen': -397.9949951171875, 'logps/rejected': -625.926025390625, 'logps/ref_chosen': -66.97975158691406, 'logps/ref_rejected': -95.31692504882812, 'KL/chosen_KL_mean': -331.0152587890625, 'KL/rejected_KL_mean': -530.609130859375, 'KL/mean': -430.8121643066406, 'KL/std': 269.2216796875, 'logits/chosen': -0.44665104150772095, 'logits/rejected': -0.44349536299705505, 'epoch': 0.49} + 49%|████▉ | 334/681 [14:10<14:22, 2.48s/it] 49%|████▉ | 335/681 [14:12<14:29, 2.51s/it] {'loss': 1.1826, 'grad_norm': 35.3482666015625, 'learning_rate': 3.022289525640531e-07, 'fcm_dpo/beta': 0.001974080689251423, 'fcm_dpo/q_t': 0.4333241581916809, 'fcm_dpo/delta': 0.10976044833660126, 'fcm_dpo/margin': 148.3397216796875, 'margin_dpo/margin_mean': 148.3397216796875, 'margin_dpo/margin_std': 268.9454650878906, 'logps/chosen': -418.9317321777344, 'logps/rejected': -592.3466796875, 'logps/ref_chosen': -62.54248046875, 'logps/ref_rejected': -87.61770629882812, 'KL/chosen_KL_mean': -356.3892517089844, 'KL/rejected_KL_mean': -504.72900390625, 'KL/mean': -430.55914306640625, 'KL/std': 242.3770294189453, 'logits/chosen': -0.482355535030365, 'logits/rejected': -0.4589642584323883, 'epoch': 0.49} + 49%|████▉ | 335/681 [14:12<14:29, 2.51s/it] 49%|████▉ | 336/681 [14:15<14:33, 2.53s/it] {'loss': 1.0614, 'grad_norm': 29.434301376342773, 'learning_rate': 3.009732580450086e-07, 'fcm_dpo/beta': 0.0019574996549636126, 'fcm_dpo/q_t': 0.39117854833602905, 'fcm_dpo/delta': -0.09448903799057007, 'fcm_dpo/margin': 250.28536987304688, 'margin_dpo/margin_mean': 250.285400390625, 'margin_dpo/margin_std': 347.0870666503906, 'logps/chosen': -413.72540283203125, 'logps/rejected': -713.8839111328125, 'logps/ref_chosen': -54.53115463256836, 'logps/ref_rejected': -104.40424346923828, 'KL/chosen_KL_mean': -359.19427490234375, 'KL/rejected_KL_mean': -609.4796752929688, 'KL/mean': -484.33697509765625, 'KL/std': 294.24444580078125, 'logits/chosen': -0.4395965039730072, 'logits/rejected': -0.44075945019721985, 'epoch': 0.49} + 49%|████▉ | 336/681 [14:15<14:33, 2.53s/it] 49%|████▉ | 337/681 [14:17<14:26, 2.52s/it] {'loss': 1.0447, 'grad_norm': 29.495193481445312, 'learning_rate': 2.9971622033320914e-07, 'fcm_dpo/beta': 0.001918459078297019, 'fcm_dpo/q_t': 0.3944718539714813, 'fcm_dpo/delta': -0.06264565885066986, 'fcm_dpo/margin': 239.52027893066406, 'margin_dpo/margin_mean': 239.52027893066406, 'margin_dpo/margin_std': 279.1695556640625, 'logps/chosen': -391.10931396484375, 'logps/rejected': -667.2279052734375, 'logps/ref_chosen': -65.12869262695312, 'logps/ref_rejected': -101.72701263427734, 'KL/chosen_KL_mean': -325.9806213378906, 'KL/rejected_KL_mean': -565.5008544921875, 'KL/mean': -445.74078369140625, 'KL/std': 236.7762908935547, 'logits/chosen': -0.48390763998031616, 'logits/rejected': -0.4708746075630188, 'epoch': 0.49} + 49%|████▉ | 337/681 [14:17<14:26, 2.52s/it] 50%|████▉ | 338/681 [14:20<14:28, 2.53s/it] {'loss': 1.0302, 'grad_norm': 25.081783294677734, 'learning_rate': 2.984578725527675e-07, 'fcm_dpo/beta': 0.0018986309878528118, 'fcm_dpo/q_t': 0.392913818359375, 'fcm_dpo/delta': -0.059694305062294006, 'fcm_dpo/margin': 240.65579223632812, 'margin_dpo/margin_mean': 240.65579223632812, 'margin_dpo/margin_std': 242.48886108398438, 'logps/chosen': -354.10931396484375, 'logps/rejected': -625.4109497070312, 'logps/ref_chosen': -58.422706604003906, 'logps/ref_rejected': -89.06854248046875, 'KL/chosen_KL_mean': -295.68658447265625, 'KL/rejected_KL_mean': -536.3424072265625, 'KL/mean': -416.0144958496094, 'KL/std': 231.92388916015625, 'logits/chosen': -0.46351104974746704, 'logits/rejected': -0.4610709249973297, 'epoch': 0.5} + 50%|████▉ | 338/681 [14:20<14:28, 2.53s/it] 50%|████▉ | 339/681 [14:22<14:01, 2.46s/it] {'loss': 1.0607, 'grad_norm': 26.538564682006836, 'learning_rate': 2.9719824786231796e-07, 'fcm_dpo/beta': 0.0018999692983925343, 'fcm_dpo/q_t': 0.3996594548225403, 'fcm_dpo/delta': -0.03361833840608597, 'fcm_dpo/margin': 226.93565368652344, 'margin_dpo/margin_mean': 226.93565368652344, 'margin_dpo/margin_std': 250.84112548828125, 'logps/chosen': -362.7000732421875, 'logps/rejected': -633.55126953125, 'logps/ref_chosen': -59.99531555175781, 'logps/ref_rejected': -103.9109115600586, 'KL/chosen_KL_mean': -302.7047424316406, 'KL/rejected_KL_mean': -529.640380859375, 'KL/mean': -416.17254638671875, 'KL/std': 236.2411346435547, 'logits/chosen': -0.5198140740394592, 'logits/rejected': -0.5054018497467041, 'epoch': 0.5} + 50%|████▉ | 339/681 [14:22<14:01, 2.46s/it] 50%|████▉ | 340/681 [14:25<14:12, 2.50s/it] {'loss': 1.1182, 'grad_norm': 23.04448890686035, 'learning_rate': 2.959373794541426e-07, 'fcm_dpo/beta': 0.0018835279624909163, 'fcm_dpo/q_t': 0.4155174195766449, 'fcm_dpo/delta': 0.03270437568426132, 'fcm_dpo/margin': 195.55636596679688, 'margin_dpo/margin_mean': 195.55636596679688, 'margin_dpo/margin_std': 281.4355163574219, 'logps/chosen': -370.8360595703125, 'logps/rejected': -586.66943359375, 'logps/ref_chosen': -52.83022689819336, 'logps/ref_rejected': -73.10723114013672, 'KL/chosen_KL_mean': -318.005859375, 'KL/rejected_KL_mean': -513.5621948242188, 'KL/mean': -415.7840270996094, 'KL/std': 228.09695434570312, 'logits/chosen': -0.39705830812454224, 'logits/rejected': -0.3717266917228699, 'epoch': 0.5} + 50%|████▉ | 340/681 [14:25<14:12, 2.50s/it] 50%|█████ | 341/681 [14:27<13:46, 2.43s/it] {'loss': 1.0485, 'grad_norm': 26.15273094177246, 'learning_rate': 2.946753005532965e-07, 'fcm_dpo/beta': 0.0018748041475191712, 'fcm_dpo/q_t': 0.39694035053253174, 'fcm_dpo/delta': -0.04550610110163689, 'fcm_dpo/margin': 236.36013793945312, 'margin_dpo/margin_mean': 236.36013793945312, 'margin_dpo/margin_std': 258.4355163574219, 'logps/chosen': -356.7102355957031, 'logps/rejected': -646.98046875, 'logps/ref_chosen': -47.899803161621094, 'logps/ref_rejected': -101.80987548828125, 'KL/chosen_KL_mean': -308.8104248046875, 'KL/rejected_KL_mean': -545.1705322265625, 'KL/mean': -426.990478515625, 'KL/std': 248.2613983154297, 'logits/chosen': -0.42927074432373047, 'logits/rejected': -0.42871958017349243, 'epoch': 0.5} + 50%|█████ | 341/681 [14:27<13:46, 2.43s/it] 50%|█████ | 342/681 [14:30<14:06, 2.50s/it] {'loss': 1.111, 'grad_norm': 23.40822410583496, 'learning_rate': 2.934120444167326e-07, 'fcm_dpo/beta': 0.0018906050827354193, 'fcm_dpo/q_t': 0.41175463795661926, 'fcm_dpo/delta': 0.01890111342072487, 'fcm_dpo/margin': 201.69537353515625, 'margin_dpo/margin_mean': 201.69537353515625, 'margin_dpo/margin_std': 286.6202392578125, 'logps/chosen': -374.05523681640625, 'logps/rejected': -596.3435668945312, 'logps/ref_chosen': -71.99664306640625, 'logps/ref_rejected': -92.58959197998047, 'KL/chosen_KL_mean': -302.05859375, 'KL/rejected_KL_mean': -503.75396728515625, 'KL/mean': -402.9062805175781, 'KL/std': 237.40493774414062, 'logits/chosen': -0.47432941198349, 'logits/rejected': -0.442875474691391, 'epoch': 0.5} + 50%|█████ | 342/681 [14:30<14:06, 2.50s/it] 50%|█████ | 343/681 [14:32<13:48, 2.45s/it] {'loss': 1.0107, 'grad_norm': 26.693635940551758, 'learning_rate': 2.9214764433242476e-07, 'fcm_dpo/beta': 0.0018635441083461046, 'fcm_dpo/q_t': 0.3894881308078766, 'fcm_dpo/delta': -0.06895594298839569, 'fcm_dpo/margin': 249.90182495117188, 'margin_dpo/margin_mean': 249.90182495117188, 'margin_dpo/margin_std': 208.7919158935547, 'logps/chosen': -325.6682434082031, 'logps/rejected': -632.2058715820312, 'logps/ref_chosen': -54.405616760253906, 'logps/ref_rejected': -111.04142761230469, 'KL/chosen_KL_mean': -271.26263427734375, 'KL/rejected_KL_mean': -521.1644287109375, 'KL/mean': -396.2135314941406, 'KL/std': 233.09645080566406, 'logits/chosen': -0.4619428813457489, 'logits/rejected': -0.4649538993835449, 'epoch': 0.5} + 50%|█████ | 343/681 [14:32<13:48, 2.45s/it] 51%|█████ | 344/681 [14:34<13:45, 2.45s/it] {'loss': 1.0759, 'grad_norm': 26.9531192779541, 'learning_rate': 2.9088213361849126e-07, 'fcm_dpo/beta': 0.0018696986371651292, 'fcm_dpo/q_t': 0.40508079528808594, 'fcm_dpo/delta': -0.022609613835811615, 'fcm_dpo/margin': 224.6697235107422, 'margin_dpo/margin_mean': 224.6697235107422, 'margin_dpo/margin_std': 269.2288818359375, 'logps/chosen': -332.5355224609375, 'logps/rejected': -593.8639526367188, 'logps/ref_chosen': -53.96466827392578, 'logps/ref_rejected': -90.62336730957031, 'KL/chosen_KL_mean': -278.57086181640625, 'KL/rejected_KL_mean': -503.2406005859375, 'KL/mean': -390.90570068359375, 'KL/std': 266.9576416015625, 'logits/chosen': -0.4527415633201599, 'logits/rejected': -0.44961199164390564, 'epoch': 0.51} + 51%|█████ | 344/681 [14:34<13:45, 2.45s/it] 51%|█████ | 345/681 [14:37<14:08, 2.52s/it] {'loss': 1.0482, 'grad_norm': 25.29005241394043, 'learning_rate': 2.896155456223163e-07, 'fcm_dpo/beta': 0.0018422373104840517, 'fcm_dpo/q_t': 0.395746111869812, 'fcm_dpo/delta': -0.0500393845140934, 'fcm_dpo/margin': 243.03695678710938, 'margin_dpo/margin_mean': 243.03695678710938, 'margin_dpo/margin_std': 273.3504638671875, 'logps/chosen': -397.6063232421875, 'logps/rejected': -678.447998046875, 'logps/ref_chosen': -61.685699462890625, 'logps/ref_rejected': -99.49041748046875, 'KL/chosen_KL_mean': -335.9206237792969, 'KL/rejected_KL_mean': -578.9576416015625, 'KL/mean': -457.4391174316406, 'KL/std': 244.952392578125, 'logits/chosen': -0.4590086340904236, 'logits/rejected': -0.4529004395008087, 'epoch': 0.51} + 51%|█████ | 345/681 [14:37<14:08, 2.52s/it] 51%|█████ | 346/681 [14:40<13:58, 2.50s/it] {'loss': 1.0887, 'grad_norm': 26.261621475219727, 'learning_rate': 2.883479137196714e-07, 'fcm_dpo/beta': 0.0018314840272068977, 'fcm_dpo/q_t': 0.4076474905014038, 'fcm_dpo/delta': 0.0066130333580076694, 'fcm_dpo/margin': 214.92916870117188, 'margin_dpo/margin_mean': 214.92916870117188, 'margin_dpo/margin_std': 269.51617431640625, 'logps/chosen': -397.69134521484375, 'logps/rejected': -634.779541015625, 'logps/ref_chosen': -55.256263732910156, 'logps/ref_rejected': -77.41532135009766, 'KL/chosen_KL_mean': -342.43505859375, 'KL/rejected_KL_mean': -557.3642578125, 'KL/mean': -449.899658203125, 'KL/std': 243.40853881835938, 'logits/chosen': -0.42875561118125916, 'logits/rejected': -0.41599297523498535, 'epoch': 0.51} + 51%|█████ | 346/681 [14:40<13:58, 2.50s/it] 51%|█████ | 347/681 [14:42<13:23, 2.41s/it] {'loss': 1.0904, 'grad_norm': 21.977977752685547, 'learning_rate': 2.8707927131383614e-07, 'fcm_dpo/beta': 0.0018333385232836008, 'fcm_dpo/q_t': 0.4073731303215027, 'fcm_dpo/delta': -0.0019443881465122104, 'fcm_dpo/margin': 219.19906616210938, 'margin_dpo/margin_mean': 219.19906616210938, 'margin_dpo/margin_std': 289.9748229980469, 'logps/chosen': -405.75531005859375, 'logps/rejected': -659.7432861328125, 'logps/ref_chosen': -57.56623840332031, 'logps/ref_rejected': -92.35509490966797, 'KL/chosen_KL_mean': -348.1890563964844, 'KL/rejected_KL_mean': -567.38818359375, 'KL/mean': -457.7886047363281, 'KL/std': 259.5652160644531, 'logits/chosen': -0.423196017742157, 'logits/rejected': -0.4180574417114258, 'epoch': 0.51} + 51%|█████ | 347/681 [14:42<13:23, 2.41s/it] 51%|█████ | 348/681 [14:44<13:12, 2.38s/it] {'loss': 1.1141, 'grad_norm': 25.500268936157227, 'learning_rate': 2.858096518347179e-07, 'fcm_dpo/beta': 0.0018353135092183948, 'fcm_dpo/q_t': 0.4174761176109314, 'fcm_dpo/delta': 0.04490099474787712, 'fcm_dpo/margin': 194.16824340820312, 'margin_dpo/margin_mean': 194.16824340820312, 'margin_dpo/margin_std': 251.42027282714844, 'logps/chosen': -363.67205810546875, 'logps/rejected': -590.6610107421875, 'logps/ref_chosen': -56.31770324707031, 'logps/ref_rejected': -89.13836669921875, 'KL/chosen_KL_mean': -307.3543701171875, 'KL/rejected_KL_mean': -501.52264404296875, 'KL/mean': -404.4385070800781, 'KL/std': 220.54977416992188, 'logits/chosen': -0.44019395112991333, 'logits/rejected': -0.43998709321022034, 'epoch': 0.51} + 51%|█████ | 348/681 [14:44<13:12, 2.38s/it] 51%|█████ | 349/681 [14:47<13:38, 2.46s/it] {'loss': 1.0942, 'grad_norm': 21.230770111083984, 'learning_rate': 2.845390887379706e-07, 'fcm_dpo/beta': 0.0018510316731408238, 'fcm_dpo/q_t': 0.40646952390670776, 'fcm_dpo/delta': -0.018560701981186867, 'fcm_dpo/margin': 225.6001739501953, 'margin_dpo/margin_mean': 225.60018920898438, 'margin_dpo/margin_std': 322.47943115234375, 'logps/chosen': -357.01300048828125, 'logps/rejected': -622.0927734375, 'logps/ref_chosen': -58.025516510009766, 'logps/ref_rejected': -97.50515747070312, 'KL/chosen_KL_mean': -298.98748779296875, 'KL/rejected_KL_mean': -524.587646484375, 'KL/mean': -411.78753662109375, 'KL/std': 259.8543395996094, 'logits/chosen': -0.41702014207839966, 'logits/rejected': -0.41563892364501953, 'epoch': 0.51} + 51%|█████ | 349/681 [14:47<13:38, 2.46s/it] 51%|█████▏ | 350/681 [14:49<14:01, 2.54s/it] {'loss': 1.0923, 'grad_norm': 27.918197631835938, 'learning_rate': 2.8326761550411346e-07, 'fcm_dpo/beta': 0.0018313410691916943, 'fcm_dpo/q_t': 0.40537285804748535, 'fcm_dpo/delta': -0.011365924030542374, 'fcm_dpo/margin': 224.13467407226562, 'margin_dpo/margin_mean': 224.13467407226562, 'margin_dpo/margin_std': 306.51544189453125, 'logps/chosen': -389.54541015625, 'logps/rejected': -639.2212524414062, 'logps/ref_chosen': -64.33049011230469, 'logps/ref_rejected': -89.87164306640625, 'KL/chosen_KL_mean': -325.2149353027344, 'KL/rejected_KL_mean': -549.349609375, 'KL/mean': -437.28228759765625, 'KL/std': 247.470703125, 'logits/chosen': -0.47454479336738586, 'logits/rejected': -0.477811336517334, 'epoch': 0.51} + 51%|█████▏ | 350/681 [14:50<14:01, 2.54s/it] 52%|█████▏ | 351/681 [14:52<14:02, 2.55s/it] {'loss': 1.0433, 'grad_norm': 29.366575241088867, 'learning_rate': 2.819952656376487e-07, 'fcm_dpo/beta': 0.0018058628775179386, 'fcm_dpo/q_t': 0.3909297585487366, 'fcm_dpo/delta': -0.09307081252336502, 'fcm_dpo/margin': 270.1547546386719, 'margin_dpo/margin_mean': 270.15478515625, 'margin_dpo/margin_std': 341.22308349609375, 'logps/chosen': -361.9317321777344, 'logps/rejected': -672.9797973632812, 'logps/ref_chosen': -60.6721305847168, 'logps/ref_rejected': -101.5654296875, 'KL/chosen_KL_mean': -301.2596130371094, 'KL/rejected_KL_mean': -571.4143676757812, 'KL/mean': -436.33697509765625, 'KL/std': 292.23785400390625, 'logits/chosen': -0.44908708333969116, 'logits/rejected': -0.44883760809898376, 'epoch': 0.52} + 52%|█████▏ | 351/681 [14:52<14:02, 2.55s/it] 52%|█████▏ | 352/681 [14:55<14:03, 2.56s/it] {'loss': 1.1878, 'grad_norm': 27.643993377685547, 'learning_rate': 2.8072207266617854e-07, 'fcm_dpo/beta': 0.0018303534016013145, 'fcm_dpo/q_t': 0.4339728057384491, 'fcm_dpo/delta': 0.11433063447475433, 'fcm_dpo/margin': 157.96470642089844, 'margin_dpo/margin_mean': 157.96470642089844, 'margin_dpo/margin_std': 294.22454833984375, 'logps/chosen': -430.52130126953125, 'logps/rejected': -594.1845092773438, 'logps/ref_chosen': -70.9434585571289, 'logps/ref_rejected': -76.6419677734375, 'KL/chosen_KL_mean': -359.5778503417969, 'KL/rejected_KL_mean': -517.5425415039062, 'KL/mean': -438.5601806640625, 'KL/std': 265.6382141113281, 'logits/chosen': -0.47322726249694824, 'logits/rejected': -0.4383177161216736, 'epoch': 0.52} + 52%|█████▏ | 352/681 [14:55<14:03, 2.56s/it] 52%|█████▏ | 353/681 [14:57<13:59, 2.56s/it] {'loss': 1.0973, 'grad_norm': 36.342987060546875, 'learning_rate': 2.794480701395219e-07, 'fcm_dpo/beta': 0.001833123154938221, 'fcm_dpo/q_t': 0.4085083305835724, 'fcm_dpo/delta': -0.005401637405157089, 'fcm_dpo/margin': 220.58767700195312, 'margin_dpo/margin_mean': 220.58767700195312, 'margin_dpo/margin_std': 301.33837890625, 'logps/chosen': -383.02227783203125, 'logps/rejected': -625.5501708984375, 'logps/ref_chosen': -58.39533996582031, 'logps/ref_rejected': -80.33553314208984, 'KL/chosen_KL_mean': -324.626953125, 'KL/rejected_KL_mean': -545.214599609375, 'KL/mean': -434.9208068847656, 'KL/std': 259.8445739746094, 'logits/chosen': -0.48048973083496094, 'logits/rejected': -0.46663162112236023, 'epoch': 0.52} + 52%|█████▏ | 353/681 [14:57<13:59, 2.56s/it] 52%|█████▏ | 354/681 [15:00<14:06, 2.59s/it] {'loss': 1.0521, 'grad_norm': 22.05979347229004, 'learning_rate': 2.781732916288303e-07, 'fcm_dpo/beta': 0.001836308278143406, 'fcm_dpo/q_t': 0.4003763198852539, 'fcm_dpo/delta': -0.025253523141145706, 'fcm_dpo/margin': 230.987060546875, 'margin_dpo/margin_mean': 230.987060546875, 'margin_dpo/margin_std': 238.68994140625, 'logps/chosen': -341.038818359375, 'logps/rejected': -600.9804077148438, 'logps/ref_chosen': -59.80299377441406, 'logps/ref_rejected': -88.75750732421875, 'KL/chosen_KL_mean': -281.23583984375, 'KL/rejected_KL_mean': -512.222900390625, 'KL/mean': -396.7293701171875, 'KL/std': 240.41439819335938, 'logits/chosen': -0.4675145745277405, 'logits/rejected': -0.4558253884315491, 'epoch': 0.52} + 52%|█████▏ | 354/681 [15:00<14:06, 2.59s/it] 52%|█████▏ | 355/681 [15:02<13:46, 2.54s/it] {'loss': 1.0547, 'grad_norm': 39.719356536865234, 'learning_rate': 2.7689777072570284e-07, 'fcm_dpo/beta': 0.0018241136567667127, 'fcm_dpo/q_t': 0.4008174538612366, 'fcm_dpo/delta': -0.021784018725156784, 'fcm_dpo/margin': 230.61721801757812, 'margin_dpo/margin_mean': 230.6171875, 'margin_dpo/margin_std': 236.90528869628906, 'logps/chosen': -333.0058898925781, 'logps/rejected': -591.900634765625, 'logps/ref_chosen': -54.12849807739258, 'logps/ref_rejected': -82.40606689453125, 'KL/chosen_KL_mean': -278.87738037109375, 'KL/rejected_KL_mean': -509.4945983886719, 'KL/mean': -394.18597412109375, 'KL/std': 236.02166748046875, 'logits/chosen': -0.5206550359725952, 'logits/rejected': -0.5103884339332581, 'epoch': 0.52} + 52%|█████▏ | 355/681 [15:02<13:46, 2.54s/it] 52%|█████▏ | 356/681 [15:05<14:03, 2.60s/it] {'loss': 1.2489, 'grad_norm': 27.5300350189209, 'learning_rate': 2.7562154104130176e-07, 'fcm_dpo/beta': 0.0018336132634431124, 'fcm_dpo/q_t': 0.4491380453109741, 'fcm_dpo/delta': 0.03628718480467796, 'fcm_dpo/margin': 122.51679992675781, 'margin_dpo/margin_mean': 122.51680755615234, 'margin_dpo/margin_std': 300.3589782714844, 'logps/chosen': -392.088623046875, 'logps/rejected': -525.8308715820312, 'logps/ref_chosen': -64.6738052368164, 'logps/ref_rejected': -75.89926147460938, 'KL/chosen_KL_mean': -327.414794921875, 'KL/rejected_KL_mean': -449.9316101074219, 'KL/mean': -388.6732177734375, 'KL/std': 244.87353515625, 'logits/chosen': -0.4934132695198059, 'logits/rejected': -0.46816959977149963, 'epoch': 0.52} + 52%|█████▏ | 356/681 [15:05<14:03, 2.60s/it] 52%|█████▏ | 357/681 [15:08<14:04, 2.61s/it] {'loss': 1.0915, 'grad_norm': 27.42888641357422, 'learning_rate': 2.7434463620546594e-07, 'fcm_dpo/beta': 0.0018397256499156356, 'fcm_dpo/q_t': 0.41156771779060364, 'fcm_dpo/delta': 0.022911615669727325, 'fcm_dpo/margin': 205.3372802734375, 'margin_dpo/margin_mean': 205.3372802734375, 'margin_dpo/margin_std': 241.0042724609375, 'logps/chosen': -336.35186767578125, 'logps/rejected': -575.8045043945312, 'logps/ref_chosen': -52.725799560546875, 'logps/ref_rejected': -86.84115600585938, 'KL/chosen_KL_mean': -283.6260681152344, 'KL/rejected_KL_mean': -488.9633483886719, 'KL/mean': -386.2947082519531, 'KL/std': 233.4674835205078, 'logits/chosen': -0.4698370695114136, 'logits/rejected': -0.4574124217033386, 'epoch': 0.52} + 52%|█████▏ | 357/681 [15:08<14:04, 2.61s/it] 53%|█████▎ | 358/681 [15:10<14:18, 2.66s/it] {'loss': 1.1185, 'grad_norm': 24.31972312927246, 'learning_rate': 2.730670898658255e-07, 'fcm_dpo/beta': 0.001860738848336041, 'fcm_dpo/q_t': 0.41889050602912903, 'fcm_dpo/delta': 0.04985009878873825, 'fcm_dpo/margin': 189.13580322265625, 'margin_dpo/margin_mean': 189.13580322265625, 'margin_dpo/margin_std': 257.4248352050781, 'logps/chosen': -327.2962646484375, 'logps/rejected': -541.5999755859375, 'logps/ref_chosen': -63.20543670654297, 'logps/ref_rejected': -88.373291015625, 'KL/chosen_KL_mean': -264.0908508300781, 'KL/rejected_KL_mean': -453.22662353515625, 'KL/mean': -358.65875244140625, 'KL/std': 234.24258422851562, 'logits/chosen': -0.48346900939941406, 'logits/rejected': -0.4679170846939087, 'epoch': 0.53} + 53%|█████▎ | 358/681 [15:10<14:18, 2.66s/it] 53%|█████▎ | 359/681 [15:13<14:07, 2.63s/it] {'loss': 1.0842, 'grad_norm': 26.327983856201172, 'learning_rate': 2.717889356869146e-07, 'fcm_dpo/beta': 0.0018613252323120832, 'fcm_dpo/q_t': 0.4068043828010559, 'fcm_dpo/delta': -0.007966313511133194, 'fcm_dpo/margin': 218.9563446044922, 'margin_dpo/margin_mean': 218.95635986328125, 'margin_dpo/margin_std': 277.9966735839844, 'logps/chosen': -361.3740234375, 'logps/rejected': -606.1339111328125, 'logps/ref_chosen': -56.370216369628906, 'logps/ref_rejected': -82.17375183105469, 'KL/chosen_KL_mean': -305.0037841796875, 'KL/rejected_KL_mean': -523.960205078125, 'KL/mean': -414.48199462890625, 'KL/std': 237.02291870117188, 'logits/chosen': -0.43576061725616455, 'logits/rejected': -0.42485448718070984, 'epoch': 0.53} + 53%|█████▎ | 359/681 [15:13<14:07, 2.63s/it] 53%|█████▎ | 360/681 [15:15<13:51, 2.59s/it] {'loss': 1.1061, 'grad_norm': 31.305709838867188, 'learning_rate': 2.7051020734928443e-07, 'fcm_dpo/beta': 0.001883307471871376, 'fcm_dpo/q_t': 0.41872933506965637, 'fcm_dpo/delta': 0.05981479212641716, 'fcm_dpo/margin': 181.6644287109375, 'margin_dpo/margin_mean': 181.6644287109375, 'margin_dpo/margin_std': 195.6152801513672, 'logps/chosen': -348.2937927246094, 'logps/rejected': -548.3367919921875, 'logps/ref_chosen': -51.460384368896484, 'logps/ref_rejected': -69.83892059326172, 'KL/chosen_KL_mean': -296.8334045410156, 'KL/rejected_KL_mean': -478.4978332519531, 'KL/mean': -387.6656188964844, 'KL/std': 208.1656951904297, 'logits/chosen': -0.4398846924304962, 'logits/rejected': -0.425040602684021, 'epoch': 0.53} + 53%|█████▎ | 360/681 [15:16<13:51, 2.59s/it] 53%|█████▎ | 361/681 [15:18<13:59, 2.62s/it] {'loss': 1.1334, 'grad_norm': 29.791786193847656, 'learning_rate': 2.6923093854861593e-07, 'fcm_dpo/beta': 0.0019093567971140146, 'fcm_dpo/q_t': 0.4211677312850952, 'fcm_dpo/delta': 0.061102624982595444, 'fcm_dpo/margin': 178.4139404296875, 'margin_dpo/margin_mean': 178.41392517089844, 'margin_dpo/margin_std': 260.59844970703125, 'logps/chosen': -384.8084716796875, 'logps/rejected': -600.1221923828125, 'logps/ref_chosen': -53.86951446533203, 'logps/ref_rejected': -90.7692642211914, 'KL/chosen_KL_mean': -330.93896484375, 'KL/rejected_KL_mean': -509.3529052734375, 'KL/mean': -420.14593505859375, 'KL/std': 238.32623291015625, 'logits/chosen': -0.4700263738632202, 'logits/rejected': -0.4652746319770813, 'epoch': 0.53} + 53%|█████▎ | 361/681 [15:18<13:59, 2.62s/it] 53%|█████▎ | 362/681 [15:21<13:46, 2.59s/it] {'loss': 0.9909, 'grad_norm': 21.00020408630371, 'learning_rate': 2.679511629948319e-07, 'fcm_dpo/beta': 0.0018680819775909185, 'fcm_dpo/q_t': 0.37981897592544556, 'fcm_dpo/delta': -0.129384845495224, 'fcm_dpo/margin': 279.48834228515625, 'margin_dpo/margin_mean': 279.48834228515625, 'margin_dpo/margin_std': 277.12896728515625, 'logps/chosen': -352.1115417480469, 'logps/rejected': -678.542724609375, 'logps/ref_chosen': -58.639060974121094, 'logps/ref_rejected': -105.58195495605469, 'KL/chosen_KL_mean': -293.47247314453125, 'KL/rejected_KL_mean': -572.9608154296875, 'KL/mean': -433.2166442871094, 'KL/std': 255.72384643554688, 'logits/chosen': -0.4529603123664856, 'logits/rejected': -0.46245017647743225, 'epoch': 0.53} + 53%|█████▎ | 362/681 [15:21<13:46, 2.59s/it] 53%|█████▎ | 363/681 [15:23<13:16, 2.51s/it] {'loss': 0.9956, 'grad_norm': 26.93399429321289, 'learning_rate': 2.6667091441120816e-07, 'fcm_dpo/beta': 0.0018322591204196215, 'fcm_dpo/q_t': 0.380690336227417, 'fcm_dpo/delta': -0.12359863519668579, 'fcm_dpo/margin': 282.3719482421875, 'margin_dpo/margin_mean': 282.3719482421875, 'margin_dpo/margin_std': 281.8260803222656, 'logps/chosen': -316.80096435546875, 'logps/rejected': -629.3094482421875, 'logps/ref_chosen': -44.558380126953125, 'logps/ref_rejected': -74.69496154785156, 'KL/chosen_KL_mean': -272.2425537109375, 'KL/rejected_KL_mean': -554.614501953125, 'KL/mean': -413.42852783203125, 'KL/std': 253.0455322265625, 'logits/chosen': -0.4361415505409241, 'logits/rejected': -0.42436856031417847, 'epoch': 0.53} + 53%|█████▎ | 363/681 [15:23<13:16, 2.51s/it] 53%|█████▎ | 364/681 [15:26<13:21, 2.53s/it] {'loss': 1.0951, 'grad_norm': 27.668123245239258, 'learning_rate': 2.6539022653348575e-07, 'fcm_dpo/beta': 0.001825526007451117, 'fcm_dpo/q_t': 0.40954408049583435, 'fcm_dpo/delta': 0.010103408247232437, 'fcm_dpo/margin': 213.612060546875, 'margin_dpo/margin_mean': 213.612060546875, 'margin_dpo/margin_std': 277.07574462890625, 'logps/chosen': -340.59893798828125, 'logps/rejected': -596.712158203125, 'logps/ref_chosen': -48.894622802734375, 'logps/ref_rejected': -91.395751953125, 'KL/chosen_KL_mean': -291.7043151855469, 'KL/rejected_KL_mean': -505.3163757324219, 'KL/mean': -398.51031494140625, 'KL/std': 243.93292236328125, 'logits/chosen': -0.4502606987953186, 'logits/rejected': -0.46004268527030945, 'epoch': 0.53} + 53%|█████▎ | 364/681 [15:26<13:21, 2.53s/it] 54%|█████▎ | 365/681 [15:28<13:27, 2.56s/it] {'loss': 1.074, 'grad_norm': 21.68266487121582, 'learning_rate': 2.641091331089811e-07, 'fcm_dpo/beta': 0.0018164238426834345, 'fcm_dpo/q_t': 0.4062727391719818, 'fcm_dpo/delta': -0.009190201759338379, 'fcm_dpo/margin': 225.06381225585938, 'margin_dpo/margin_mean': 225.06381225585938, 'margin_dpo/margin_std': 269.61572265625, 'logps/chosen': -345.48876953125, 'logps/rejected': -611.7615966796875, 'logps/ref_chosen': -51.49274444580078, 'logps/ref_rejected': -92.70166778564453, 'KL/chosen_KL_mean': -293.99603271484375, 'KL/rejected_KL_mean': -519.0599365234375, 'KL/mean': -406.5279541015625, 'KL/std': 261.5389099121094, 'logits/chosen': -0.4506559371948242, 'logits/rejected': -0.46002912521362305, 'epoch': 0.54} + 54%|█████▎ | 365/681 [15:28<13:27, 2.56s/it] 54%|█████▎ | 366/681 [15:31<13:25, 2.56s/it] {'loss': 1.0802, 'grad_norm': 23.559268951416016, 'learning_rate': 2.6282766789569736e-07, 'fcm_dpo/beta': 0.0018009209306910634, 'fcm_dpo/q_t': 0.40347927808761597, 'fcm_dpo/delta': -0.022701263427734375, 'fcm_dpo/margin': 233.8548583984375, 'margin_dpo/margin_mean': 233.85484313964844, 'margin_dpo/margin_std': 303.1095275878906, 'logps/chosen': -312.8511962890625, 'logps/rejected': -585.2958984375, 'logps/ref_chosen': -44.7205696105957, 'logps/ref_rejected': -83.31040954589844, 'KL/chosen_KL_mean': -268.130615234375, 'KL/rejected_KL_mean': -501.9854736328125, 'KL/mean': -385.05804443359375, 'KL/std': 256.3507385253906, 'logits/chosen': -0.4541221857070923, 'logits/rejected': -0.46955257654190063, 'epoch': 0.54} + 54%|█████▎ | 366/681 [15:31<13:25, 2.56s/it] 54%|█████▍ | 367/681 [15:34<13:43, 2.62s/it] {'loss': 1.1238, 'grad_norm': 18.683847427368164, 'learning_rate': 2.615458646614349e-07, 'fcm_dpo/beta': 0.0018272295128554106, 'fcm_dpo/q_t': 0.419744074344635, 'fcm_dpo/delta': 0.06128734350204468, 'fcm_dpo/margin': 186.4353485107422, 'margin_dpo/margin_mean': 186.4353485107422, 'margin_dpo/margin_std': 248.96206665039062, 'logps/chosen': -342.1902160644531, 'logps/rejected': -546.971435546875, 'logps/ref_chosen': -58.405418395996094, 'logps/ref_rejected': -76.75132751464844, 'KL/chosen_KL_mean': -283.7847900390625, 'KL/rejected_KL_mean': -470.22015380859375, 'KL/mean': -377.00250244140625, 'KL/std': 222.3704376220703, 'logits/chosen': -0.49926167726516724, 'logits/rejected': -0.4835873246192932, 'epoch': 0.54} + 54%|█████▍ | 367/681 [15:34<13:43, 2.62s/it] 54%|█████▍ | 368/681 [15:36<13:38, 2.62s/it] {'loss': 0.9563, 'grad_norm': 36.82114791870117, 'learning_rate': 2.6026375718290083e-07, 'fcm_dpo/beta': 0.0017914584605023265, 'fcm_dpo/q_t': 0.37201637029647827, 'fcm_dpo/delta': -0.14462688565254211, 'fcm_dpo/margin': 299.6893310546875, 'margin_dpo/margin_mean': 299.6893310546875, 'margin_dpo/margin_std': 219.16549682617188, 'logps/chosen': -303.9324951171875, 'logps/rejected': -657.724609375, 'logps/ref_chosen': -44.452518463134766, 'logps/ref_rejected': -98.55526733398438, 'KL/chosen_KL_mean': -259.47998046875, 'KL/rejected_KL_mean': -559.1693115234375, 'KL/mean': -409.32464599609375, 'KL/std': 250.1630859375, 'logits/chosen': -0.4651143252849579, 'logits/rejected': -0.4765470325946808, 'epoch': 0.54} + 54%|█████▍ | 368/681 [15:36<13:38, 2.62s/it] 54%|█████▍ | 369/681 [15:39<13:41, 2.63s/it] {'loss': 1.1905, 'grad_norm': 28.619403839111328, 'learning_rate': 2.589813792448196e-07, 'fcm_dpo/beta': 0.0018156407168135047, 'fcm_dpo/q_t': 0.43435177206993103, 'fcm_dpo/delta': 0.11753154546022415, 'fcm_dpo/margin': 157.07363891601562, 'margin_dpo/margin_mean': 157.07363891601562, 'margin_dpo/margin_std': 296.42974853515625, 'logps/chosen': -420.96990966796875, 'logps/rejected': -597.9578857421875, 'logps/ref_chosen': -71.38150024414062, 'logps/ref_rejected': -91.29582214355469, 'KL/chosen_KL_mean': -349.58837890625, 'KL/rejected_KL_mean': -506.66204833984375, 'KL/mean': -428.125244140625, 'KL/std': 251.02548217773438, 'logits/chosen': -0.47754406929016113, 'logits/rejected': -0.460124135017395, 'epoch': 0.54} + 54%|█████▍ | 369/681 [15:39<13:41, 2.63s/it] 54%|█████▍ | 370/681 [15:42<13:47, 2.66s/it] {'loss': 1.1907, 'grad_norm': 25.48780059814453, 'learning_rate': 2.5769876463904263e-07, 'fcm_dpo/beta': 0.0018587787635624409, 'fcm_dpo/q_t': 0.43549519777297974, 'fcm_dpo/delta': 0.12413851916790009, 'fcm_dpo/margin': 150.05091857910156, 'margin_dpo/margin_mean': 150.05091857910156, 'margin_dpo/margin_std': 276.411376953125, 'logps/chosen': -424.2408447265625, 'logps/rejected': -599.9440307617188, 'logps/ref_chosen': -71.60749816894531, 'logps/ref_rejected': -97.25978088378906, 'KL/chosen_KL_mean': -352.63336181640625, 'KL/rejected_KL_mean': -502.6842346191406, 'KL/mean': -427.6588134765625, 'KL/std': 246.27041625976562, 'logits/chosen': -0.47866642475128174, 'logits/rejected': -0.47251564264297485, 'epoch': 0.54} + 54%|█████▍ | 370/681 [15:42<13:47, 2.66s/it] 54%|█████▍ | 371/681 [15:44<13:48, 2.67s/it] {'loss': 1.1036, 'grad_norm': 24.080177307128906, 'learning_rate': 2.5641594716365744e-07, 'fcm_dpo/beta': 0.001868913066573441, 'fcm_dpo/q_t': 0.40844932198524475, 'fcm_dpo/delta': -0.004648314788937569, 'fcm_dpo/margin': 216.36563110351562, 'margin_dpo/margin_mean': 216.36563110351562, 'margin_dpo/margin_std': 315.74822998046875, 'logps/chosen': -414.58056640625, 'logps/rejected': -660.703857421875, 'logps/ref_chosen': -69.41448974609375, 'logps/ref_rejected': -99.17217254638672, 'KL/chosen_KL_mean': -345.16607666015625, 'KL/rejected_KL_mean': -561.5316772460938, 'KL/mean': -453.348876953125, 'KL/std': 264.92401123046875, 'logits/chosen': -0.5107867121696472, 'logits/rejected': -0.49860259890556335, 'epoch': 0.54} + 54%|█████▍ | 371/681 [15:44<13:48, 2.67s/it] 55%|█████▍ | 372/681 [15:46<13:07, 2.55s/it] {'loss': 1.0477, 'grad_norm': 23.18116569519043, 'learning_rate': 2.551329606220976e-07, 'fcm_dpo/beta': 0.001838641008362174, 'fcm_dpo/q_t': 0.3929249942302704, 'fcm_dpo/delta': -0.08088327199220657, 'fcm_dpo/margin': 259.337158203125, 'margin_dpo/margin_mean': 259.3371887207031, 'margin_dpo/margin_std': 330.2596435546875, 'logps/chosen': -389.09637451171875, 'logps/rejected': -665.155029296875, 'logps/ref_chosen': -61.8179931640625, 'logps/ref_rejected': -78.53948974609375, 'KL/chosen_KL_mean': -327.27838134765625, 'KL/rejected_KL_mean': -586.6155395507812, 'KL/mean': -456.94696044921875, 'KL/std': 292.30975341796875, 'logits/chosen': -0.47245320677757263, 'logits/rejected': -0.44926324486732483, 'epoch': 0.55} + 55%|█████▍ | 372/681 [15:47<13:07, 2.55s/it] 55%|█████▍ | 373/681 [15:49<13:00, 2.53s/it] {'loss': 1.0512, 'grad_norm': 29.970626831054688, 'learning_rate': 2.538498388222517e-07, 'fcm_dpo/beta': 0.0018302889075130224, 'fcm_dpo/q_t': 0.39616119861602783, 'fcm_dpo/delta': -0.04753255099058151, 'fcm_dpo/margin': 243.22650146484375, 'margin_dpo/margin_mean': 243.22650146484375, 'margin_dpo/margin_std': 269.6577453613281, 'logps/chosen': -416.6119079589844, 'logps/rejected': -681.580810546875, 'logps/ref_chosen': -64.21713256835938, 'logps/ref_rejected': -85.95960998535156, 'KL/chosen_KL_mean': -352.394775390625, 'KL/rejected_KL_mean': -595.6212158203125, 'KL/mean': -474.00799560546875, 'KL/std': 281.63482666015625, 'logits/chosen': -0.4819701910018921, 'logits/rejected': -0.45813024044036865, 'epoch': 0.55} + 55%|█████▍ | 373/681 [15:49<13:00, 2.53s/it] 55%|█████▍ | 374/681 [15:52<13:19, 2.60s/it] {'loss': 1.128, 'grad_norm': 37.50387954711914, 'learning_rate': 2.525666155755725e-07, 'fcm_dpo/beta': 0.0018100242596119642, 'fcm_dpo/q_t': 0.4148157835006714, 'fcm_dpo/delta': 0.011296160519123077, 'fcm_dpo/margin': 214.69683837890625, 'margin_dpo/margin_mean': 214.6968536376953, 'margin_dpo/margin_std': 351.6225891113281, 'logps/chosen': -404.38067626953125, 'logps/rejected': -642.0675048828125, 'logps/ref_chosen': -70.65018463134766, 'logps/ref_rejected': -93.64016723632812, 'KL/chosen_KL_mean': -333.73046875, 'KL/rejected_KL_mean': -548.4273681640625, 'KL/mean': -441.07891845703125, 'KL/std': 305.963623046875, 'logits/chosen': -0.5606328845024109, 'logits/rejected': -0.542881429195404, 'epoch': 0.55} + 55%|█████▍ | 374/681 [15:52<13:19, 2.60s/it] 55%|█████▌ | 375/681 [15:54<13:28, 2.64s/it] {'loss': 1.1173, 'grad_norm': 48.105567932128906, 'learning_rate': 2.512833246961859e-07, 'fcm_dpo/beta': 0.0018158955499529839, 'fcm_dpo/q_t': 0.4129091203212738, 'fcm_dpo/delta': 0.021893244236707687, 'fcm_dpo/margin': 208.34274291992188, 'margin_dpo/margin_mean': 208.3427276611328, 'margin_dpo/margin_std': 301.77508544921875, 'logps/chosen': -401.6160888671875, 'logps/rejected': -638.81689453125, 'logps/ref_chosen': -60.080223083496094, 'logps/ref_rejected': -88.93830871582031, 'KL/chosen_KL_mean': -341.535888671875, 'KL/rejected_KL_mean': -549.8786010742188, 'KL/mean': -445.7072448730469, 'KL/std': 251.57171630859375, 'logits/chosen': -0.49703970551490784, 'logits/rejected': -0.4937781095504761, 'epoch': 0.55} + 55%|█████▌ | 375/681 [15:55<13:28, 2.64s/it] 55%|█████▌ | 376/681 [15:57<13:28, 2.65s/it] {'loss': 1.0434, 'grad_norm': 26.295751571655273, 'learning_rate': 2.5e-07, 'fcm_dpo/beta': 0.0018059706781059504, 'fcm_dpo/q_t': 0.39158695936203003, 'fcm_dpo/delta': -0.07673737406730652, 'fcm_dpo/margin': 261.89935302734375, 'margin_dpo/margin_mean': 261.89935302734375, 'margin_dpo/margin_std': 315.4093322753906, 'logps/chosen': -397.9971008300781, 'logps/rejected': -702.7626953125, 'logps/ref_chosen': -62.660308837890625, 'logps/ref_rejected': -105.52660369873047, 'KL/chosen_KL_mean': -335.3367919921875, 'KL/rejected_KL_mean': -597.236083984375, 'KL/mean': -466.28643798828125, 'KL/std': 274.84326171875, 'logits/chosen': -0.5031468272209167, 'logits/rejected': -0.49027374386787415, 'epoch': 0.55} + 55%|█████▌ | 376/681 [15:57<13:28, 2.65s/it] 55%|█████▌ | 377/681 [16:00<13:10, 2.60s/it] {'loss': 1.0531, 'grad_norm': 22.948881149291992, 'learning_rate': 2.487166753038141e-07, 'fcm_dpo/beta': 0.0017904455307871103, 'fcm_dpo/q_t': 0.39518678188323975, 'fcm_dpo/delta': -0.06344657391309738, 'fcm_dpo/margin': 257.19952392578125, 'margin_dpo/margin_mean': 257.19952392578125, 'margin_dpo/margin_std': 316.891357421875, 'logps/chosen': -395.002685546875, 'logps/rejected': -696.4268188476562, 'logps/ref_chosen': -54.478736877441406, 'logps/ref_rejected': -98.70335388183594, 'KL/chosen_KL_mean': -340.5239562988281, 'KL/rejected_KL_mean': -597.7235107421875, 'KL/mean': -469.12371826171875, 'KL/std': 295.64276123046875, 'logits/chosen': -0.4441227614879608, 'logits/rejected': -0.44588595628738403, 'epoch': 0.55} + 55%|█████▌ | 377/681 [16:00<13:10, 2.60s/it] 56%|█████▌ | 378/681 [16:02<12:26, 2.46s/it] {'loss': 1.0243, 'grad_norm': 28.960853576660156, 'learning_rate': 2.4743338442442754e-07, 'fcm_dpo/beta': 0.0017552496865391731, 'fcm_dpo/q_t': 0.38744112849235535, 'fcm_dpo/delta': -0.08436104655265808, 'fcm_dpo/margin': 273.6103820800781, 'margin_dpo/margin_mean': 273.6103515625, 'margin_dpo/margin_std': 292.38726806640625, 'logps/chosen': -363.0859680175781, 'logps/rejected': -679.7227783203125, 'logps/ref_chosen': -45.02053451538086, 'logps/ref_rejected': -88.0469741821289, 'KL/chosen_KL_mean': -318.0654296875, 'KL/rejected_KL_mean': -591.67578125, 'KL/mean': -454.87060546875, 'KL/std': 273.9302978515625, 'logits/chosen': -0.4610844552516937, 'logits/rejected': -0.47619086503982544, 'epoch': 0.56} + 56%|█████▌ | 378/681 [16:02<12:26, 2.46s/it] 56%|█████▌ | 379/681 [16:04<12:23, 2.46s/it] {'loss': 1.0471, 'grad_norm': 25.876663208007812, 'learning_rate': 2.461501611777483e-07, 'fcm_dpo/beta': 0.001722155138850212, 'fcm_dpo/q_t': 0.3932555019855499, 'fcm_dpo/delta': -0.07153955847024918, 'fcm_dpo/margin': 271.4352722167969, 'margin_dpo/margin_mean': 271.4352722167969, 'margin_dpo/margin_std': 328.153076171875, 'logps/chosen': -407.2274475097656, 'logps/rejected': -739.78076171875, 'logps/ref_chosen': -53.182098388671875, 'logps/ref_rejected': -114.3001708984375, 'KL/chosen_KL_mean': -354.04534912109375, 'KL/rejected_KL_mean': -625.4805908203125, 'KL/mean': -489.762939453125, 'KL/std': 277.69482421875, 'logits/chosen': -0.43229052424430847, 'logits/rejected': -0.4548417925834656, 'epoch': 0.56} + 56%|█████▌ | 379/681 [16:04<12:23, 2.46s/it] 56%|█████▌ | 380/681 [16:07<12:10, 2.43s/it] {'loss': 1.0225, 'grad_norm': 24.72132682800293, 'learning_rate': 2.4486703937790243e-07, 'fcm_dpo/beta': 0.0016977135092020035, 'fcm_dpo/q_t': 0.3837231993675232, 'fcm_dpo/delta': -0.10816927254199982, 'fcm_dpo/margin': 296.16217041015625, 'margin_dpo/margin_mean': 296.16217041015625, 'margin_dpo/margin_std': 341.88568115234375, 'logps/chosen': -415.0354919433594, 'logps/rejected': -764.0363159179688, 'logps/ref_chosen': -51.3530387878418, 'logps/ref_rejected': -104.19169616699219, 'KL/chosen_KL_mean': -363.6824645996094, 'KL/rejected_KL_mean': -659.8446044921875, 'KL/mean': -511.7635192871094, 'KL/std': 315.0853576660156, 'logits/chosen': -0.4478057622909546, 'logits/rejected': -0.47531557083129883, 'epoch': 0.56} + 56%|█████▌ | 380/681 [16:07<12:10, 2.43s/it] 56%|█████▌ | 381/681 [16:09<12:12, 2.44s/it] {'loss': 1.169, 'grad_norm': 35.74467849731445, 'learning_rate': 2.435840528363426e-07, 'fcm_dpo/beta': 0.0017013371689245105, 'fcm_dpo/q_t': 0.4243543744087219, 'fcm_dpo/delta': 0.06901153177022934, 'fcm_dpo/margin': 195.93124389648438, 'margin_dpo/margin_mean': 195.93124389648438, 'margin_dpo/margin_std': 360.15875244140625, 'logps/chosen': -434.89617919921875, 'logps/rejected': -652.2437744140625, 'logps/ref_chosen': -57.80306625366211, 'logps/ref_rejected': -79.21940612792969, 'KL/chosen_KL_mean': -377.09307861328125, 'KL/rejected_KL_mean': -573.0244140625, 'KL/mean': -475.0587158203125, 'KL/std': 262.92779541015625, 'logits/chosen': -0.4533649682998657, 'logits/rejected': -0.4306221902370453, 'epoch': 0.56} + 56%|█████▌ | 381/681 [16:09<12:12, 2.44s/it] 56%|█████▌ | 382/681 [16:12<12:23, 2.49s/it] {'loss': 1.0485, 'grad_norm': 24.592235565185547, 'learning_rate': 2.4230123536095745e-07, 'fcm_dpo/beta': 0.0017002095701172948, 'fcm_dpo/q_t': 0.3970971703529358, 'fcm_dpo/delta': -0.0450989231467247, 'fcm_dpo/margin': 260.62750244140625, 'margin_dpo/margin_mean': 260.62750244140625, 'margin_dpo/margin_std': 289.3924255371094, 'logps/chosen': -431.1990051269531, 'logps/rejected': -736.516357421875, 'logps/ref_chosen': -66.02030181884766, 'logps/ref_rejected': -110.71016693115234, 'KL/chosen_KL_mean': -365.1787109375, 'KL/rejected_KL_mean': -625.8062133789062, 'KL/mean': -495.492431640625, 'KL/std': 256.8338623046875, 'logits/chosen': -0.49676984548568726, 'logits/rejected': -0.5024675130844116, 'epoch': 0.56} + 56%|█████▌ | 382/681 [16:12<12:23, 2.49s/it] 56%|█████▌ | 383/681 [16:14<12:29, 2.51s/it] {'loss': 1.0922, 'grad_norm': 29.20414924621582, 'learning_rate': 2.4101862075518037e-07, 'fcm_dpo/beta': 0.0016838510055094957, 'fcm_dpo/q_t': 0.40153148770332336, 'fcm_dpo/delta': -0.03266420215368271, 'fcm_dpo/margin': 256.0433349609375, 'margin_dpo/margin_mean': 256.0433654785156, 'margin_dpo/margin_std': 373.36480712890625, 'logps/chosen': -430.00103759765625, 'logps/rejected': -729.3687744140625, 'logps/ref_chosen': -50.39148712158203, 'logps/ref_rejected': -93.71589660644531, 'KL/chosen_KL_mean': -379.60955810546875, 'KL/rejected_KL_mean': -635.6529541015625, 'KL/mean': -507.6312255859375, 'KL/std': 288.3782958984375, 'logits/chosen': -0.46060335636138916, 'logits/rejected': -0.4702298641204834, 'epoch': 0.56} + 56%|█████▌ | 383/681 [16:14<12:29, 2.51s/it] 56%|█████▋ | 384/681 [16:17<12:31, 2.53s/it] {'loss': 1.1105, 'grad_norm': 22.670204162597656, 'learning_rate': 2.397362428170992e-07, 'fcm_dpo/beta': 0.001703817630186677, 'fcm_dpo/q_t': 0.41818147897720337, 'fcm_dpo/delta': 0.05552485212683678, 'fcm_dpo/margin': 203.06533813476562, 'margin_dpo/margin_mean': 203.0653533935547, 'margin_dpo/margin_std': 239.64828491210938, 'logps/chosen': -428.2738037109375, 'logps/rejected': -665.053955078125, 'logps/ref_chosen': -52.046104431152344, 'logps/ref_rejected': -85.76089477539062, 'KL/chosen_KL_mean': -376.22772216796875, 'KL/rejected_KL_mean': -579.2930908203125, 'KL/mean': -477.7603759765625, 'KL/std': 248.6445770263672, 'logits/chosen': -0.5022902488708496, 'logits/rejected': -0.49495917558670044, 'epoch': 0.56} + 56%|█████▋ | 384/681 [16:17<12:31, 2.53s/it] 57%|█████▋ | 385/681 [16:19<12:25, 2.52s/it] {'loss': 1.0662, 'grad_norm': 28.35698127746582, 'learning_rate': 2.3845413533856514e-07, 'fcm_dpo/beta': 0.0017044099513441324, 'fcm_dpo/q_t': 0.4049755930900574, 'fcm_dpo/delta': -0.0005003036931157112, 'fcm_dpo/margin': 234.95591735839844, 'margin_dpo/margin_mean': 234.95591735839844, 'margin_dpo/margin_std': 241.16229248046875, 'logps/chosen': -416.08868408203125, 'logps/rejected': -663.3204345703125, 'logps/ref_chosen': -65.55215454101562, 'logps/ref_rejected': -77.82792663574219, 'KL/chosen_KL_mean': -350.5365295410156, 'KL/rejected_KL_mean': -585.492431640625, 'KL/mean': -468.0144958496094, 'KL/std': 216.46690368652344, 'logits/chosen': -0.5131600499153137, 'logits/rejected': -0.4851377606391907, 'epoch': 0.57} + 57%|█████▋ | 385/681 [16:19<12:25, 2.52s/it] 57%|█████▋ | 386/681 [16:22<12:31, 2.55s/it] {'loss': 1.0717, 'grad_norm': 32.36057662963867, 'learning_rate': 2.3717233210430254e-07, 'fcm_dpo/beta': 0.0016963122179731727, 'fcm_dpo/q_t': 0.40111613273620605, 'fcm_dpo/delta': -0.03167739138007164, 'fcm_dpo/margin': 253.68276977539062, 'margin_dpo/margin_mean': 253.68276977539062, 'margin_dpo/margin_std': 324.97998046875, 'logps/chosen': -422.3779296875, 'logps/rejected': -710.166259765625, 'logps/ref_chosen': -58.22185516357422, 'logps/ref_rejected': -92.32742309570312, 'KL/chosen_KL_mean': -364.15606689453125, 'KL/rejected_KL_mean': -617.8388061523438, 'KL/mean': -490.9974365234375, 'KL/std': 270.4039611816406, 'logits/chosen': -0.48662200570106506, 'logits/rejected': -0.48291075229644775, 'epoch': 0.57} + 57%|█████▋ | 386/681 [16:22<12:31, 2.55s/it] 57%|█████▋ | 387/681 [16:25<12:46, 2.61s/it] {'loss': 1.1055, 'grad_norm': 27.685876846313477, 'learning_rate': 2.3589086689101889e-07, 'fcm_dpo/beta': 0.0016972242156043649, 'fcm_dpo/q_t': 0.4156672954559326, 'fcm_dpo/delta': 0.042357347905635834, 'fcm_dpo/margin': 211.57708740234375, 'margin_dpo/margin_mean': 211.57708740234375, 'margin_dpo/margin_std': 257.301513671875, 'logps/chosen': -453.5457458496094, 'logps/rejected': -690.87255859375, 'logps/ref_chosen': -66.41944885253906, 'logps/ref_rejected': -92.16915893554688, 'KL/chosen_KL_mean': -387.12628173828125, 'KL/rejected_KL_mean': -598.7034301757812, 'KL/mean': -492.91485595703125, 'KL/std': 246.6182403564453, 'logits/chosen': -0.542807936668396, 'logits/rejected': -0.5214394330978394, 'epoch': 0.57} + 57%|█████▋ | 387/681 [16:25<12:46, 2.61s/it] 57%|█████▋ | 388/681 [16:27<12:22, 2.53s/it] {'loss': 1.0296, 'grad_norm': 29.01787757873535, 'learning_rate': 2.3460977346651428e-07, 'fcm_dpo/beta': 0.001679969485849142, 'fcm_dpo/q_t': 0.38981735706329346, 'fcm_dpo/delta': -0.08814238011837006, 'fcm_dpo/margin': 287.8377380371094, 'margin_dpo/margin_mean': 287.8377380371094, 'margin_dpo/margin_std': 330.7196044921875, 'logps/chosen': -407.2157287597656, 'logps/rejected': -749.3570556640625, 'logps/ref_chosen': -50.129459381103516, 'logps/ref_rejected': -104.43305969238281, 'KL/chosen_KL_mean': -357.0862731933594, 'KL/rejected_KL_mean': -644.924072265625, 'KL/mean': -501.005126953125, 'KL/std': 291.4306335449219, 'logits/chosen': -0.4653438925743103, 'logits/rejected': -0.4758313298225403, 'epoch': 0.57} + 57%|█████▋ | 388/681 [16:27<12:22, 2.53s/it] 57%|█████▋ | 389/681 [16:29<12:09, 2.50s/it] {'loss': 1.0873, 'grad_norm': 32.89521789550781, 'learning_rate': 2.3332908558879177e-07, 'fcm_dpo/beta': 0.001672594342380762, 'fcm_dpo/q_t': 0.4067634344100952, 'fcm_dpo/delta': -0.004527151584625244, 'fcm_dpo/margin': 241.71041870117188, 'margin_dpo/margin_mean': 241.71041870117188, 'margin_dpo/margin_std': 314.6967468261719, 'logps/chosen': -444.99530029296875, 'logps/rejected': -706.7136840820312, 'logps/ref_chosen': -57.906593322753906, 'logps/ref_rejected': -77.91454315185547, 'KL/chosen_KL_mean': -387.08868408203125, 'KL/rejected_KL_mean': -628.7991333007812, 'KL/mean': -507.94390869140625, 'KL/std': 287.0469970703125, 'logits/chosen': -0.5016822218894958, 'logits/rejected': -0.49213531613349915, 'epoch': 0.57} + 57%|█████▋ | 389/681 [16:29<12:09, 2.50s/it] 57%|█████▋ | 390/681 [16:32<12:08, 2.50s/it] {'loss': 1.1064, 'grad_norm': 22.919740676879883, 'learning_rate': 2.320488370051681e-07, 'fcm_dpo/beta': 0.0016685773152858019, 'fcm_dpo/q_t': 0.41037964820861816, 'fcm_dpo/delta': -0.001378379762172699, 'fcm_dpo/margin': 240.36087036132812, 'margin_dpo/margin_mean': 240.36087036132812, 'margin_dpo/margin_std': 358.0471496582031, 'logps/chosen': -428.0667724609375, 'logps/rejected': -704.7298583984375, 'logps/ref_chosen': -49.22591781616211, 'logps/ref_rejected': -85.5281982421875, 'KL/chosen_KL_mean': -378.8408508300781, 'KL/rejected_KL_mean': -619.20166015625, 'KL/mean': -499.02130126953125, 'KL/std': 283.59832763671875, 'logits/chosen': -0.4558466672897339, 'logits/rejected': -0.45187222957611084, 'epoch': 0.57} + 57%|█████▋ | 390/681 [16:32<12:08, 2.50s/it] 57%|█████▋ | 391/681 [16:34<12:03, 2.50s/it] {'loss': 1.203, 'grad_norm': 36.1956672668457, 'learning_rate': 2.3076906145138405e-07, 'fcm_dpo/beta': 0.0017130144406110048, 'fcm_dpo/q_t': 0.4409900903701782, 'fcm_dpo/delta': 0.1468581259250641, 'fcm_dpo/margin': 149.905029296875, 'margin_dpo/margin_mean': 149.90504455566406, 'margin_dpo/margin_std': 278.86932373046875, 'logps/chosen': -444.4190673828125, 'logps/rejected': -616.732666015625, 'logps/ref_chosen': -64.32965087890625, 'logps/ref_rejected': -86.73820495605469, 'KL/chosen_KL_mean': -380.08941650390625, 'KL/rejected_KL_mean': -529.9944458007812, 'KL/mean': -455.04193115234375, 'KL/std': 265.760986328125, 'logits/chosen': -0.4843197762966156, 'logits/rejected': -0.47525227069854736, 'epoch': 0.57} + 57%|█████▋ | 391/681 [16:34<12:03, 2.50s/it] 58%|█████▊ | 392/681 [16:37<12:16, 2.55s/it] {'loss': 1.0159, 'grad_norm': 23.744003295898438, 'learning_rate': 2.294897926507156e-07, 'fcm_dpo/beta': 0.001707045128569007, 'fcm_dpo/q_t': 0.3881346583366394, 'fcm_dpo/delta': -0.08683174103498459, 'fcm_dpo/margin': 282.7807922363281, 'margin_dpo/margin_mean': 282.78076171875, 'margin_dpo/margin_std': 284.8000183105469, 'logps/chosen': -375.4222412109375, 'logps/rejected': -707.044921875, 'logps/ref_chosen': -53.50397872924805, 'logps/ref_rejected': -102.34584045410156, 'KL/chosen_KL_mean': -321.9182434082031, 'KL/rejected_KL_mean': -604.6990966796875, 'KL/mean': -463.30865478515625, 'KL/std': 282.4703674316406, 'logits/chosen': -0.46998441219329834, 'logits/rejected': -0.4667205512523651, 'epoch': 0.58} + 58%|█████▊ | 392/681 [16:37<12:16, 2.55s/it] 58%|█████▊ | 393/681 [16:39<12:00, 2.50s/it] {'loss': 1.1229, 'grad_norm': 22.198171615600586, 'learning_rate': 2.2821106431308543e-07, 'fcm_dpo/beta': 0.0016989409923553467, 'fcm_dpo/q_t': 0.4159389138221741, 'fcm_dpo/delta': 0.021702561527490616, 'fcm_dpo/margin': 223.1536102294922, 'margin_dpo/margin_mean': 223.1536102294922, 'margin_dpo/margin_std': 354.7700500488281, 'logps/chosen': -373.547119140625, 'logps/rejected': -622.1956787109375, 'logps/ref_chosen': -46.473915100097656, 'logps/ref_rejected': -71.96885681152344, 'KL/chosen_KL_mean': -327.07318115234375, 'KL/rejected_KL_mean': -550.226806640625, 'KL/mean': -438.6499938964844, 'KL/std': 282.72235107421875, 'logits/chosen': -0.4430672526359558, 'logits/rejected': -0.43764716386795044, 'epoch': 0.58} + 58%|█████▊ | 393/681 [16:40<12:00, 2.50s/it] 58%|█████▊ | 394/681 [16:42<12:08, 2.54s/it] {'loss': 1.0882, 'grad_norm': 21.11285972595215, 'learning_rate': 2.2693291013417452e-07, 'fcm_dpo/beta': 0.0017010483425110579, 'fcm_dpo/q_t': 0.40806007385253906, 'fcm_dpo/delta': -0.001378481974825263, 'fcm_dpo/margin': 235.9246368408203, 'margin_dpo/margin_mean': 235.92465209960938, 'margin_dpo/margin_std': 308.0911865234375, 'logps/chosen': -409.47650146484375, 'logps/rejected': -683.312255859375, 'logps/ref_chosen': -52.91154861450195, 'logps/ref_rejected': -90.8226318359375, 'KL/chosen_KL_mean': -356.56494140625, 'KL/rejected_KL_mean': -592.4896240234375, 'KL/mean': -474.52728271484375, 'KL/std': 290.612548828125, 'logits/chosen': -0.45615267753601074, 'logits/rejected': -0.45724251866340637, 'epoch': 0.58} + 58%|█████▊ | 394/681 [16:42<12:08, 2.54s/it] 58%|█████▊ | 395/681 [16:44<11:50, 2.48s/it] {'loss': 1.0818, 'grad_norm': 23.830419540405273, 'learning_rate': 2.2565536379453404e-07, 'fcm_dpo/beta': 0.0016905716620385647, 'fcm_dpo/q_t': 0.4038696587085724, 'fcm_dpo/delta': -0.023899473249912262, 'fcm_dpo/margin': 250.00375366210938, 'margin_dpo/margin_mean': 250.00375366210938, 'margin_dpo/margin_std': 334.2568359375, 'logps/chosen': -422.8414001464844, 'logps/rejected': -694.0816650390625, 'logps/ref_chosen': -62.546112060546875, 'logps/ref_rejected': -83.78262329101562, 'KL/chosen_KL_mean': -360.2952880859375, 'KL/rejected_KL_mean': -610.299072265625, 'KL/mean': -485.2971496582031, 'KL/std': 286.58233642578125, 'logits/chosen': -0.5101211667060852, 'logits/rejected': -0.5033398270606995, 'epoch': 0.58} + 58%|█████▊ | 395/681 [16:45<11:50, 2.48s/it] 58%|█████▊ | 396/681 [16:47<11:53, 2.50s/it] {'loss': 1.0811, 'grad_norm': 24.08172607421875, 'learning_rate': 2.2437845895869825e-07, 'fcm_dpo/beta': 0.0016928238328546286, 'fcm_dpo/q_t': 0.40855342149734497, 'fcm_dpo/delta': 0.009283696301281452, 'fcm_dpo/margin': 230.993896484375, 'margin_dpo/margin_mean': 230.993896484375, 'margin_dpo/margin_std': 268.80023193359375, 'logps/chosen': -428.4869079589844, 'logps/rejected': -679.1314697265625, 'logps/ref_chosen': -68.99594116210938, 'logps/ref_rejected': -88.64665985107422, 'KL/chosen_KL_mean': -359.490966796875, 'KL/rejected_KL_mean': -590.48486328125, 'KL/mean': -474.9879150390625, 'KL/std': 282.67681884765625, 'logits/chosen': -0.4725229740142822, 'logits/rejected': -0.4491950571537018, 'epoch': 0.58} + 58%|█████▊ | 396/681 [16:47<11:53, 2.50s/it] 58%|█████▊ | 397/681 [16:49<11:47, 2.49s/it] {'loss': 1.0128, 'grad_norm': 34.25861358642578, 'learning_rate': 2.2310222927429716e-07, 'fcm_dpo/beta': 0.0016665621660649776, 'fcm_dpo/q_t': 0.38623836636543274, 'fcm_dpo/delta': -0.09650179743766785, 'fcm_dpo/margin': 294.732421875, 'margin_dpo/margin_mean': 294.732421875, 'margin_dpo/margin_std': 302.2978515625, 'logps/chosen': -404.4608154296875, 'logps/rejected': -741.0322265625, 'logps/ref_chosen': -61.27716827392578, 'logps/ref_rejected': -103.11612701416016, 'KL/chosen_KL_mean': -343.18365478515625, 'KL/rejected_KL_mean': -637.9160766601562, 'KL/mean': -490.54986572265625, 'KL/std': 279.0823974609375, 'logits/chosen': -0.46848201751708984, 'logits/rejected': -0.47495004534721375, 'epoch': 0.58} + 58%|█████▊ | 397/681 [16:50<11:47, 2.49s/it] 58%|█████▊ | 398/681 [16:52<11:31, 2.44s/it] {'loss': 1.0596, 'grad_norm': 28.262405395507812, 'learning_rate': 2.2182670837116972e-07, 'fcm_dpo/beta': 0.0016521359793841839, 'fcm_dpo/q_t': 0.3986930847167969, 'fcm_dpo/delta': -0.04858284816145897, 'fcm_dpo/margin': 270.20220947265625, 'margin_dpo/margin_mean': 270.20220947265625, 'margin_dpo/margin_std': 338.95098876953125, 'logps/chosen': -434.5057678222656, 'logps/rejected': -745.080078125, 'logps/ref_chosen': -68.15155029296875, 'logps/ref_rejected': -108.52360534667969, 'KL/chosen_KL_mean': -366.3542175292969, 'KL/rejected_KL_mean': -636.5565185546875, 'KL/mean': -501.455322265625, 'KL/std': 288.9761962890625, 'logits/chosen': -0.5342578887939453, 'logits/rejected': -0.53399258852005, 'epoch': 0.58} + 58%|█████▊ | 398/681 [16:52<11:31, 2.44s/it] 59%|█████▊ | 399/681 [16:54<11:32, 2.45s/it] {'loss': 1.1164, 'grad_norm': 28.764699935913086, 'learning_rate': 2.2055192986047804e-07, 'fcm_dpo/beta': 0.0016457277815788984, 'fcm_dpo/q_t': 0.41211044788360596, 'fcm_dpo/delta': 0.01717957854270935, 'fcm_dpo/margin': 232.86073303222656, 'margin_dpo/margin_mean': 232.8607177734375, 'margin_dpo/margin_std': 344.62420654296875, 'logps/chosen': -395.9918212890625, 'logps/rejected': -645.9283447265625, 'logps/ref_chosen': -60.889801025390625, 'logps/ref_rejected': -77.965576171875, 'KL/chosen_KL_mean': -335.1020202636719, 'KL/rejected_KL_mean': -567.9627685546875, 'KL/mean': -451.53240966796875, 'KL/std': 267.4539489746094, 'logits/chosen': -0.5000342130661011, 'logits/rejected': -0.4618859887123108, 'epoch': 0.59} + 59%|█████▊ | 399/681 [16:54<11:32, 2.45s/it] 59%|█████▊ | 400/681 [16:57<11:34, 2.47s/it] {'loss': 0.968, 'grad_norm': 27.097814559936523, 'learning_rate': 2.192779273338215e-07, 'fcm_dpo/beta': 0.0016120923683047295, 'fcm_dpo/q_t': 0.37171751260757446, 'fcm_dpo/delta': -0.15557917952537537, 'fcm_dpo/margin': 339.0614013671875, 'margin_dpo/margin_mean': 339.0614013671875, 'margin_dpo/margin_std': 303.73828125, 'logps/chosen': -375.0321044921875, 'logps/rejected': -755.70263671875, 'logps/ref_chosen': -63.64359664916992, 'logps/ref_rejected': -105.252685546875, 'KL/chosen_KL_mean': -311.3885192871094, 'KL/rejected_KL_mean': -650.449951171875, 'KL/mean': -480.91925048828125, 'KL/std': 279.1466064453125, 'logits/chosen': -0.4866938591003418, 'logits/rejected': -0.4817845821380615, 'epoch': 0.59} + 59%|█████▊ | 400/681 [16:57<11:34, 2.47s/it] 59%|█████▉ | 401/681 [16:59<11:39, 2.50s/it] {'loss': 1.2006, 'grad_norm': 30.14374351501465, 'learning_rate': 2.1800473436235136e-07, 'fcm_dpo/beta': 0.0016232456546276808, 'fcm_dpo/q_t': 0.4327518343925476, 'fcm_dpo/delta': 0.09924636781215668, 'fcm_dpo/margin': 187.20950317382812, 'margin_dpo/margin_mean': 187.20950317382812, 'margin_dpo/margin_std': 391.4587707519531, 'logps/chosen': -424.0182189941406, 'logps/rejected': -637.857177734375, 'logps/ref_chosen': -57.16303253173828, 'logps/ref_rejected': -83.79249572753906, 'KL/chosen_KL_mean': -366.85516357421875, 'KL/rejected_KL_mean': -554.064697265625, 'KL/mean': -460.4599304199219, 'KL/std': 292.738037109375, 'logits/chosen': -0.4865230619907379, 'logits/rejected': -0.4814421534538269, 'epoch': 0.59} + 59%|█████▉ | 401/681 [16:59<11:39, 2.50s/it] 59%|█████▉ | 402/681 [17:02<11:19, 2.44s/it] {'loss': 0.9443, 'grad_norm': 27.241565704345703, 'learning_rate': 2.1673238449588665e-07, 'fcm_dpo/beta': 0.0015893438830971718, 'fcm_dpo/q_t': 0.3651409447193146, 'fcm_dpo/delta': -0.19418612122535706, 'fcm_dpo/margin': 366.76617431640625, 'margin_dpo/margin_mean': 366.76617431640625, 'margin_dpo/margin_std': 324.19622802734375, 'logps/chosen': -323.4923400878906, 'logps/rejected': -720.564208984375, 'logps/ref_chosen': -50.74037170410156, 'logps/ref_rejected': -81.0460433959961, 'KL/chosen_KL_mean': -272.751953125, 'KL/rejected_KL_mean': -639.5181884765625, 'KL/mean': -456.13507080078125, 'KL/std': 318.4268798828125, 'logits/chosen': -0.5259881019592285, 'logits/rejected': -0.5160728693008423, 'epoch': 0.59} + 59%|█████▉ | 402/681 [17:02<11:19, 2.44s/it] 59%|█████▉ | 403/681 [17:04<11:14, 2.43s/it] {'loss': 1.057, 'grad_norm': 27.655994415283203, 'learning_rate': 2.154609112620295e-07, 'fcm_dpo/beta': 0.0015664222883060575, 'fcm_dpo/q_t': 0.40054211020469666, 'fcm_dpo/delta': -0.029441511258482933, 'fcm_dpo/margin': 273.35260009765625, 'margin_dpo/margin_mean': 273.35260009765625, 'margin_dpo/margin_std': 303.58697509765625, 'logps/chosen': -348.0767822265625, 'logps/rejected': -651.5487060546875, 'logps/ref_chosen': -47.14731216430664, 'logps/ref_rejected': -77.2666015625, 'KL/chosen_KL_mean': -300.929443359375, 'KL/rejected_KL_mean': -574.2821044921875, 'KL/mean': -437.60577392578125, 'KL/std': 293.6529541015625, 'logits/chosen': -0.5488017797470093, 'logits/rejected': -0.542914867401123, 'epoch': 0.59} + 59%|█████▉ | 403/681 [17:04<11:14, 2.43s/it] 59%|█████▉ | 404/681 [17:07<11:17, 2.45s/it] {'loss': 1.091, 'grad_norm': 33.138004302978516, 'learning_rate': 2.1419034816528218e-07, 'fcm_dpo/beta': 0.001556064235046506, 'fcm_dpo/q_t': 0.40406206250190735, 'fcm_dpo/delta': -0.020973514765501022, 'fcm_dpo/margin': 269.9066162109375, 'margin_dpo/margin_mean': 269.9066162109375, 'margin_dpo/margin_std': 377.742919921875, 'logps/chosen': -401.62322998046875, 'logps/rejected': -700.8095703125, 'logps/ref_chosen': -47.875274658203125, 'logps/ref_rejected': -77.15499877929688, 'KL/chosen_KL_mean': -353.74798583984375, 'KL/rejected_KL_mean': -623.654541015625, 'KL/mean': -488.7012939453125, 'KL/std': 293.87213134765625, 'logits/chosen': -0.5281866788864136, 'logits/rejected': -0.5200468301773071, 'epoch': 0.59} + 59%|█████▉ | 404/681 [17:07<11:17, 2.45s/it] 59%|█████▉ | 405/681 [17:09<11:03, 2.40s/it] {'loss': 1.1657, 'grad_norm': 35.584747314453125, 'learning_rate': 2.129207286861638e-07, 'fcm_dpo/beta': 0.0015513843391090631, 'fcm_dpo/q_t': 0.4234468638896942, 'fcm_dpo/delta': -0.03758659213781357, 'fcm_dpo/margin': 219.4093475341797, 'margin_dpo/margin_mean': 219.4093475341797, 'margin_dpo/margin_std': 391.95977783203125, 'logps/chosen': -461.2916259765625, 'logps/rejected': -702.724853515625, 'logps/ref_chosen': -65.16290283203125, 'logps/ref_rejected': -87.18678283691406, 'KL/chosen_KL_mean': -396.12872314453125, 'KL/rejected_KL_mean': -615.5380859375, 'KL/mean': -505.8334045410156, 'KL/std': 310.9896240234375, 'logits/chosen': -0.5194091796875, 'logits/rejected': -0.5121314525604248, 'epoch': 0.59} + 59%|█████▉ | 405/681 [17:09<11:03, 2.40s/it] 60%|█████▉ | 406/681 [17:11<10:57, 2.39s/it] {'loss': 1.0647, 'grad_norm': 25.82929801940918, 'learning_rate': 2.1165208628032861e-07, 'fcm_dpo/beta': 0.0015432301443070173, 'fcm_dpo/q_t': 0.39986056089401245, 'fcm_dpo/delta': -0.0403851754963398, 'fcm_dpo/margin': 284.0757141113281, 'margin_dpo/margin_mean': 284.07568359375, 'margin_dpo/margin_std': 352.7003173828125, 'logps/chosen': -414.14678955078125, 'logps/rejected': -740.560302734375, 'logps/ref_chosen': -49.740814208984375, 'logps/ref_rejected': -92.07862854003906, 'KL/chosen_KL_mean': -364.4059753417969, 'KL/rejected_KL_mean': -648.481689453125, 'KL/mean': -506.44384765625, 'KL/std': 309.79296875, 'logits/chosen': -0.54176926612854, 'logits/rejected': -0.5563890933990479, 'epoch': 0.6} + 60%|█████▉ | 406/681 [17:11<10:57, 2.39s/it] 60%|█████▉ | 407/681 [17:14<11:03, 2.42s/it] {'loss': 1.1752, 'grad_norm': 35.46054458618164, 'learning_rate': 2.1038445437768375e-07, 'fcm_dpo/beta': 0.0015315297059714794, 'fcm_dpo/q_t': 0.42981648445129395, 'fcm_dpo/delta': -0.01038383599370718, 'fcm_dpo/margin': 194.84068298339844, 'margin_dpo/margin_mean': 194.8406982421875, 'margin_dpo/margin_std': 331.57635498046875, 'logps/chosen': -432.8283996582031, 'logps/rejected': -648.8504638671875, 'logps/ref_chosen': -56.33069610595703, 'logps/ref_rejected': -77.51209259033203, 'KL/chosen_KL_mean': -376.4976806640625, 'KL/rejected_KL_mean': -571.33837890625, 'KL/mean': -473.91802978515625, 'KL/std': 257.8326110839844, 'logits/chosen': -0.5773499011993408, 'logits/rejected': -0.5510052442550659, 'epoch': 0.6} + 60%|█████▉ | 407/681 [17:14<11:03, 2.42s/it] 60%|█████▉ | 408/681 [17:16<11:21, 2.50s/it] {'loss': 1.1414, 'grad_norm': 37.8366813659668, 'learning_rate': 2.0911786638150872e-07, 'fcm_dpo/beta': 0.0015524220652878284, 'fcm_dpo/q_t': 0.4264683127403259, 'fcm_dpo/delta': 0.08978626132011414, 'fcm_dpo/margin': 201.551025390625, 'margin_dpo/margin_mean': 201.551025390625, 'margin_dpo/margin_std': 274.039306640625, 'logps/chosen': -452.4508056640625, 'logps/rejected': -674.3094482421875, 'logps/ref_chosen': -69.789306640625, 'logps/ref_rejected': -90.09693908691406, 'KL/chosen_KL_mean': -382.6614990234375, 'KL/rejected_KL_mean': -584.2125244140625, 'KL/mean': -483.43701171875, 'KL/std': 246.0572967529297, 'logits/chosen': -0.57146817445755, 'logits/rejected': -0.5515158176422119, 'epoch': 0.6} + 60%|█████▉ | 408/681 [17:16<11:21, 2.50s/it] 60%|██████ | 409/681 [17:19<11:35, 2.56s/it] {'loss': 1.158, 'grad_norm': 39.993656158447266, 'learning_rate': 2.0785235566757517e-07, 'fcm_dpo/beta': 0.0015834926161915064, 'fcm_dpo/q_t': 0.4273938536643982, 'fcm_dpo/delta': 0.09120422601699829, 'fcm_dpo/margin': 196.54339599609375, 'margin_dpo/margin_mean': 196.5434112548828, 'margin_dpo/margin_std': 309.50628662109375, 'logps/chosen': -441.5321960449219, 'logps/rejected': -655.6624755859375, 'logps/ref_chosen': -67.31744384765625, 'logps/ref_rejected': -84.904296875, 'KL/chosen_KL_mean': -374.21478271484375, 'KL/rejected_KL_mean': -570.7581787109375, 'KL/mean': -472.4864501953125, 'KL/std': 268.3485107421875, 'logits/chosen': -0.5402634143829346, 'logits/rejected': -0.5223067998886108, 'epoch': 0.6} + 60%|██████ | 409/681 [17:19<11:35, 2.56s/it] 60%|██████ | 410/681 [17:22<11:37, 2.57s/it] {'loss': 1.0969, 'grad_norm': 29.003385543823242, 'learning_rate': 2.065879555832674e-07, 'fcm_dpo/beta': 0.0015911536756902933, 'fcm_dpo/q_t': 0.41136714816093445, 'fcm_dpo/delta': 0.025650672614574432, 'fcm_dpo/margin': 235.87872314453125, 'margin_dpo/margin_mean': 235.8787384033203, 'margin_dpo/margin_std': 290.5599365234375, 'logps/chosen': -386.8587341308594, 'logps/rejected': -654.4710693359375, 'logps/ref_chosen': -51.465354919433594, 'logps/ref_rejected': -83.198974609375, 'KL/chosen_KL_mean': -335.39337158203125, 'KL/rejected_KL_mean': -571.2720947265625, 'KL/mean': -453.332763671875, 'KL/std': 258.4756774902344, 'logits/chosen': -0.5437331795692444, 'logits/rejected': -0.5466384887695312, 'epoch': 0.6} + 60%|██████ | 410/681 [17:22<11:37, 2.57s/it] 60%|██████ | 411/681 [17:24<11:19, 2.52s/it] {'loss': 1.1209, 'grad_norm': 26.219621658325195, 'learning_rate': 2.0532469944670343e-07, 'fcm_dpo/beta': 0.001590752974152565, 'fcm_dpo/q_t': 0.41678670048713684, 'fcm_dpo/delta': 0.033395539969205856, 'fcm_dpo/margin': 230.70909118652344, 'margin_dpo/margin_mean': 230.70907592773438, 'margin_dpo/margin_std': 329.6683349609375, 'logps/chosen': -392.34075927734375, 'logps/rejected': -651.4375, 'logps/ref_chosen': -52.30727005004883, 'logps/ref_rejected': -80.69495391845703, 'KL/chosen_KL_mean': -340.03350830078125, 'KL/rejected_KL_mean': -570.7425537109375, 'KL/mean': -455.3880615234375, 'KL/std': 282.7340393066406, 'logits/chosen': -0.520312488079071, 'logits/rejected': -0.5262941718101501, 'epoch': 0.6} + 60%|██████ | 411/681 [17:24<11:19, 2.52s/it] 60%|██████ | 412/681 [17:26<11:08, 2.48s/it] {'loss': 1.0857, 'grad_norm': 40.66035079956055, 'learning_rate': 2.0406262054585738e-07, 'fcm_dpo/beta': 0.0016053288709372282, 'fcm_dpo/q_t': 0.4056549072265625, 'fcm_dpo/delta': -0.006643663160502911, 'fcm_dpo/margin': 253.1304931640625, 'margin_dpo/margin_mean': 253.13047790527344, 'margin_dpo/margin_std': 327.401611328125, 'logps/chosen': -408.5992736816406, 'logps/rejected': -708.6464233398438, 'logps/ref_chosen': -53.144126892089844, 'logps/ref_rejected': -100.0608139038086, 'KL/chosen_KL_mean': -355.45513916015625, 'KL/rejected_KL_mean': -608.5856323242188, 'KL/mean': -482.0203857421875, 'KL/std': 281.10675048828125, 'logits/chosen': -0.5641697645187378, 'logits/rejected': -0.5924566984176636, 'epoch': 0.6} + 60%|██████ | 412/681 [17:27<11:08, 2.48s/it] 61%|██████ | 413/681 [17:29<11:21, 2.54s/it] {'loss': 1.0903, 'grad_norm': 26.795982360839844, 'learning_rate': 2.0280175213768205e-07, 'fcm_dpo/beta': 0.001608746824786067, 'fcm_dpo/q_t': 0.40697982907295227, 'fcm_dpo/delta': 0.007142549380660057, 'fcm_dpo/margin': 244.28973388671875, 'margin_dpo/margin_mean': 244.28973388671875, 'margin_dpo/margin_std': 309.2333984375, 'logps/chosen': -436.78277587890625, 'logps/rejected': -718.9639892578125, 'logps/ref_chosen': -61.58196258544922, 'logps/ref_rejected': -99.47340393066406, 'KL/chosen_KL_mean': -375.2008056640625, 'KL/rejected_KL_mean': -619.4906005859375, 'KL/mean': -497.345703125, 'KL/std': 274.7577819824219, 'logits/chosen': -0.4796082675457001, 'logits/rejected': -0.4848223328590393, 'epoch': 0.61} + 61%|██████ | 413/681 [17:29<11:21, 2.54s/it] 61%|██████ | 414/681 [17:32<11:20, 2.55s/it] {'loss': 1.0622, 'grad_norm': 38.35430908203125, 'learning_rate': 2.0154212744723247e-07, 'fcm_dpo/beta': 0.0016081007197499275, 'fcm_dpo/q_t': 0.3990900218486786, 'fcm_dpo/delta': -0.035320840775966644, 'fcm_dpo/margin': 269.31365966796875, 'margin_dpo/margin_mean': 269.3136901855469, 'margin_dpo/margin_std': 310.88043212890625, 'logps/chosen': -385.00823974609375, 'logps/rejected': -695.3369750976562, 'logps/ref_chosen': -46.63148498535156, 'logps/ref_rejected': -87.64653015136719, 'KL/chosen_KL_mean': -338.37677001953125, 'KL/rejected_KL_mean': -607.6904296875, 'KL/mean': -473.0335998535156, 'KL/std': 263.765625, 'logits/chosen': -0.4838346838951111, 'logits/rejected': -0.47299548983573914, 'epoch': 0.61} + 61%|██████ | 414/681 [17:32<11:20, 2.55s/it] 61%|██████ | 415/681 [17:34<11:34, 2.61s/it] {'loss': 1.1386, 'grad_norm': 31.549434661865234, 'learning_rate': 2.002837796667909e-07, 'fcm_dpo/beta': 0.0016053159488365054, 'fcm_dpo/q_t': 0.4219440221786499, 'fcm_dpo/delta': 0.06679742783308029, 'fcm_dpo/margin': 208.97039794921875, 'margin_dpo/margin_mean': 208.97039794921875, 'margin_dpo/margin_std': 311.5289306640625, 'logps/chosen': -457.2638854980469, 'logps/rejected': -688.093505859375, 'logps/ref_chosen': -78.6182861328125, 'logps/ref_rejected': -100.47752380371094, 'KL/chosen_KL_mean': -378.6455993652344, 'KL/rejected_KL_mean': -587.6160278320312, 'KL/mean': -483.13079833984375, 'KL/std': 267.6329345703125, 'logits/chosen': -0.5333505868911743, 'logits/rejected': -0.5307386517524719, 'epoch': 0.61} + 61%|██████ | 415/681 [17:35<11:34, 2.61s/it] 61%|██████ | 416/681 [17:37<11:27, 2.60s/it] {'loss': 0.9955, 'grad_norm': 32.9141731262207, 'learning_rate': 1.990267419549914e-07, 'fcm_dpo/beta': 0.0015930493827909231, 'fcm_dpo/q_t': 0.38171255588531494, 'fcm_dpo/delta': -0.11073094606399536, 'fcm_dpo/margin': 317.17578125, 'margin_dpo/margin_mean': 317.17578125, 'margin_dpo/margin_std': 294.7266845703125, 'logps/chosen': -404.93121337890625, 'logps/rejected': -754.3966064453125, 'logps/ref_chosen': -58.27912521362305, 'logps/ref_rejected': -90.56871795654297, 'KL/chosen_KL_mean': -346.6520690917969, 'KL/rejected_KL_mean': -663.827880859375, 'KL/mean': -505.2399597167969, 'KL/std': 294.3641357421875, 'logits/chosen': -0.5502901077270508, 'logits/rejected': -0.5549454092979431, 'epoch': 0.61} + 61%|██████ | 416/681 [17:37<11:27, 2.60s/it] 61%|██████ | 417/681 [17:39<11:07, 2.53s/it] {'loss': 1.0551, 'grad_norm': 31.803016662597656, 'learning_rate': 1.9777104743594686e-07, 'fcm_dpo/beta': 0.0015767996665090322, 'fcm_dpo/q_t': 0.40123608708381653, 'fcm_dpo/delta': -0.01895320415496826, 'fcm_dpo/margin': 265.19757080078125, 'margin_dpo/margin_mean': 265.19757080078125, 'margin_dpo/margin_std': 272.559814453125, 'logps/chosen': -391.7524108886719, 'logps/rejected': -674.903076171875, 'logps/ref_chosen': -50.1987190246582, 'logps/ref_rejected': -68.15184020996094, 'KL/chosen_KL_mean': -341.5537109375, 'KL/rejected_KL_mean': -606.7512817382812, 'KL/mean': -474.1524658203125, 'KL/std': 268.6434631347656, 'logits/chosen': -0.4980270266532898, 'logits/rejected': -0.47427403926849365, 'epoch': 0.61} + 61%|██████ | 417/681 [17:39<11:07, 2.53s/it] 61%|██████▏ | 418/681 [17:42<11:07, 2.54s/it] {'loss': 1.0853, 'grad_norm': 25.314533233642578, 'learning_rate': 1.965167291983757e-07, 'fcm_dpo/beta': 0.0015800942201167345, 'fcm_dpo/q_t': 0.40411561727523804, 'fcm_dpo/delta': -0.026646777987480164, 'fcm_dpo/margin': 268.684326171875, 'margin_dpo/margin_mean': 268.684326171875, 'margin_dpo/margin_std': 358.3873291015625, 'logps/chosen': -465.2908630371094, 'logps/rejected': -756.688232421875, 'logps/ref_chosen': -81.97846984863281, 'logps/ref_rejected': -104.69148254394531, 'KL/chosen_KL_mean': -383.3123779296875, 'KL/rejected_KL_mean': -651.9967041015625, 'KL/mean': -517.654541015625, 'KL/std': 306.8751220703125, 'logits/chosen': -0.5755934715270996, 'logits/rejected': -0.5533599853515625, 'epoch': 0.61} + 61%|██████▏ | 418/681 [17:42<11:07, 2.54s/it] 62%|██████▏ | 419/681 [17:45<11:04, 2.54s/it] {'loss': 1.0295, 'grad_norm': 24.278188705444336, 'learning_rate': 1.9526382029472988e-07, 'fcm_dpo/beta': 0.0015506461495533586, 'fcm_dpo/q_t': 0.3909485936164856, 'fcm_dpo/delta': -0.06802451610565186, 'fcm_dpo/margin': 299.801513671875, 'margin_dpo/margin_mean': 299.801513671875, 'margin_dpo/margin_std': 311.72760009765625, 'logps/chosen': -401.7311096191406, 'logps/rejected': -740.1671142578125, 'logps/ref_chosen': -52.948646545410156, 'logps/ref_rejected': -91.58309936523438, 'KL/chosen_KL_mean': -348.782470703125, 'KL/rejected_KL_mean': -648.583984375, 'KL/mean': -498.6832275390625, 'KL/std': 267.0831298828125, 'logits/chosen': -0.5322695970535278, 'logits/rejected': -0.5325411558151245, 'epoch': 0.62} + 62%|██████▏ | 419/681 [17:45<11:04, 2.54s/it] 62%|██████▏ | 420/681 [17:47<11:00, 2.53s/it] {'loss': 1.2169, 'grad_norm': 60.59740447998047, 'learning_rate': 1.9401235374032425e-07, 'fcm_dpo/beta': 0.001575858099386096, 'fcm_dpo/q_t': 0.4371680021286011, 'fcm_dpo/delta': 0.12323421239852905, 'fcm_dpo/margin': 177.66058349609375, 'margin_dpo/margin_mean': 177.66058349609375, 'margin_dpo/margin_std': 394.1659240722656, 'logps/chosen': -521.82958984375, 'logps/rejected': -691.0401000976562, 'logps/ref_chosen': -77.7699203491211, 'logps/ref_rejected': -69.31985473632812, 'KL/chosen_KL_mean': -444.0596923828125, 'KL/rejected_KL_mean': -621.72021484375, 'KL/mean': -532.8899536132812, 'KL/std': 289.5302734375, 'logits/chosen': -0.5358173847198486, 'logits/rejected': -0.4956286549568176, 'epoch': 0.62} + 62%|██████▏ | 420/681 [17:47<11:00, 2.53s/it] 62%|██████▏ | 421/681 [17:50<11:10, 2.58s/it] {'loss': 1.1354, 'grad_norm': 29.370344161987305, 'learning_rate': 1.9276236251246653e-07, 'fcm_dpo/beta': 0.001605308847501874, 'fcm_dpo/q_t': 0.4202464818954468, 'fcm_dpo/delta': 0.06441329419612885, 'fcm_dpo/margin': 210.0072021484375, 'margin_dpo/margin_mean': 210.0072021484375, 'margin_dpo/margin_std': 298.9598388671875, 'logps/chosen': -419.45892333984375, 'logps/rejected': -664.981689453125, 'logps/ref_chosen': -53.765865325927734, 'logps/ref_rejected': -89.28144836425781, 'KL/chosen_KL_mean': -365.69305419921875, 'KL/rejected_KL_mean': -575.7002563476562, 'KL/mean': -470.6966552734375, 'KL/std': 274.4378967285156, 'logits/chosen': -0.5225532054901123, 'logits/rejected': -0.5048704147338867, 'epoch': 0.62} + 62%|██████▏ | 421/681 [17:50<11:10, 2.58s/it] 62%|██████▏ | 422/681 [17:53<11:27, 2.65s/it] {'loss': 1.102, 'grad_norm': 34.15210723876953, 'learning_rate': 1.9151387954958792e-07, 'fcm_dpo/beta': 0.0016051906859502196, 'fcm_dpo/q_t': 0.40865635871887207, 'fcm_dpo/delta': 0.007199084386229515, 'fcm_dpo/margin': 244.88333129882812, 'margin_dpo/margin_mean': 244.8833465576172, 'margin_dpo/margin_std': 341.8487548828125, 'logps/chosen': -487.01531982421875, 'logps/rejected': -751.12841796875, 'logps/ref_chosen': -68.6337661743164, 'logps/ref_rejected': -87.86351013183594, 'KL/chosen_KL_mean': -418.38153076171875, 'KL/rejected_KL_mean': -663.264892578125, 'KL/mean': -540.8232421875, 'KL/std': 289.33966064453125, 'logits/chosen': -0.555054783821106, 'logits/rejected': -0.5556162595748901, 'epoch': 0.62} + 62%|██████▏ | 422/681 [17:53<11:27, 2.65s/it] 62%|██████▏ | 423/681 [17:55<11:04, 2.58s/it] {'loss': 1.0566, 'grad_norm': 28.743013381958008, 'learning_rate': 1.902669377503756e-07, 'fcm_dpo/beta': 0.0015967879444360733, 'fcm_dpo/q_t': 0.39945095777511597, 'fcm_dpo/delta': -0.0316154807806015, 'fcm_dpo/margin': 269.41815185546875, 'margin_dpo/margin_mean': 269.41815185546875, 'margin_dpo/margin_std': 303.0052795410156, 'logps/chosen': -462.525390625, 'logps/rejected': -763.2598266601562, 'logps/ref_chosen': -54.99030303955078, 'logps/ref_rejected': -86.30654907226562, 'KL/chosen_KL_mean': -407.53509521484375, 'KL/rejected_KL_mean': -676.9532470703125, 'KL/mean': -542.244140625, 'KL/std': 281.966796875, 'logits/chosen': -0.5464938879013062, 'logits/rejected': -0.5520018339157104, 'epoch': 0.62} + 62%|██████▏ | 423/681 [17:55<11:04, 2.58s/it] 62%|██████▏ | 424/681 [17:58<11:09, 2.61s/it] {'loss': 1.1061, 'grad_norm': 30.807126998901367, 'learning_rate': 1.890215699729057e-07, 'fcm_dpo/beta': 0.0015932518290355802, 'fcm_dpo/q_t': 0.4120855927467346, 'fcm_dpo/delta': 0.014376441016793251, 'fcm_dpo/margin': 242.24057006835938, 'margin_dpo/margin_mean': 242.24058532714844, 'margin_dpo/margin_std': 339.5845947265625, 'logps/chosen': -434.31494140625, 'logps/rejected': -687.0225830078125, 'logps/ref_chosen': -56.01192092895508, 'logps/ref_rejected': -66.47896575927734, 'KL/chosen_KL_mean': -378.3030090332031, 'KL/rejected_KL_mean': -620.5435791015625, 'KL/mean': -499.42327880859375, 'KL/std': 280.09075927734375, 'logits/chosen': -0.5540552139282227, 'logits/rejected': -0.5233687162399292, 'epoch': 0.62} + 62%|██████▏ | 424/681 [17:58<11:09, 2.61s/it] 62%|██████▏ | 425/681 [18:00<11:00, 2.58s/it] {'loss': 1.1387, 'grad_norm': 38.50931930541992, 'learning_rate': 1.8777780903377732e-07, 'fcm_dpo/beta': 0.00161844864487648, 'fcm_dpo/q_t': 0.42051440477371216, 'fcm_dpo/delta': 0.06421151012182236, 'fcm_dpo/margin': 208.72262573242188, 'margin_dpo/margin_mean': 208.72262573242188, 'margin_dpo/margin_std': 312.8076171875, 'logps/chosen': -456.50347900390625, 'logps/rejected': -714.2825317382812, 'logps/ref_chosen': -46.86899948120117, 'logps/ref_rejected': -95.92545318603516, 'KL/chosen_KL_mean': -409.63446044921875, 'KL/rejected_KL_mean': -618.3570556640625, 'KL/mean': -513.9957885742188, 'KL/std': 262.7354736328125, 'logits/chosen': -0.5310732126235962, 'logits/rejected': -0.5318828225135803, 'epoch': 0.62} + 62%|██████▏ | 425/681 [18:00<11:00, 2.58s/it] 63%|██████▎ | 426/681 [18:03<11:05, 2.61s/it] {'loss': 1.0974, 'grad_norm': 31.958324432373047, 'learning_rate': 1.8653568770724803e-07, 'fcm_dpo/beta': 0.0016317331464961171, 'fcm_dpo/q_t': 0.409078449010849, 'fcm_dpo/delta': 0.011202432215213776, 'fcm_dpo/margin': 238.22601318359375, 'margin_dpo/margin_mean': 238.22601318359375, 'margin_dpo/margin_std': 305.9199523925781, 'logps/chosen': -462.672119140625, 'logps/rejected': -705.5811767578125, 'logps/ref_chosen': -76.58354187011719, 'logps/ref_rejected': -81.26658630371094, 'KL/chosen_KL_mean': -386.08856201171875, 'KL/rejected_KL_mean': -624.3145751953125, 'KL/mean': -505.2015686035156, 'KL/std': 267.33624267578125, 'logits/chosen': -0.5409977436065674, 'logits/rejected': -0.501447319984436, 'epoch': 0.63} + 63%|██████▎ | 426/681 [18:03<11:05, 2.61s/it] 63%|██████▎ | 427/681 [18:05<11:03, 2.61s/it] {'loss': 1.1615, 'grad_norm': 26.211519241333008, 'learning_rate': 1.8529523872436977e-07, 'fcm_dpo/beta': 0.0016468719113618135, 'fcm_dpo/q_t': 0.4312647581100464, 'fcm_dpo/delta': 0.10344026982784271, 'fcm_dpo/margin': 182.03077697753906, 'margin_dpo/margin_mean': 182.03079223632812, 'margin_dpo/margin_std': 283.958251953125, 'logps/chosen': -422.7442321777344, 'logps/rejected': -618.4871215820312, 'logps/ref_chosen': -64.8538818359375, 'logps/ref_rejected': -78.5660171508789, 'KL/chosen_KL_mean': -357.8903503417969, 'KL/rejected_KL_mean': -539.921142578125, 'KL/mean': -448.9057312011719, 'KL/std': 236.3748779296875, 'logits/chosen': -0.5992106199264526, 'logits/rejected': -0.5759471654891968, 'epoch': 0.63} + 63%|██████▎ | 427/681 [18:05<11:03, 2.61s/it] 63%|██████▎ | 428/681 [18:08<11:06, 2.63s/it] {'loss': 1.0869, 'grad_norm': 35.00596237182617, 'learning_rate': 1.8405649477212697e-07, 'fcm_dpo/beta': 0.001645084354095161, 'fcm_dpo/q_t': 0.4009571671485901, 'fcm_dpo/delta': -0.038231100887060165, 'fcm_dpo/margin': 265.2386779785156, 'margin_dpo/margin_mean': 265.2386474609375, 'margin_dpo/margin_std': 379.43597412109375, 'logps/chosen': -491.640625, 'logps/rejected': -797.5244140625, 'logps/ref_chosen': -62.63666534423828, 'logps/ref_rejected': -103.28181457519531, 'KL/chosen_KL_mean': -429.00396728515625, 'KL/rejected_KL_mean': -694.2426147460938, 'KL/mean': -561.623291015625, 'KL/std': 311.591064453125, 'logits/chosen': -0.5608881711959839, 'logits/rejected': -0.559416651725769, 'epoch': 0.63} + 63%|██████▎ | 428/681 [18:08<11:06, 2.63s/it] 63%|██████▎ | 429/681 [18:11<11:01, 2.62s/it] {'loss': 1.1646, 'grad_norm': 28.7783145904541, 'learning_rate': 1.828194884925749e-07, 'fcm_dpo/beta': 0.0016439331229776144, 'fcm_dpo/q_t': 0.42502105236053467, 'fcm_dpo/delta': -0.013148479163646698, 'fcm_dpo/margin': 195.0811767578125, 'margin_dpo/margin_mean': 195.08114624023438, 'margin_dpo/margin_std': 329.076171875, 'logps/chosen': -504.8882141113281, 'logps/rejected': -710.5302734375, 'logps/ref_chosen': -81.23401641845703, 'logps/ref_rejected': -91.79493713378906, 'KL/chosen_KL_mean': -423.6542053222656, 'KL/rejected_KL_mean': -618.7353515625, 'KL/mean': -521.1947631835938, 'KL/std': 257.50677490234375, 'logits/chosen': -0.5880295634269714, 'logits/rejected': -0.5610803961753845, 'epoch': 0.63} + 63%|██████▎ | 429/681 [18:11<11:01, 2.62s/it] 63%|██████▎ | 430/681 [18:13<11:07, 2.66s/it] {'loss': 1.1134, 'grad_norm': 30.429576873779297, 'learning_rate': 1.8158425248197928e-07, 'fcm_dpo/beta': 0.0016547690611332655, 'fcm_dpo/q_t': 0.4177946150302887, 'fcm_dpo/delta': 0.04677361994981766, 'fcm_dpo/margin': 214.4279022216797, 'margin_dpo/margin_mean': 214.42791748046875, 'margin_dpo/margin_std': 282.10595703125, 'logps/chosen': -410.7891845703125, 'logps/rejected': -668.7196044921875, 'logps/ref_chosen': -60.920326232910156, 'logps/ref_rejected': -104.42280578613281, 'KL/chosen_KL_mean': -349.86883544921875, 'KL/rejected_KL_mean': -564.2967529296875, 'KL/mean': -457.08282470703125, 'KL/std': 251.49932861328125, 'logits/chosen': -0.5739535093307495, 'logits/rejected': -0.5771204233169556, 'epoch': 0.63} + 63%|██████▎ | 430/681 [18:13<11:07, 2.66s/it] 63%|██████▎ | 431/681 [18:16<11:06, 2.67s/it] {'loss': 1.0381, 'grad_norm': 32.40290451049805, 'learning_rate': 1.8035081928995788e-07, 'fcm_dpo/beta': 0.0016404774505645037, 'fcm_dpo/q_t': 0.39278194308280945, 'fcm_dpo/delta': -0.06328192353248596, 'fcm_dpo/margin': 280.59796142578125, 'margin_dpo/margin_mean': 280.59796142578125, 'margin_dpo/margin_std': 304.55780029296875, 'logps/chosen': -391.6836853027344, 'logps/rejected': -707.7731323242188, 'logps/ref_chosen': -57.34874725341797, 'logps/ref_rejected': -92.84022521972656, 'KL/chosen_KL_mean': -334.3349609375, 'KL/rejected_KL_mean': -614.932861328125, 'KL/mean': -474.6339111328125, 'KL/std': 273.7649230957031, 'logits/chosen': -0.5994788408279419, 'logits/rejected': -0.5967893600463867, 'epoch': 0.63} + 63%|██████▎ | 431/681 [18:16<11:06, 2.67s/it] 63%|██████▎ | 432/681 [18:19<11:09, 2.69s/it] {'loss': 1.0171, 'grad_norm': 35.32616424560547, 'learning_rate': 1.791192214186223e-07, 'fcm_dpo/beta': 0.0016270647756755352, 'fcm_dpo/q_t': 0.38900789618492126, 'fcm_dpo/delta': -0.08062286674976349, 'fcm_dpo/margin': 292.85833740234375, 'margin_dpo/margin_mean': 292.85833740234375, 'margin_dpo/margin_std': 278.27447509765625, 'logps/chosen': -393.72308349609375, 'logps/rejected': -714.086181640625, 'logps/ref_chosen': -71.07479095458984, 'logps/ref_rejected': -98.57952880859375, 'KL/chosen_KL_mean': -322.6483154296875, 'KL/rejected_KL_mean': -615.506591796875, 'KL/mean': -469.07745361328125, 'KL/std': 274.3560485839844, 'logits/chosen': -0.5596065521240234, 'logits/rejected': -0.5433411598205566, 'epoch': 0.63} + 63%|██████▎ | 432/681 [18:19<11:09, 2.69s/it] 64%|██████▎ | 433/681 [18:21<11:01, 2.67s/it] {'loss': 1.1663, 'grad_norm': 36.68017578125, 'learning_rate': 1.7788949132172193e-07, 'fcm_dpo/beta': 0.001634822110645473, 'fcm_dpo/q_t': 0.425261914730072, 'fcm_dpo/delta': 0.08947563171386719, 'fcm_dpo/margin': 191.52005004882812, 'margin_dpo/margin_mean': 191.52005004882812, 'margin_dpo/margin_std': 325.86212158203125, 'logps/chosen': -475.02215576171875, 'logps/rejected': -704.2198486328125, 'logps/ref_chosen': -58.273193359375, 'logps/ref_rejected': -95.95089721679688, 'KL/chosen_KL_mean': -416.74896240234375, 'KL/rejected_KL_mean': -608.2689208984375, 'KL/mean': -512.5089721679688, 'KL/std': 277.8785095214844, 'logits/chosen': -0.6115612983703613, 'logits/rejected': -0.5973314046859741, 'epoch': 0.64} + 64%|██████▎ | 433/681 [18:22<11:01, 2.67s/it] 64%|██████▎ | 434/681 [18:24<10:54, 2.65s/it] {'loss': 1.1326, 'grad_norm': 33.74725341796875, 'learning_rate': 1.7666166140378853e-07, 'fcm_dpo/beta': 0.0016470999689772725, 'fcm_dpo/q_t': 0.4220370948314667, 'fcm_dpo/delta': 0.05136201158165932, 'fcm_dpo/margin': 212.747314453125, 'margin_dpo/margin_mean': 212.747314453125, 'margin_dpo/margin_std': 328.4437561035156, 'logps/chosen': -442.5596618652344, 'logps/rejected': -671.8319091796875, 'logps/ref_chosen': -61.97370147705078, 'logps/ref_rejected': -78.49861145019531, 'KL/chosen_KL_mean': -380.5859375, 'KL/rejected_KL_mean': -593.333251953125, 'KL/mean': -486.9596252441406, 'KL/std': 263.0496520996094, 'logits/chosen': -0.6359285116195679, 'logits/rejected': -0.6292107105255127, 'epoch': 0.64} + 64%|██████▎ | 434/681 [18:24<10:54, 2.65s/it] 64%|██████▍ | 435/681 [18:26<10:21, 2.53s/it] {'loss': 1.0826, 'grad_norm': 35.001678466796875, 'learning_rate': 1.7543576401928218e-07, 'fcm_dpo/beta': 0.0016516190953552723, 'fcm_dpo/q_t': 0.4048606753349304, 'fcm_dpo/delta': -0.0136133236810565, 'fcm_dpo/margin': 250.07640075683594, 'margin_dpo/margin_mean': 250.076416015625, 'margin_dpo/margin_std': 318.9538269042969, 'logps/chosen': -397.61865234375, 'logps/rejected': -683.7598876953125, 'logps/ref_chosen': -51.502052307128906, 'logps/ref_rejected': -87.56689453125, 'KL/chosen_KL_mean': -346.1165771484375, 'KL/rejected_KL_mean': -596.1929931640625, 'KL/mean': -471.15478515625, 'KL/std': 277.8624267578125, 'logits/chosen': -0.643917441368103, 'logits/rejected': -0.6357216835021973, 'epoch': 0.64} + 64%|██████▍ | 435/681 [18:26<10:21, 2.53s/it] 64%|██████▍ | 436/681 [18:29<10:27, 2.56s/it] {'loss': 1.1098, 'grad_norm': 35.268802642822266, 'learning_rate': 1.742118314717391e-07, 'fcm_dpo/beta': 0.0016546837287023664, 'fcm_dpo/q_t': 0.41425737738609314, 'fcm_dpo/delta': 0.032419584691524506, 'fcm_dpo/margin': 222.81613159179688, 'margin_dpo/margin_mean': 222.81613159179688, 'margin_dpo/margin_std': 298.9530029296875, 'logps/chosen': -430.83001708984375, 'logps/rejected': -664.97021484375, 'logps/ref_chosen': -71.40371704101562, 'logps/ref_rejected': -82.72775268554688, 'KL/chosen_KL_mean': -359.42626953125, 'KL/rejected_KL_mean': -582.242431640625, 'KL/mean': -470.8343505859375, 'KL/std': 251.36630249023438, 'logits/chosen': -0.6128599643707275, 'logits/rejected': -0.5752372741699219, 'epoch': 0.64} + 64%|██████▍ | 436/681 [18:29<10:27, 2.56s/it] 64%|██████▍ | 437/681 [18:32<10:35, 2.60s/it] {'loss': 1.1091, 'grad_norm': 27.045368194580078, 'learning_rate': 1.7298989601292036e-07, 'fcm_dpo/beta': 0.001670231344178319, 'fcm_dpo/q_t': 0.41457653045654297, 'fcm_dpo/delta': 0.036831051111221313, 'fcm_dpo/margin': 218.2496337890625, 'margin_dpo/margin_mean': 218.2496337890625, 'margin_dpo/margin_std': 285.473388671875, 'logps/chosen': -436.74017333984375, 'logps/rejected': -672.2890625, 'logps/ref_chosen': -64.7442626953125, 'logps/ref_rejected': -82.04356384277344, 'KL/chosen_KL_mean': -371.99591064453125, 'KL/rejected_KL_mean': -590.2454833984375, 'KL/mean': -481.1207275390625, 'KL/std': 237.38775634765625, 'logits/chosen': -0.6260372400283813, 'logits/rejected': -0.5966402292251587, 'epoch': 0.64} + 64%|██████▍ | 437/681 [18:32<10:35, 2.60s/it] 64%|██████▍ | 438/681 [18:34<10:18, 2.54s/it] {'loss': 1.0735, 'grad_norm': 36.76564025878906, 'learning_rate': 1.7176998984196144e-07, 'fcm_dpo/beta': 0.0016619900707155466, 'fcm_dpo/q_t': 0.40174010396003723, 'fcm_dpo/delta': -0.022839529439806938, 'fcm_dpo/margin': 253.57174682617188, 'margin_dpo/margin_mean': 253.57174682617188, 'margin_dpo/margin_std': 311.7683410644531, 'logps/chosen': -452.14678955078125, 'logps/rejected': -729.7767333984375, 'logps/ref_chosen': -59.0186653137207, 'logps/ref_rejected': -83.07682800292969, 'KL/chosen_KL_mean': -393.12811279296875, 'KL/rejected_KL_mean': -646.6998901367188, 'KL/mean': -519.9140014648438, 'KL/std': 283.584716796875, 'logits/chosen': -0.6138747930526733, 'logits/rejected': -0.5866237878799438, 'epoch': 0.64} + 64%|██████▍ | 438/681 [18:34<10:18, 2.54s/it] 64%|██████▍ | 439/681 [18:37<10:07, 2.51s/it] {'loss': 1.1405, 'grad_norm': 30.52996253967285, 'learning_rate': 1.7055214510452458e-07, 'fcm_dpo/beta': 0.0016392945544794202, 'fcm_dpo/q_t': 0.4188630282878876, 'fcm_dpo/delta': -0.08405376225709915, 'fcm_dpo/margin': 217.56015014648438, 'margin_dpo/margin_mean': 217.56015014648438, 'margin_dpo/margin_std': 336.02734375, 'logps/chosen': -454.71209716796875, 'logps/rejected': -702.4736328125, 'logps/ref_chosen': -53.78407669067383, 'logps/ref_rejected': -83.98545837402344, 'KL/chosen_KL_mean': -400.92803955078125, 'KL/rejected_KL_mean': -618.4881591796875, 'KL/mean': -509.7080993652344, 'KL/std': 285.30078125, 'logits/chosen': -0.6197365522384644, 'logits/rejected': -0.6234545707702637, 'epoch': 0.64} + 64%|██████▍ | 439/681 [18:37<10:07, 2.51s/it] 65%|██████▍ | 440/681 [18:39<09:54, 2.47s/it] {'loss': 1.1009, 'grad_norm': 46.26620101928711, 'learning_rate': 1.6933639389195134e-07, 'fcm_dpo/beta': 0.001642939867451787, 'fcm_dpo/q_t': 0.4110341966152191, 'fcm_dpo/delta': -0.0017192382365465164, 'fcm_dpo/margin': 244.39990234375, 'margin_dpo/margin_mean': 244.39990234375, 'margin_dpo/margin_std': 356.36102294921875, 'logps/chosen': -503.59625244140625, 'logps/rejected': -765.9271850585938, 'logps/ref_chosen': -78.56671905517578, 'logps/ref_rejected': -96.49775695800781, 'KL/chosen_KL_mean': -425.029541015625, 'KL/rejected_KL_mean': -669.429443359375, 'KL/mean': -547.2294921875, 'KL/std': 345.5404968261719, 'logits/chosen': -0.6939189434051514, 'logits/rejected': -0.6905303597450256, 'epoch': 0.65} + 65%|██████▍ | 440/681 [18:39<09:54, 2.47s/it] 65%|██████▍ | 441/681 [18:42<10:04, 2.52s/it] {'loss': 1.1481, 'grad_norm': 47.700626373291016, 'learning_rate': 1.681227682404166e-07, 'fcm_dpo/beta': 0.001648401957936585, 'fcm_dpo/q_t': 0.41518956422805786, 'fcm_dpo/delta': 0.011800557374954224, 'fcm_dpo/margin': 235.54818725585938, 'margin_dpo/margin_mean': 235.54818725585938, 'margin_dpo/margin_std': 421.46875, 'logps/chosen': -541.8203125, 'logps/rejected': -813.014892578125, 'logps/ref_chosen': -60.824440002441406, 'logps/ref_rejected': -96.47080993652344, 'KL/chosen_KL_mean': -480.99591064453125, 'KL/rejected_KL_mean': -716.5440673828125, 'KL/mean': -598.77001953125, 'KL/std': 346.46795654296875, 'logits/chosen': -0.6772704720497131, 'logits/rejected': -0.661035418510437, 'epoch': 0.65} + 65%|██████▍ | 441/681 [18:42<10:04, 2.52s/it] 65%|██████▍ | 442/681 [18:44<09:52, 2.48s/it] {'loss': 1.0748, 'grad_norm': 32.922607421875, 'learning_rate': 1.669113001300851e-07, 'fcm_dpo/beta': 0.0016397257568314672, 'fcm_dpo/q_t': 0.4000922739505768, 'fcm_dpo/delta': -0.055591996759176254, 'fcm_dpo/margin': 276.02154541015625, 'margin_dpo/margin_mean': 276.021484375, 'margin_dpo/margin_std': 379.1744384765625, 'logps/chosen': -461.625732421875, 'logps/rejected': -767.17529296875, 'logps/ref_chosen': -47.01121520996094, 'logps/ref_rejected': -76.53926086425781, 'KL/chosen_KL_mean': -414.614501953125, 'KL/rejected_KL_mean': -690.6361083984375, 'KL/mean': -552.625244140625, 'KL/std': 340.30548095703125, 'logits/chosen': -0.6050703525543213, 'logits/rejected': -0.5892840623855591, 'epoch': 0.65} + 65%|██████▍ | 442/681 [18:44<09:52, 2.48s/it] 65%|██████▌ | 443/681 [18:46<09:57, 2.51s/it] {'loss': 1.2244, 'grad_norm': 49.77903366088867, 'learning_rate': 1.6570202148426815e-07, 'fcm_dpo/beta': 0.0016242916462942958, 'fcm_dpo/q_t': 0.4363713562488556, 'fcm_dpo/delta': -0.0004603892157319933, 'fcm_dpo/margin': 174.32577514648438, 'margin_dpo/margin_mean': 174.32577514648438, 'margin_dpo/margin_std': 398.736572265625, 'logps/chosen': -527.2371215820312, 'logps/rejected': -716.9697875976562, 'logps/ref_chosen': -71.27301788330078, 'logps/ref_rejected': -86.679931640625, 'KL/chosen_KL_mean': -455.964111328125, 'KL/rejected_KL_mean': -630.2898559570312, 'KL/mean': -543.126953125, 'KL/std': 322.6165466308594, 'logits/chosen': -0.6197609901428223, 'logits/rejected': -0.5921432375907898, 'epoch': 0.65} + 65%|██████▌ | 443/681 [18:47<09:57, 2.51s/it] 65%|██████▌ | 444/681 [18:49<09:54, 2.51s/it] {'loss': 1.0468, 'grad_norm': 47.931034088134766, 'learning_rate': 1.6449496416858282e-07, 'fcm_dpo/beta': 0.0016027928795665503, 'fcm_dpo/q_t': 0.39284807443618774, 'fcm_dpo/delta': -0.0802171379327774, 'fcm_dpo/margin': 297.1391296386719, 'margin_dpo/margin_mean': 297.1391296386719, 'margin_dpo/margin_std': 377.43927001953125, 'logps/chosen': -488.59661865234375, 'logps/rejected': -825.7769165039062, 'logps/ref_chosen': -57.213706970214844, 'logps/ref_rejected': -97.25489807128906, 'KL/chosen_KL_mean': -431.3829040527344, 'KL/rejected_KL_mean': -728.5220336914062, 'KL/mean': -579.9524536132812, 'KL/std': 343.83526611328125, 'logits/chosen': -0.5499997138977051, 'logits/rejected': -0.5509436726570129, 'epoch': 0.65} + 65%|██████▌ | 444/681 [18:49<09:54, 2.51s/it] 65%|██████▌ | 445/681 [18:52<10:15, 2.61s/it] {'loss': 1.0847, 'grad_norm': 33.41164779663086, 'learning_rate': 1.6329015999011182e-07, 'fcm_dpo/beta': 0.0015939505537971854, 'fcm_dpo/q_t': 0.4040035307407379, 'fcm_dpo/delta': -0.02425987273454666, 'fcm_dpo/margin': 265.5284118652344, 'margin_dpo/margin_mean': 265.5284118652344, 'margin_dpo/margin_std': 356.4762878417969, 'logps/chosen': -454.1302490234375, 'logps/rejected': -745.0415649414062, 'logps/ref_chosen': -67.29979705810547, 'logps/ref_rejected': -92.68267059326172, 'KL/chosen_KL_mean': -386.8304443359375, 'KL/rejected_KL_mean': -652.35888671875, 'KL/mean': -519.5946655273438, 'KL/std': 278.63726806640625, 'logits/chosen': -0.6030969619750977, 'logits/rejected': -0.587662398815155, 'epoch': 0.65} + 65%|██████▌ | 445/681 [18:52<10:15, 2.61s/it] 65%|██████▌ | 446/681 [18:54<10:12, 2.61s/it] {'loss': 1.0313, 'grad_norm': 32.357421875, 'learning_rate': 1.6208764069656578e-07, 'fcm_dpo/beta': 0.001583605189807713, 'fcm_dpo/q_t': 0.3905741572380066, 'fcm_dpo/delta': -0.07294195890426636, 'fcm_dpo/margin': 296.2715759277344, 'margin_dpo/margin_mean': 296.2715759277344, 'margin_dpo/margin_std': 308.3010559082031, 'logps/chosen': -407.0983581542969, 'logps/rejected': -745.53564453125, 'logps/ref_chosen': -59.098487854003906, 'logps/ref_rejected': -101.26419067382812, 'KL/chosen_KL_mean': -347.9998779296875, 'KL/rejected_KL_mean': -644.271484375, 'KL/mean': -496.1356201171875, 'KL/std': 297.07550048828125, 'logits/chosen': -0.593482255935669, 'logits/rejected': -0.6047611236572266, 'epoch': 0.65} + 65%|██████▌ | 446/681 [18:54<10:12, 2.61s/it] 66%|██████▌ | 447/681 [18:57<09:53, 2.54s/it] {'loss': 1.0466, 'grad_norm': 34.99128341674805, 'learning_rate': 1.608874379754465e-07, 'fcm_dpo/beta': 0.001546173356473446, 'fcm_dpo/q_t': 0.39456889033317566, 'fcm_dpo/delta': -0.07345931977033615, 'fcm_dpo/margin': 303.7477722167969, 'margin_dpo/margin_mean': 303.7477722167969, 'margin_dpo/margin_std': 382.21380615234375, 'logps/chosen': -412.1768798828125, 'logps/rejected': -758.5440673828125, 'logps/ref_chosen': -56.07533264160156, 'logps/ref_rejected': -98.69475555419922, 'KL/chosen_KL_mean': -356.1015625, 'KL/rejected_KL_mean': -659.849365234375, 'KL/mean': -507.9754333496094, 'KL/std': 338.80889892578125, 'logits/chosen': -0.6641237735748291, 'logits/rejected': -0.6772187948226929, 'epoch': 0.66} + 66%|██████▌ | 447/681 [18:57<09:53, 2.54s/it] 66%|██████▌ | 448/681 [18:59<09:53, 2.55s/it] {'loss': 1.044, 'grad_norm': 37.12090301513672, 'learning_rate': 1.5968958345321177e-07, 'fcm_dpo/beta': 0.0015382280107587576, 'fcm_dpo/q_t': 0.3958974778652191, 'fcm_dpo/delta': -0.051600463688373566, 'fcm_dpo/margin': 292.04010009765625, 'margin_dpo/margin_mean': 292.0401306152344, 'margin_dpo/margin_std': 318.9796142578125, 'logps/chosen': -446.7455749511719, 'logps/rejected': -781.0465087890625, 'logps/ref_chosen': -60.00384521484375, 'logps/ref_rejected': -102.26465606689453, 'KL/chosen_KL_mean': -386.7417297363281, 'KL/rejected_KL_mean': -678.7818603515625, 'KL/mean': -532.7618408203125, 'KL/std': 287.2745056152344, 'logits/chosen': -0.5725095272064209, 'logits/rejected': -0.5768595933914185, 'epoch': 0.66} + 66%|██████▌ | 448/681 [18:59<09:53, 2.55s/it] 66%|██████▌ | 449/681 [19:02<09:48, 2.54s/it] {'loss': 1.1009, 'grad_norm': 35.58975601196289, 'learning_rate': 1.584941086944423e-07, 'fcm_dpo/beta': 0.001521222060546279, 'fcm_dpo/q_t': 0.4064374566078186, 'fcm_dpo/delta': -0.02261107787489891, 'fcm_dpo/margin': 277.12255859375, 'margin_dpo/margin_mean': 277.12255859375, 'margin_dpo/margin_std': 424.0272216796875, 'logps/chosen': -459.05859375, 'logps/rejected': -757.25146484375, 'logps/ref_chosen': -67.52661895751953, 'logps/ref_rejected': -88.59690856933594, 'KL/chosen_KL_mean': -391.531982421875, 'KL/rejected_KL_mean': -668.654541015625, 'KL/mean': -530.09326171875, 'KL/std': 336.97320556640625, 'logits/chosen': -0.6065933704376221, 'logits/rejected': -0.5960414409637451, 'epoch': 0.66} + 66%|██████▌ | 449/681 [19:02<09:48, 2.54s/it] 66%|██████▌ | 450/681 [19:04<09:46, 2.54s/it] {'loss': 1.0075, 'grad_norm': 38.07679748535156, 'learning_rate': 1.573010452010098e-07, 'fcm_dpo/beta': 0.0015039572026580572, 'fcm_dpo/q_t': 0.3854549527168274, 'fcm_dpo/delta': -0.09042147547006607, 'fcm_dpo/margin': 323.22100830078125, 'margin_dpo/margin_mean': 323.2209777832031, 'margin_dpo/margin_std': 305.29486083984375, 'logps/chosen': -382.2502136230469, 'logps/rejected': -751.1180419921875, 'logps/ref_chosen': -57.10811996459961, 'logps/ref_rejected': -102.75494384765625, 'KL/chosen_KL_mean': -325.14208984375, 'KL/rejected_KL_mean': -648.3630981445312, 'KL/mean': -486.7525634765625, 'KL/std': 311.44439697265625, 'logits/chosen': -0.6078156232833862, 'logits/rejected': -0.6151422262191772, 'epoch': 0.66} + 66%|██████▌ | 450/681 [19:04<09:46, 2.54s/it] 66%|██████▌ | 451/681 [19:07<09:22, 2.44s/it] {'loss': 1.1516, 'grad_norm': 28.49110221862793, 'learning_rate': 1.5611042441124687e-07, 'fcm_dpo/beta': 0.001510746544227004, 'fcm_dpo/q_t': 0.41878455877304077, 'fcm_dpo/delta': 0.057636506855487823, 'fcm_dpo/margin': 227.80252075195312, 'margin_dpo/margin_mean': 227.80252075195312, 'margin_dpo/margin_std': 386.4510498046875, 'logps/chosen': -481.79425048828125, 'logps/rejected': -724.057373046875, 'logps/ref_chosen': -58.46883010864258, 'logps/ref_rejected': -72.92941284179688, 'KL/chosen_KL_mean': -423.325439453125, 'KL/rejected_KL_mean': -651.1279907226562, 'KL/mean': -537.2266845703125, 'KL/std': 339.7525634765625, 'logits/chosen': -0.650975227355957, 'logits/rejected': -0.6234632134437561, 'epoch': 0.66} + 66%|██████▌ | 451/681 [19:07<09:22, 2.44s/it] 66%|██████▋ | 452/681 [19:09<09:19, 2.44s/it] {'loss': 1.0375, 'grad_norm': 21.779226303100586, 'learning_rate': 1.549222776991186e-07, 'fcm_dpo/beta': 0.0014999432023614645, 'fcm_dpo/q_t': 0.3958936929702759, 'fcm_dpo/delta': -0.04238360375165939, 'fcm_dpo/margin': 293.60125732421875, 'margin_dpo/margin_mean': 293.60125732421875, 'margin_dpo/margin_std': 286.99456787109375, 'logps/chosen': -346.420654296875, 'logps/rejected': -687.40283203125, 'logps/ref_chosen': -50.39055252075195, 'logps/ref_rejected': -97.77142333984375, 'KL/chosen_KL_mean': -296.03009033203125, 'KL/rejected_KL_mean': -589.63134765625, 'KL/mean': -442.83074951171875, 'KL/std': 274.8699951171875, 'logits/chosen': -0.5605393648147583, 'logits/rejected': -0.5794718265533447, 'epoch': 0.66} + 66%|██████▋ | 452/681 [19:09<09:19, 2.44s/it] 67%|██████▋ | 453/681 [19:11<09:06, 2.40s/it] {'loss': 1.1003, 'grad_norm': 23.549057006835938, 'learning_rate': 1.5373663637339584e-07, 'fcm_dpo/beta': 0.0014997010584920645, 'fcm_dpo/q_t': 0.4132787585258484, 'fcm_dpo/delta': 0.022862950339913368, 'fcm_dpo/margin': 251.9658203125, 'margin_dpo/margin_mean': 251.9658203125, 'margin_dpo/margin_std': 328.4573974609375, 'logps/chosen': -418.96673583984375, 'logps/rejected': -695.4251098632812, 'logps/ref_chosen': -57.71485137939453, 'logps/ref_rejected': -82.20741271972656, 'KL/chosen_KL_mean': -361.25189208984375, 'KL/rejected_KL_mean': -613.2176513671875, 'KL/mean': -487.23480224609375, 'KL/std': 274.94085693359375, 'logits/chosen': -0.6005634069442749, 'logits/rejected': -0.576606273651123, 'epoch': 0.67} + 67%|██████▋ | 453/681 [19:11<09:06, 2.40s/it] 67%|██████▋ | 454/681 [19:14<09:11, 2.43s/it] {'loss': 1.0595, 'grad_norm': 23.49666404724121, 'learning_rate': 1.5255353167683017e-07, 'fcm_dpo/beta': 0.0014922961127012968, 'fcm_dpo/q_t': 0.3982745409011841, 'fcm_dpo/delta': -0.04614517092704773, 'fcm_dpo/margin': 297.4459228515625, 'margin_dpo/margin_mean': 297.4459228515625, 'margin_dpo/margin_std': 368.7091369628906, 'logps/chosen': -490.2272644042969, 'logps/rejected': -811.6783447265625, 'logps/ref_chosen': -60.945648193359375, 'logps/ref_rejected': -84.95079040527344, 'KL/chosen_KL_mean': -429.2816162109375, 'KL/rejected_KL_mean': -726.7275390625, 'KL/mean': -578.0045166015625, 'KL/std': 324.15545654296875, 'logits/chosen': -0.6576023101806641, 'logits/rejected': -0.6443264484405518, 'epoch': 0.67} + 67%|██████▋ | 454/681 [19:14<09:11, 2.43s/it] 67%|██████▋ | 455/681 [19:16<09:06, 2.42s/it] {'loss': 1.0019, 'grad_norm': 33.2188606262207, 'learning_rate': 1.5137299478533064e-07, 'fcm_dpo/beta': 0.0014687062939628959, 'fcm_dpo/q_t': 0.37912923097610474, 'fcm_dpo/delta': -0.13889265060424805, 'fcm_dpo/margin': 362.0201416015625, 'margin_dpo/margin_mean': 362.0201416015625, 'margin_dpo/margin_std': 392.91925048828125, 'logps/chosen': -425.417236328125, 'logps/rejected': -857.8521728515625, 'logps/ref_chosen': -44.88671112060547, 'logps/ref_rejected': -115.30147552490234, 'KL/chosen_KL_mean': -380.530517578125, 'KL/rejected_KL_mean': -742.5506591796875, 'KL/mean': -561.5405883789062, 'KL/std': 345.6868896484375, 'logits/chosen': -0.6506750583648682, 'logits/rejected': -0.6731724739074707, 'epoch': 0.67} + 67%|██████▋ | 455/681 [19:16<09:06, 2.42s/it] 67%|██████▋ | 456/681 [19:19<09:16, 2.47s/it] {'loss': 1.0025, 'grad_norm': 32.33451843261719, 'learning_rate': 1.5019505680714232e-07, 'fcm_dpo/beta': 0.0014300058828666806, 'fcm_dpo/q_t': 0.38482022285461426, 'fcm_dpo/delta': -0.10262109339237213, 'fcm_dpo/margin': 347.93292236328125, 'margin_dpo/margin_mean': 347.9329528808594, 'margin_dpo/margin_std': 340.260986328125, 'logps/chosen': -458.8480224609375, 'logps/rejected': -854.9620361328125, 'logps/ref_chosen': -57.036781311035156, 'logps/ref_rejected': -105.21784210205078, 'KL/chosen_KL_mean': -401.8112487792969, 'KL/rejected_KL_mean': -749.744140625, 'KL/mean': -575.7777099609375, 'KL/std': 341.2975158691406, 'logits/chosen': -0.612378716468811, 'logits/rejected': -0.632037878036499, 'epoch': 0.67} + 67%|██████▋ | 456/681 [19:19<09:16, 2.47s/it] 67%|██████▋ | 457/681 [19:22<09:33, 2.56s/it] {'loss': 1.0157, 'grad_norm': 27.79351043701172, 'learning_rate': 1.4901974878202627e-07, 'fcm_dpo/beta': 0.0014014223124831915, 'fcm_dpo/q_t': 0.3878782391548157, 'fcm_dpo/delta': -0.08146154880523682, 'fcm_dpo/margin': 340.5579833984375, 'margin_dpo/margin_mean': 340.5579833984375, 'margin_dpo/margin_std': 328.9454345703125, 'logps/chosen': -450.0408020019531, 'logps/rejected': -821.4658203125, 'logps/ref_chosen': -54.24253845214844, 'logps/ref_rejected': -85.10956573486328, 'KL/chosen_KL_mean': -395.79827880859375, 'KL/rejected_KL_mean': -736.3562622070312, 'KL/mean': -566.0772705078125, 'KL/std': 335.4608459472656, 'logits/chosen': -0.6594676971435547, 'logits/rejected': -0.6568940877914429, 'epoch': 0.67} + 67%|██████▋ | 457/681 [19:22<09:33, 2.56s/it] 67%|██████▋ | 458/681 [19:24<09:27, 2.54s/it] {'loss': 1.0668, 'grad_norm': 24.5087890625, 'learning_rate': 1.4784710168044212e-07, 'fcm_dpo/beta': 0.001388939330354333, 'fcm_dpo/q_t': 0.4020830988883972, 'fcm_dpo/delta': -0.0265361275523901, 'fcm_dpo/margin': 306.07122802734375, 'margin_dpo/margin_mean': 306.07122802734375, 'margin_dpo/margin_std': 367.548095703125, 'logps/chosen': -480.5562438964844, 'logps/rejected': -828.90185546875, 'logps/ref_chosen': -55.40888214111328, 'logps/ref_rejected': -97.68325805664062, 'KL/chosen_KL_mean': -425.1473693847656, 'KL/rejected_KL_mean': -731.2185668945312, 'KL/mean': -578.1829833984375, 'KL/std': 312.2036437988281, 'logits/chosen': -0.6767433881759644, 'logits/rejected': -0.6698124408721924, 'epoch': 0.67} + 67%|██████▋ | 458/681 [19:24<09:27, 2.54s/it] 67%|██████▋ | 459/681 [19:27<09:26, 2.55s/it] {'loss': 1.0698, 'grad_norm': 37.77009201049805, 'learning_rate': 1.466771464027316e-07, 'fcm_dpo/beta': 0.0013780685840174556, 'fcm_dpo/q_t': 0.3988497853279114, 'fcm_dpo/delta': -0.04427627474069595, 'fcm_dpo/margin': 320.7283935546875, 'margin_dpo/margin_mean': 320.7284240722656, 'margin_dpo/margin_std': 416.4349365234375, 'logps/chosen': -523.730224609375, 'logps/rejected': -884.0697021484375, 'logps/ref_chosen': -46.55748748779297, 'logps/ref_rejected': -86.16854095458984, 'KL/chosen_KL_mean': -477.1727294921875, 'KL/rejected_KL_mean': -797.9011840820312, 'KL/mean': -637.5369873046875, 'KL/std': 362.09600830078125, 'logits/chosen': -0.6691190004348755, 'logits/rejected': -0.6827735304832458, 'epoch': 0.67} + 67%|██████▋ | 459/681 [19:27<09:26, 2.55s/it] 68%|██████▊ | 460/681 [19:29<09:31, 2.59s/it] {'loss': 1.0251, 'grad_norm': 38.9324836730957, 'learning_rate': 1.4550991377830423e-07, 'fcm_dpo/beta': 0.0013614799827337265, 'fcm_dpo/q_t': 0.38881251215934753, 'fcm_dpo/delta': -0.08867627382278442, 'fcm_dpo/margin': 355.7840576171875, 'margin_dpo/margin_mean': 355.7840576171875, 'margin_dpo/margin_std': 394.8114013671875, 'logps/chosen': -576.19775390625, 'logps/rejected': -984.4661865234375, 'logps/ref_chosen': -51.63489532470703, 'logps/ref_rejected': -104.11935424804688, 'KL/chosen_KL_mean': -524.5628051757812, 'KL/rejected_KL_mean': -880.3468627929688, 'KL/mean': -702.454833984375, 'KL/std': 362.19757080078125, 'logits/chosen': -0.7337905168533325, 'logits/rejected': -0.7658564448356628, 'epoch': 0.68} + 68%|██████▊ | 460/681 [19:29<09:31, 2.59s/it] 68%|██████▊ | 461/681 [19:32<09:29, 2.59s/it] {'loss': 1.114, 'grad_norm': 25.64112663269043, 'learning_rate': 1.4434543456482518e-07, 'fcm_dpo/beta': 0.001360948197543621, 'fcm_dpo/q_t': 0.41348153352737427, 'fcm_dpo/delta': 0.018288645893335342, 'fcm_dpo/margin': 280.931640625, 'margin_dpo/margin_mean': 280.931640625, 'margin_dpo/margin_std': 416.9804992675781, 'logps/chosen': -606.66259765625, 'logps/rejected': -918.88916015625, 'logps/ref_chosen': -55.18195724487305, 'logps/ref_rejected': -86.47689819335938, 'KL/chosen_KL_mean': -551.4806518554688, 'KL/rejected_KL_mean': -832.4122314453125, 'KL/mean': -691.9464111328125, 'KL/std': 369.3660888671875, 'logits/chosen': -0.7380908727645874, 'logits/rejected': -0.750026524066925, 'epoch': 0.68} + 68%|██████▊ | 461/681 [19:32<09:29, 2.59s/it] 68%|██████▊ | 462/681 [19:34<09:19, 2.55s/it] {'loss': 1.1826, 'grad_norm': 43.11104965209961, 'learning_rate': 1.4318373944740484e-07, 'fcm_dpo/beta': 0.0013791057281196117, 'fcm_dpo/q_t': 0.4319148361682892, 'fcm_dpo/delta': 0.09573453664779663, 'fcm_dpo/margin': 222.78219604492188, 'margin_dpo/margin_mean': 222.78216552734375, 'margin_dpo/margin_std': 426.85430908203125, 'logps/chosen': -660.9522705078125, 'logps/rejected': -892.6475830078125, 'logps/ref_chosen': -69.92803192138672, 'logps/ref_rejected': -78.84111022949219, 'KL/chosen_KL_mean': -591.0242919921875, 'KL/rejected_KL_mean': -813.806396484375, 'KL/mean': -702.4153442382812, 'KL/std': 374.5951232910156, 'logits/chosen': -0.8458345532417297, 'logits/rejected': -0.829505205154419, 'epoch': 0.68} + 68%|██████▊ | 462/681 [19:35<09:19, 2.55s/it] 68%|██████▊ | 463/681 [19:37<09:09, 2.52s/it] {'loss': 1.1057, 'grad_norm': 39.33317565917969, 'learning_rate': 1.4202485903778976e-07, 'fcm_dpo/beta': 0.0013890512054786086, 'fcm_dpo/q_t': 0.40834498405456543, 'fcm_dpo/delta': -0.007682671770453453, 'fcm_dpo/margin': 293.15423583984375, 'margin_dpo/margin_mean': 293.15423583984375, 'margin_dpo/margin_std': 435.8871765136719, 'logps/chosen': -629.8367919921875, 'logps/rejected': -956.7415771484375, 'logps/ref_chosen': -55.27437210083008, 'logps/ref_rejected': -89.02497863769531, 'KL/chosen_KL_mean': -574.5623779296875, 'KL/rejected_KL_mean': -867.7166748046875, 'KL/mean': -721.1395263671875, 'KL/std': 383.65875244140625, 'logits/chosen': -0.8075680732727051, 'logits/rejected': -0.8106831312179565, 'epoch': 0.68} + 68%|██████▊ | 463/681 [19:37<09:09, 2.52s/it] 68%|██████▊ | 464/681 [19:39<08:50, 2.45s/it] {'loss': 0.937, 'grad_norm': 36.297725677490234, 'learning_rate': 1.4086882387355658e-07, 'fcm_dpo/beta': 0.0013278971891850233, 'fcm_dpo/q_t': 0.35767611861228943, 'fcm_dpo/delta': -0.24185608327388763, 'fcm_dpo/margin': 470.54376220703125, 'margin_dpo/margin_mean': 470.5437316894531, 'margin_dpo/margin_std': 456.50927734375, 'logps/chosen': -626.0076904296875, 'logps/rejected': -1148.1285400390625, 'logps/ref_chosen': -50.91230010986328, 'logps/ref_rejected': -102.4893798828125, 'KL/chosen_KL_mean': -575.095458984375, 'KL/rejected_KL_mean': -1045.63916015625, 'KL/mean': -810.3673095703125, 'KL/std': 450.2059631347656, 'logits/chosen': -0.7917243242263794, 'logits/rejected': -0.8560171127319336, 'epoch': 0.68} + 68%|██████▊ | 464/681 [19:39<08:50, 2.45s/it] 68%|██████▊ | 465/681 [19:42<08:55, 2.48s/it] {'loss': 1.0321, 'grad_norm': 36.88157653808594, 'learning_rate': 1.3971566441730714e-07, 'fcm_dpo/beta': 0.0012997114099562168, 'fcm_dpo/q_t': 0.38136354088783264, 'fcm_dpo/delta': -0.12087617814540863, 'fcm_dpo/margin': 396.10296630859375, 'margin_dpo/margin_mean': 396.10296630859375, 'margin_dpo/margin_std': 493.5492858886719, 'logps/chosen': -632.348876953125, 'logps/rejected': -1082.281005859375, 'logps/ref_chosen': -60.116851806640625, 'logps/ref_rejected': -113.94602966308594, 'KL/chosen_KL_mean': -572.2319946289062, 'KL/rejected_KL_mean': -968.3349609375, 'KL/mean': -770.283447265625, 'KL/std': 461.42987060546875, 'logits/chosen': -0.7635716199874878, 'logits/rejected': -0.7767517566680908, 'epoch': 0.68} + 68%|██████▊ | 465/681 [19:42<08:55, 2.48s/it] 68%|██████▊ | 466/681 [19:44<09:02, 2.52s/it] {'loss': 1.0895, 'grad_norm': 33.14835739135742, 'learning_rate': 1.3856541105586545e-07, 'fcm_dpo/beta': 0.001274168025702238, 'fcm_dpo/q_t': 0.4008180499076843, 'fcm_dpo/delta': -0.03375307843089104, 'fcm_dpo/margin': 338.7154541015625, 'margin_dpo/margin_mean': 338.7154541015625, 'margin_dpo/margin_std': 476.6962890625, 'logps/chosen': -686.234375, 'logps/rejected': -1062.3443603515625, 'logps/ref_chosen': -52.920921325683594, 'logps/ref_rejected': -90.3154296875, 'KL/chosen_KL_mean': -633.3134765625, 'KL/rejected_KL_mean': -972.0289306640625, 'KL/mean': -802.6712646484375, 'KL/std': 447.70947265625, 'logits/chosen': -0.8126999139785767, 'logits/rejected': -0.8147940635681152, 'epoch': 0.68} + 68%|██████▊ | 466/681 [19:44<09:02, 2.52s/it] 69%|██████▊ | 467/681 [19:47<08:57, 2.51s/it] {'loss': 1.1674, 'grad_norm': 69.0197525024414, 'learning_rate': 1.3741809409947729e-07, 'fcm_dpo/beta': 0.001259978162124753, 'fcm_dpo/q_t': 0.40663182735443115, 'fcm_dpo/delta': -0.03823067247867584, 'fcm_dpo/margin': 344.94549560546875, 'margin_dpo/margin_mean': 344.94549560546875, 'margin_dpo/margin_std': 677.0098876953125, 'logps/chosen': -885.61474609375, 'logps/rejected': -1254.70458984375, 'logps/ref_chosen': -78.7158203125, 'logps/ref_rejected': -102.86019897460938, 'KL/chosen_KL_mean': -806.89892578125, 'KL/rejected_KL_mean': -1151.8443603515625, 'KL/mean': -979.3716430664062, 'KL/std': 577.361572265625, 'logits/chosen': -0.920897901058197, 'logits/rejected': -0.8992031812667847, 'epoch': 0.69} + 69%|██████▊ | 467/681 [19:47<08:57, 2.51s/it] 69%|██████▊ | 468/681 [19:49<09:02, 2.55s/it] {'loss': 1.028, 'grad_norm': 38.766334533691406, 'learning_rate': 1.362737437810114e-07, 'fcm_dpo/beta': 0.0012417640537023544, 'fcm_dpo/q_t': 0.3849431276321411, 'fcm_dpo/delta': -0.13427412509918213, 'fcm_dpo/margin': 424.33441162109375, 'margin_dpo/margin_mean': 424.33441162109375, 'margin_dpo/margin_std': 557.9710693359375, 'logps/chosen': -695.2952270507812, 'logps/rejected': -1150.7230224609375, 'logps/ref_chosen': -69.93536376953125, 'logps/ref_rejected': -101.02880859375, 'KL/chosen_KL_mean': -625.35986328125, 'KL/rejected_KL_mean': -1049.6942138671875, 'KL/mean': -837.527099609375, 'KL/std': 500.7874755859375, 'logits/chosen': -0.869031548500061, 'logits/rejected': -0.8773350715637207, 'epoch': 0.69} + 69%|██████▊ | 468/681 [19:50<09:02, 2.55s/it] 69%|██████▉ | 469/681 [19:52<09:17, 2.63s/it] {'loss': 1.0321, 'grad_norm': 37.197357177734375, 'learning_rate': 1.351323902551631e-07, 'fcm_dpo/beta': 0.0012096271384507418, 'fcm_dpo/q_t': 0.38767051696777344, 'fcm_dpo/delta': -0.09502536803483963, 'fcm_dpo/margin': 404.11572265625, 'margin_dpo/margin_mean': 404.11572265625, 'margin_dpo/margin_std': 464.22857666015625, 'logps/chosen': -716.05419921875, 'logps/rejected': -1156.8316650390625, 'logps/ref_chosen': -68.12469482421875, 'logps/ref_rejected': -104.78640747070312, 'KL/chosen_KL_mean': -647.9295654296875, 'KL/rejected_KL_mean': -1052.0452880859375, 'KL/mean': -849.9874267578125, 'KL/std': 426.935791015625, 'logits/chosen': -0.8826281428337097, 'logits/rejected': -0.8875974416732788, 'epoch': 0.69} + 69%|██████▉ | 469/681 [19:52<09:17, 2.63s/it] 69%|██████▉ | 470/681 [19:55<09:09, 2.60s/it] {'loss': 1.0611, 'grad_norm': 26.235424041748047, 'learning_rate': 1.339940635976592e-07, 'fcm_dpo/beta': 0.001205753069370985, 'fcm_dpo/q_t': 0.3957051932811737, 'fcm_dpo/delta': -0.052081190049648285, 'fcm_dpo/margin': 372.99566650390625, 'margin_dpo/margin_mean': 372.99566650390625, 'margin_dpo/margin_std': 466.9468078613281, 'logps/chosen': -566.755126953125, 'logps/rejected': -978.6617431640625, 'logps/ref_chosen': -43.791927337646484, 'logps/ref_rejected': -82.70285034179688, 'KL/chosen_KL_mean': -522.9632568359375, 'KL/rejected_KL_mean': -895.9589233398438, 'KL/mean': -709.4610595703125, 'KL/std': 429.58807373046875, 'logits/chosen': -0.8188216686248779, 'logits/rejected': -0.8245443105697632, 'epoch': 0.69} + 69%|██████▉ | 470/681 [19:55<09:09, 2.60s/it] 69%|██████▉ | 471/681 [19:57<08:48, 2.52s/it] {'loss': 1.1159, 'grad_norm': 32.16667938232422, 'learning_rate': 1.3285879380446563e-07, 'fcm_dpo/beta': 0.0011935688089579344, 'fcm_dpo/q_t': 0.41308581829071045, 'fcm_dpo/delta': 0.009803693741559982, 'fcm_dpo/margin': 326.6017761230469, 'margin_dpo/margin_mean': 326.601806640625, 'margin_dpo/margin_std': 494.537353515625, 'logps/chosen': -722.0469970703125, 'logps/rejected': -1068.9197998046875, 'logps/ref_chosen': -63.33952331542969, 'logps/ref_rejected': -83.61048126220703, 'KL/chosen_KL_mean': -658.70751953125, 'KL/rejected_KL_mean': -985.309326171875, 'KL/mean': -822.0084228515625, 'KL/std': 467.14373779296875, 'logits/chosen': -0.90375816822052, 'logits/rejected': -0.9042317271232605, 'epoch': 0.69} + 69%|██████▉ | 471/681 [19:57<08:48, 2.52s/it] 69%|██████▉ | 472/681 [20:00<09:01, 2.59s/it] {'loss': 1.08, 'grad_norm': 28.73564338684082, 'learning_rate': 1.317266107909975e-07, 'fcm_dpo/beta': 0.0011852658353745937, 'fcm_dpo/q_t': 0.3995856046676636, 'fcm_dpo/delta': -0.07024183124303818, 'fcm_dpo/margin': 393.58203125, 'margin_dpo/margin_mean': 393.58203125, 'margin_dpo/margin_std': 579.6232299804688, 'logps/chosen': -724.6898193359375, 'logps/rejected': -1151.81494140625, 'logps/ref_chosen': -83.66610717773438, 'logps/ref_rejected': -117.20919799804688, 'KL/chosen_KL_mean': -641.023681640625, 'KL/rejected_KL_mean': -1034.605712890625, 'KL/mean': -837.814697265625, 'KL/std': 537.2503662109375, 'logits/chosen': -0.8875927925109863, 'logits/rejected': -0.8611509203910828, 'epoch': 0.69} + 69%|██████▉ | 472/681 [20:00<09:01, 2.59s/it] 69%|██████▉ | 473/681 [20:03<09:05, 2.62s/it] {'loss': 1.3228, 'grad_norm': 99.1730728149414, 'learning_rate': 1.3059754439133002e-07, 'fcm_dpo/beta': 0.0012000746792182326, 'fcm_dpo/q_t': 0.4505438506603241, 'fcm_dpo/delta': 0.06275806576013565, 'fcm_dpo/margin': 189.72039794921875, 'margin_dpo/margin_mean': 189.72039794921875, 'margin_dpo/margin_std': 697.6124267578125, 'logps/chosen': -821.022705078125, 'logps/rejected': -1028.3927001953125, 'logps/ref_chosen': -63.49696731567383, 'logps/ref_rejected': -81.14657592773438, 'KL/chosen_KL_mean': -757.5256958007812, 'KL/rejected_KL_mean': -947.24609375, 'KL/mean': -852.3858642578125, 'KL/std': 554.10888671875, 'logits/chosen': -0.8745533227920532, 'logits/rejected': -0.8379828929901123, 'epoch': 0.69} + 69%|██████▉ | 473/681 [20:03<09:05, 2.62s/it] 70%|██████▉ | 474/681 [20:05<09:07, 2.65s/it] {'loss': 1.1519, 'grad_norm': 34.411014556884766, 'learning_rate': 1.2947162435741277e-07, 'fcm_dpo/beta': 0.0011911317706108093, 'fcm_dpo/q_t': 0.41488319635391235, 'fcm_dpo/delta': -0.0750809758901596, 'fcm_dpo/margin': 314.18511962890625, 'margin_dpo/margin_mean': 314.1850891113281, 'margin_dpo/margin_std': 546.5555419921875, 'logps/chosen': -680.64794921875, 'logps/rejected': -1032.301513671875, 'logps/ref_chosen': -52.6119384765625, 'logps/ref_rejected': -90.08041381835938, 'KL/chosen_KL_mean': -628.0360107421875, 'KL/rejected_KL_mean': -942.2210693359375, 'KL/mean': -785.1285400390625, 'KL/std': 488.5404052734375, 'logits/chosen': -0.820152223110199, 'logits/rejected': -0.8232827186584473, 'epoch': 0.7} + 70%|██████▉ | 474/681 [20:05<09:07, 2.65s/it] 70%|██████▉ | 475/681 [20:08<08:54, 2.59s/it] {'loss': 1.0303, 'grad_norm': 29.762432098388672, 'learning_rate': 1.2834888035828596e-07, 'fcm_dpo/beta': 0.0011684303171932697, 'fcm_dpo/q_t': 0.39084818959236145, 'fcm_dpo/delta': -0.07601547241210938, 'fcm_dpo/margin': 404.2965087890625, 'margin_dpo/margin_mean': 404.2965087890625, 'margin_dpo/margin_std': 443.20880126953125, 'logps/chosen': -511.6790466308594, 'logps/rejected': -963.5432739257812, 'logps/ref_chosen': -42.49519348144531, 'logps/ref_rejected': -90.06294250488281, 'KL/chosen_KL_mean': -469.183837890625, 'KL/rejected_KL_mean': -873.4803466796875, 'KL/mean': -671.3321533203125, 'KL/std': 405.08099365234375, 'logits/chosen': -0.8730387687683105, 'logits/rejected': -0.896369993686676, 'epoch': 0.7} + 70%|██████▉ | 475/681 [20:08<08:54, 2.59s/it] 70%|██████▉ | 476/681 [20:10<08:49, 2.58s/it] {'loss': 1.1123, 'grad_norm': 34.20625686645508, 'learning_rate': 1.2722934197929802e-07, 'fcm_dpo/beta': 0.0011697396403178573, 'fcm_dpo/q_t': 0.41562318801879883, 'fcm_dpo/delta': 0.03224332630634308, 'fcm_dpo/margin': 315.4133605957031, 'margin_dpo/margin_mean': 315.4133605957031, 'margin_dpo/margin_std': 440.5125732421875, 'logps/chosen': -619.4176025390625, 'logps/rejected': -965.591796875, 'logps/ref_chosen': -42.94938278198242, 'logps/ref_rejected': -73.71023559570312, 'KL/chosen_KL_mean': -576.46826171875, 'KL/rejected_KL_mean': -891.881591796875, 'KL/mean': -734.1749267578125, 'KL/std': 429.33795166015625, 'logits/chosen': -0.8082433342933655, 'logits/rejected': -0.8174213171005249, 'epoch': 0.7} + 70%|██████▉ | 476/681 [20:10<08:49, 2.58s/it] 70%|███████ | 477/681 [20:13<08:46, 2.58s/it] {'loss': 1.1364, 'grad_norm': 26.809673309326172, 'learning_rate': 1.2611303872132631e-07, 'fcm_dpo/beta': 0.0011787796393036842, 'fcm_dpo/q_t': 0.4136529564857483, 'fcm_dpo/delta': 0.02154078520834446, 'fcm_dpo/margin': 321.58892822265625, 'margin_dpo/margin_mean': 321.5888977050781, 'margin_dpo/margin_std': 541.0950927734375, 'logps/chosen': -669.265380859375, 'logps/rejected': -996.2190551757812, 'logps/ref_chosen': -70.77261352539062, 'logps/ref_rejected': -76.13737487792969, 'KL/chosen_KL_mean': -598.4927978515625, 'KL/rejected_KL_mean': -920.0816650390625, 'KL/mean': -759.2872314453125, 'KL/std': 456.7637939453125, 'logits/chosen': -0.860885739326477, 'logits/rejected': -0.8121699094772339, 'epoch': 0.7} + 70%|███████ | 477/681 [20:13<08:46, 2.58s/it] 70%|███████ | 478/681 [20:16<08:48, 2.60s/it] {'loss': 1.0715, 'grad_norm': 28.476211547851562, 'learning_rate': 1.2500000000000005e-07, 'fcm_dpo/beta': 0.0011755165178328753, 'fcm_dpo/q_t': 0.4015880227088928, 'fcm_dpo/delta': -0.02756289392709732, 'fcm_dpo/margin': 362.6502685546875, 'margin_dpo/margin_mean': 362.6502685546875, 'margin_dpo/margin_std': 451.751708984375, 'logps/chosen': -532.5169067382812, 'logps/rejected': -939.088623046875, 'logps/ref_chosen': -41.440513610839844, 'logps/ref_rejected': -85.36196899414062, 'KL/chosen_KL_mean': -491.0763854980469, 'KL/rejected_KL_mean': -853.7266845703125, 'KL/mean': -672.4014892578125, 'KL/std': 389.57879638671875, 'logits/chosen': -0.7467737197875977, 'logits/rejected': -0.7583505511283875, 'epoch': 0.7} + 70%|███████ | 478/681 [20:16<08:48, 2.60s/it] 70%|███████ | 479/681 [20:18<08:35, 2.55s/it] {'loss': 1.1094, 'grad_norm': 28.581802368164062, 'learning_rate': 1.2389025514492456e-07, 'fcm_dpo/beta': 0.001176186604425311, 'fcm_dpo/q_t': 0.40878403186798096, 'fcm_dpo/delta': -0.0067070163786411285, 'fcm_dpo/margin': 345.01519775390625, 'margin_dpo/margin_mean': 345.01519775390625, 'margin_dpo/margin_std': 522.553955078125, 'logps/chosen': -669.9825439453125, 'logps/rejected': -1056.2061767578125, 'logps/ref_chosen': -53.907920837402344, 'logps/ref_rejected': -95.1163330078125, 'KL/chosen_KL_mean': -616.0745849609375, 'KL/rejected_KL_mean': -961.08984375, 'KL/mean': -788.5822143554688, 'KL/std': 471.95684814453125, 'logits/chosen': -0.7714790105819702, 'logits/rejected': -0.794031023979187, 'epoch': 0.7} + 70%|███████ | 479/681 [20:18<08:35, 2.55s/it] 70%|███████ | 480/681 [20:20<08:23, 2.51s/it] {'loss': 1.1653, 'grad_norm': 48.71305847167969, 'learning_rate': 1.227838333989088e-07, 'fcm_dpo/beta': 0.00116480584256351, 'fcm_dpo/q_t': 0.42370662093162537, 'fcm_dpo/delta': -0.03519085794687271, 'fcm_dpo/margin': 283.5472106933594, 'margin_dpo/margin_mean': 283.5472412109375, 'margin_dpo/margin_std': 481.92913818359375, 'logps/chosen': -755.9378051757812, 'logps/rejected': -1063.73486328125, 'logps/ref_chosen': -58.682701110839844, 'logps/ref_rejected': -82.93248748779297, 'KL/chosen_KL_mean': -697.255126953125, 'KL/rejected_KL_mean': -980.8023071289062, 'KL/mean': -839.0286865234375, 'KL/std': 442.7588806152344, 'logits/chosen': -0.7846644520759583, 'logits/rejected': -0.7700395584106445, 'epoch': 0.7} + 70%|███████ | 480/681 [20:20<08:23, 2.51s/it] 71%|███████ | 481/681 [20:23<08:20, 2.50s/it] {'loss': 1.0382, 'grad_norm': 37.28335189819336, 'learning_rate': 1.2168076391719489e-07, 'fcm_dpo/beta': 0.0011470152530819178, 'fcm_dpo/q_t': 0.39048588275909424, 'fcm_dpo/delta': -0.08663056790828705, 'fcm_dpo/margin': 420.6690368652344, 'margin_dpo/margin_mean': 420.6690368652344, 'margin_dpo/margin_std': 509.59088134765625, 'logps/chosen': -661.0614013671875, 'logps/rejected': -1119.1866455078125, 'logps/ref_chosen': -54.964271545410156, 'logps/ref_rejected': -92.42044067382812, 'KL/chosen_KL_mean': -606.09716796875, 'KL/rejected_KL_mean': -1026.76611328125, 'KL/mean': -816.431640625, 'KL/std': 473.70269775390625, 'logits/chosen': -0.8456419110298157, 'logits/rejected': -0.8666043281555176, 'epoch': 0.71} + 71%|███████ | 481/681 [20:23<08:20, 2.50s/it] 71%|███████ | 482/681 [20:26<08:38, 2.60s/it] {'loss': 1.2584, 'grad_norm': 46.77339172363281, 'learning_rate': 1.2058107576668938e-07, 'fcm_dpo/beta': 0.0011683362536132336, 'fcm_dpo/q_t': 0.4412637948989868, 'fcm_dpo/delta': 0.15478384494781494, 'fcm_dpo/margin': 213.07933044433594, 'margin_dpo/margin_mean': 213.07931518554688, 'margin_dpo/margin_std': 576.9671630859375, 'logps/chosen': -737.368896484375, 'logps/rejected': -970.4843139648438, 'logps/ref_chosen': -67.553466796875, 'logps/ref_rejected': -87.58953857421875, 'KL/chosen_KL_mean': -669.8154907226562, 'KL/rejected_KL_mean': -882.894775390625, 'KL/mean': -776.3551025390625, 'KL/std': 481.4432373046875, 'logits/chosen': -0.7833594083786011, 'logits/rejected': -0.7691007852554321, 'epoch': 0.71} + 71%|███████ | 482/681 [20:26<08:38, 2.60s/it] 71%|███████ | 483/681 [20:28<08:36, 2.61s/it] {'loss': 1.0182, 'grad_norm': 31.144506454467773, 'learning_rate': 1.194847979251979e-07, 'fcm_dpo/beta': 0.0011535545345395803, 'fcm_dpo/q_t': 0.3826148509979248, 'fcm_dpo/delta': -0.13136449456214905, 'fcm_dpo/margin': 454.625732421875, 'margin_dpo/margin_mean': 454.6257629394531, 'margin_dpo/margin_std': 549.6389770507812, 'logps/chosen': -657.1290893554688, 'logps/rejected': -1144.2119140625, 'logps/ref_chosen': -63.32981872558594, 'logps/ref_rejected': -95.78697204589844, 'KL/chosen_KL_mean': -593.7992553710938, 'KL/rejected_KL_mean': -1048.425048828125, 'KL/mean': -821.1121826171875, 'KL/std': 477.17486572265625, 'logits/chosen': -0.871976375579834, 'logits/rejected': -0.878372311592102, 'epoch': 0.71} + 71%|███████ | 483/681 [20:28<08:36, 2.61s/it] 71%|███████ | 484/681 [20:31<08:17, 2.52s/it] {'loss': 1.0499, 'grad_norm': 40.899009704589844, 'learning_rate': 1.1839195928066101e-07, 'fcm_dpo/beta': 0.0011427226709201932, 'fcm_dpo/q_t': 0.39542460441589355, 'fcm_dpo/delta': -0.06293704360723495, 'fcm_dpo/margin': 402.230712890625, 'margin_dpo/margin_mean': 402.230712890625, 'margin_dpo/margin_std': 477.49810791015625, 'logps/chosen': -578.3538818359375, 'logps/rejected': -1005.8179321289062, 'logps/ref_chosen': -59.13812255859375, 'logps/ref_rejected': -84.37144470214844, 'KL/chosen_KL_mean': -519.2157592773438, 'KL/rejected_KL_mean': -921.4464721679688, 'KL/mean': -720.3311767578125, 'KL/std': 480.59307861328125, 'logits/chosen': -0.8334769010543823, 'logits/rejected': -0.8463296890258789, 'epoch': 0.71} + 71%|███████ | 484/681 [20:31<08:17, 2.52s/it] 71%|███████ | 485/681 [20:33<08:12, 2.51s/it] {'loss': 1.0859, 'grad_norm': 32.848182678222656, 'learning_rate': 1.1730258863039347e-07, 'fcm_dpo/beta': 0.0011293399147689342, 'fcm_dpo/q_t': 0.40266501903533936, 'fcm_dpo/delta': -0.03417329490184784, 'fcm_dpo/margin': 383.07708740234375, 'margin_dpo/margin_mean': 383.07708740234375, 'margin_dpo/margin_std': 541.0684814453125, 'logps/chosen': -609.33154296875, 'logps/rejected': -1036.923095703125, 'logps/ref_chosen': -58.849571228027344, 'logps/ref_rejected': -103.36408233642578, 'KL/chosen_KL_mean': -550.48193359375, 'KL/rejected_KL_mean': -933.5590209960938, 'KL/mean': -742.0205078125, 'KL/std': 464.6655578613281, 'logits/chosen': -0.8008699417114258, 'logits/rejected': -0.8162240386009216, 'epoch': 0.71} + 71%|███████ | 485/681 [20:33<08:12, 2.51s/it] 71%|███████▏ | 486/681 [20:35<07:43, 2.38s/it] {'loss': 1.072, 'grad_norm': 31.21572494506836, 'learning_rate': 1.1621671468032493e-07, 'fcm_dpo/beta': 0.0011101996060460806, 'fcm_dpo/q_t': 0.39419782161712646, 'fcm_dpo/delta': -0.079122394323349, 'fcm_dpo/margin': 428.12445068359375, 'margin_dpo/margin_mean': 428.12445068359375, 'margin_dpo/margin_std': 619.9473876953125, 'logps/chosen': -676.9362182617188, 'logps/rejected': -1141.9404296875, 'logps/ref_chosen': -55.25966262817383, 'logps/ref_rejected': -92.13936614990234, 'KL/chosen_KL_mean': -621.676513671875, 'KL/rejected_KL_mean': -1049.801025390625, 'KL/mean': -835.73876953125, 'KL/std': 494.6927185058594, 'logits/chosen': -0.908329963684082, 'logits/rejected': -0.9156872034072876, 'epoch': 0.71} + 71%|███████▏ | 486/681 [20:35<07:43, 2.38s/it] 72%|███████▏ | 487/681 [20:38<07:58, 2.46s/it] {'loss': 1.125, 'grad_norm': 42.0443229675293, 'learning_rate': 1.1513436604424378e-07, 'fcm_dpo/beta': 0.0011161823058500886, 'fcm_dpo/q_t': 0.41450557112693787, 'fcm_dpo/delta': 0.0350751131772995, 'fcm_dpo/margin': 327.7616271972656, 'margin_dpo/margin_mean': 327.7616271972656, 'margin_dpo/margin_std': 490.9307861328125, 'logps/chosen': -720.7981567382812, 'logps/rejected': -1087.915283203125, 'logps/ref_chosen': -53.06330871582031, 'logps/ref_rejected': -92.41883087158203, 'KL/chosen_KL_mean': -667.73486328125, 'KL/rejected_KL_mean': -995.4964599609375, 'KL/mean': -831.6156616210938, 'KL/std': 467.8983154296875, 'logits/chosen': -0.8875995874404907, 'logits/rejected': -0.891254186630249, 'epoch': 0.72} + 72%|███████▏ | 487/681 [20:38<07:58, 2.46s/it] 72%|███████▏ | 488/681 [20:41<08:05, 2.52s/it] {'loss': 1.1052, 'grad_norm': 32.32245635986328, 'learning_rate': 1.1405557124304335e-07, 'fcm_dpo/beta': 0.0011240593157708645, 'fcm_dpo/q_t': 0.4142517149448395, 'fcm_dpo/delta': 0.02986850030720234, 'fcm_dpo/margin': 329.82708740234375, 'margin_dpo/margin_mean': 329.82708740234375, 'margin_dpo/margin_std': 427.99810791015625, 'logps/chosen': -606.0442504882812, 'logps/rejected': -967.6497802734375, 'logps/ref_chosen': -52.22815704345703, 'logps/ref_rejected': -84.00656127929688, 'KL/chosen_KL_mean': -553.8161010742188, 'KL/rejected_KL_mean': -883.6431884765625, 'KL/mean': -718.7296142578125, 'KL/std': 415.6312255859375, 'logits/chosen': -0.8679848313331604, 'logits/rejected': -0.8698484301567078, 'epoch': 0.72} + 72%|███████▏ | 488/681 [20:41<08:05, 2.52s/it] 72%|███████▏ | 489/681 [20:43<07:59, 2.50s/it] {'loss': 1.1145, 'grad_norm': 32.1641960144043, 'learning_rate': 1.1298035870396985e-07, 'fcm_dpo/beta': 0.001128336414694786, 'fcm_dpo/q_t': 0.4163089990615845, 'fcm_dpo/delta': 0.031364768743515015, 'fcm_dpo/margin': 327.616943359375, 'margin_dpo/margin_mean': 327.616943359375, 'margin_dpo/margin_std': 469.63079833984375, 'logps/chosen': -572.338134765625, 'logps/rejected': -923.3636474609375, 'logps/ref_chosen': -55.989627838134766, 'logps/ref_rejected': -79.39812469482422, 'KL/chosen_KL_mean': -516.3485107421875, 'KL/rejected_KL_mean': -843.9655151367188, 'KL/mean': -680.156982421875, 'KL/std': 421.83648681640625, 'logits/chosen': -0.9042928218841553, 'logits/rejected': -0.8924728035926819, 'epoch': 0.72} + 72%|███████▏ | 489/681 [20:43<07:59, 2.50s/it] 72%|███████▏ | 490/681 [20:46<08:10, 2.57s/it] {'loss': 1.1397, 'grad_norm': 57.53245544433594, 'learning_rate': 1.1190875675987355e-07, 'fcm_dpo/beta': 0.001132056349888444, 'fcm_dpo/q_t': 0.4120814800262451, 'fcm_dpo/delta': 0.006162045523524284, 'fcm_dpo/margin': 347.96881103515625, 'margin_dpo/margin_mean': 347.96881103515625, 'margin_dpo/margin_std': 604.8751220703125, 'logps/chosen': -657.6494140625, 'logps/rejected': -1063.660888671875, 'logps/ref_chosen': -52.36639404296875, 'logps/ref_rejected': -110.4090576171875, 'KL/chosen_KL_mean': -605.282958984375, 'KL/rejected_KL_mean': -953.2518310546875, 'KL/mean': -779.2673950195312, 'KL/std': 508.89208984375, 'logits/chosen': -0.9138531684875488, 'logits/rejected': -0.9565045833587646, 'epoch': 0.72} + 72%|███████▏ | 490/681 [20:46<08:10, 2.57s/it] 72%|███████▏ | 491/681 [20:48<08:01, 2.54s/it] {'loss': 1.195, 'grad_norm': 30.0527400970459, 'learning_rate': 1.1084079364846241e-07, 'fcm_dpo/beta': 0.0011535290395841002, 'fcm_dpo/q_t': 0.438266396522522, 'fcm_dpo/delta': 0.1326218992471695, 'fcm_dpo/margin': 234.71804809570312, 'margin_dpo/margin_mean': 234.7180633544922, 'margin_dpo/margin_std': 441.885009765625, 'logps/chosen': -620.6292724609375, 'logps/rejected': -868.50390625, 'logps/ref_chosen': -60.11626434326172, 'logps/ref_rejected': -73.27278900146484, 'KL/chosen_KL_mean': -560.5130615234375, 'KL/rejected_KL_mean': -795.2310791015625, 'KL/mean': -677.8720703125, 'KL/std': 434.89801025390625, 'logits/chosen': -0.887237548828125, 'logits/rejected': -0.8714909553527832, 'epoch': 0.72} + 72%|███████▏ | 491/681 [20:48<08:01, 2.54s/it] 72%|███████▏ | 492/681 [20:51<08:03, 2.56s/it] {'loss': 1.2094, 'grad_norm': 29.874557495117188, 'learning_rate': 1.097764975115576e-07, 'fcm_dpo/beta': 0.0011779199121519923, 'fcm_dpo/q_t': 0.4370569884777069, 'fcm_dpo/delta': 0.121956005692482, 'fcm_dpo/margin': 239.11178588867188, 'margin_dpo/margin_mean': 239.1117706298828, 'margin_dpo/margin_std': 507.32025146484375, 'logps/chosen': -625.1259155273438, 'logps/rejected': -882.9031372070312, 'logps/ref_chosen': -53.994178771972656, 'logps/ref_rejected': -72.65962219238281, 'KL/chosen_KL_mean': -571.1317138671875, 'KL/rejected_KL_mean': -810.2435302734375, 'KL/mean': -690.6876220703125, 'KL/std': 437.35736083984375, 'logits/chosen': -0.9431591033935547, 'logits/rejected': -0.9226012229919434, 'epoch': 0.72} + 72%|███████▏ | 492/681 [20:51<08:03, 2.56s/it] 72%|███████▏ | 493/681 [20:54<08:09, 2.60s/it] {'loss': 1.1687, 'grad_norm': 33.20534896850586, 'learning_rate': 1.0871589639435203e-07, 'fcm_dpo/beta': 0.0011845249682664871, 'fcm_dpo/q_t': 0.42683732509613037, 'fcm_dpo/delta': -0.016040312126278877, 'fcm_dpo/margin': 261.7166748046875, 'margin_dpo/margin_mean': 261.7166748046875, 'margin_dpo/margin_std': 446.52606201171875, 'logps/chosen': -670.9610595703125, 'logps/rejected': -944.5035400390625, 'logps/ref_chosen': -75.49723815917969, 'logps/ref_rejected': -87.32301330566406, 'KL/chosen_KL_mean': -595.4638061523438, 'KL/rejected_KL_mean': -857.1805419921875, 'KL/mean': -726.3221435546875, 'KL/std': 469.4154968261719, 'logits/chosen': -0.9668236970901489, 'logits/rejected': -0.9259661436080933, 'epoch': 0.72} + 72%|███████▏ | 493/681 [20:54<08:09, 2.60s/it] 73%|███████▎ | 494/681 [20:56<08:07, 2.61s/it] {'loss': 1.0237, 'grad_norm': 43.84877395629883, 'learning_rate': 1.0765901824467166e-07, 'fcm_dpo/beta': 0.0011693753767758608, 'fcm_dpo/q_t': 0.38879674673080444, 'fcm_dpo/delta': -0.08248934149742126, 'fcm_dpo/margin': 409.18218994140625, 'margin_dpo/margin_mean': 409.18218994140625, 'margin_dpo/margin_std': 432.674072265625, 'logps/chosen': -519.382080078125, 'logps/rejected': -973.29638671875, 'logps/ref_chosen': -41.35926818847656, 'logps/ref_rejected': -86.09136962890625, 'KL/chosen_KL_mean': -478.02276611328125, 'KL/rejected_KL_mean': -887.2049560546875, 'KL/mean': -682.6138916015625, 'KL/std': 429.03424072265625, 'logits/chosen': -0.7634121179580688, 'logits/rejected': -0.7962794303894043, 'epoch': 0.73} + 73%|███████▎ | 494/681 [20:56<08:07, 2.61s/it] 73%|███████▎ | 495/681 [20:59<08:16, 2.67s/it] {'loss': 1.0991, 'grad_norm': 42.19923782348633, 'learning_rate': 1.0660589091223854e-07, 'fcm_dpo/beta': 0.0011636005947366357, 'fcm_dpo/q_t': 0.40708619356155396, 'fcm_dpo/delta': -0.020662881433963776, 'fcm_dpo/margin': 360.75115966796875, 'margin_dpo/margin_mean': 360.7511901855469, 'margin_dpo/margin_std': 544.379638671875, 'logps/chosen': -611.1054077148438, 'logps/rejected': -999.7459716796875, 'logps/ref_chosen': -63.53507995605469, 'logps/ref_rejected': -91.42443084716797, 'KL/chosen_KL_mean': -547.5703125, 'KL/rejected_KL_mean': -908.321533203125, 'KL/mean': -727.9459228515625, 'KL/std': 453.545654296875, 'logits/chosen': -0.9473215937614441, 'logits/rejected': -0.9521135091781616, 'epoch': 0.73} + 73%|███████▎ | 495/681 [20:59<08:16, 2.67s/it] 73%|███████▎ | 496/681 [21:02<08:20, 2.70s/it] {'loss': 1.2108, 'grad_norm': 57.562477111816406, 'learning_rate': 1.0555654214793722e-07, 'fcm_dpo/beta': 0.0011893340852111578, 'fcm_dpo/q_t': 0.44347870349884033, 'fcm_dpo/delta': 0.1618269979953766, 'fcm_dpo/margin': 203.4831085205078, 'margin_dpo/margin_mean': 203.48312377929688, 'margin_dpo/margin_std': 389.7113037109375, 'logps/chosen': -716.1163940429688, 'logps/rejected': -931.3369140625, 'logps/ref_chosen': -72.5919189453125, 'logps/ref_rejected': -84.32933807373047, 'KL/chosen_KL_mean': -643.5244750976562, 'KL/rejected_KL_mean': -847.007568359375, 'KL/mean': -745.2659912109375, 'KL/std': 351.7397766113281, 'logits/chosen': -0.9014628529548645, 'logits/rejected': -0.8666530251502991, 'epoch': 0.73} + 73%|███████▎ | 496/681 [21:02<08:20, 2.70s/it] 73%|███████▎ | 497/681 [21:04<08:10, 2.67s/it] {'loss': 1.2269, 'grad_norm': 40.526710510253906, 'learning_rate': 1.0451099960308374e-07, 'fcm_dpo/beta': 0.001203190186060965, 'fcm_dpo/q_t': 0.4448572099208832, 'fcm_dpo/delta': 0.019497813656926155, 'fcm_dpo/margin': 196.86973571777344, 'margin_dpo/margin_mean': 196.86973571777344, 'margin_dpo/margin_std': 415.5872802734375, 'logps/chosen': -687.9178466796875, 'logps/rejected': -902.4820556640625, 'logps/ref_chosen': -58.59397506713867, 'logps/ref_rejected': -76.28836822509766, 'KL/chosen_KL_mean': -629.3239135742188, 'KL/rejected_KL_mean': -826.1937255859375, 'KL/mean': -727.7587890625, 'KL/std': 427.0351867675781, 'logits/chosen': -0.879808783531189, 'logits/rejected': -0.8550307154655457, 'epoch': 0.73} + 73%|███████▎ | 497/681 [21:04<08:10, 2.67s/it] 73%|███████▎ | 498/681 [21:07<08:11, 2.69s/it] {'loss': 1.1448, 'grad_norm': 28.048709869384766, 'learning_rate': 1.0346929082869641e-07, 'fcm_dpo/beta': 0.00121046113781631, 'fcm_dpo/q_t': 0.416517436504364, 'fcm_dpo/delta': 0.04058893769979477, 'fcm_dpo/margin': 298.13177490234375, 'margin_dpo/margin_mean': 298.1318054199219, 'margin_dpo/margin_std': 504.37261962890625, 'logps/chosen': -695.6629028320312, 'logps/rejected': -1006.547119140625, 'logps/ref_chosen': -71.20565795898438, 'logps/ref_rejected': -83.95803833007812, 'KL/chosen_KL_mean': -624.457275390625, 'KL/rejected_KL_mean': -922.5890502929688, 'KL/mean': -773.5231323242188, 'KL/std': 460.3760986328125, 'logits/chosen': -0.8946305513381958, 'logits/rejected': -0.874454140663147, 'epoch': 0.73} + 73%|███████▎ | 498/681 [21:07<08:11, 2.69s/it] 73%|███████▎ | 499/681 [21:10<07:59, 2.63s/it] {'loss': 1.0497, 'grad_norm': 45.138729095458984, 'learning_rate': 1.0243144327477013e-07, 'fcm_dpo/beta': 0.0011967134196311235, 'fcm_dpo/q_t': 0.39139658212661743, 'fcm_dpo/delta': -0.08238838613033295, 'fcm_dpo/margin': 399.6925964355469, 'margin_dpo/margin_mean': 399.692626953125, 'margin_dpo/margin_std': 514.5656127929688, 'logps/chosen': -586.55908203125, 'logps/rejected': -1036.0751953125, 'logps/ref_chosen': -51.25519561767578, 'logps/ref_rejected': -101.07870483398438, 'KL/chosen_KL_mean': -535.3038940429688, 'KL/rejected_KL_mean': -934.9964599609375, 'KL/mean': -735.150146484375, 'KL/std': 470.1824951171875, 'logits/chosen': -0.8791499733924866, 'logits/rejected': -0.9143052697181702, 'epoch': 0.73} + 73%|███████▎ | 499/681 [21:10<07:59, 2.63s/it] 73%|███████▎ | 500/681 [21:12<07:46, 2.58s/it] {'loss': 1.0946, 'grad_norm': 30.78356170654297, 'learning_rate': 1.0139748428955333e-07, 'fcm_dpo/beta': 0.0011844468535855412, 'fcm_dpo/q_t': 0.4010527729988098, 'fcm_dpo/delta': -0.04147026687860489, 'fcm_dpo/margin': 371.12213134765625, 'margin_dpo/margin_mean': 371.12213134765625, 'margin_dpo/margin_std': 553.8121337890625, 'logps/chosen': -716.9344482421875, 'logps/rejected': -1124.96337890625, 'logps/ref_chosen': -57.027442932128906, 'logps/ref_rejected': -93.93421173095703, 'KL/chosen_KL_mean': -659.9070434570312, 'KL/rejected_KL_mean': -1031.0291748046875, 'KL/mean': -845.4681396484375, 'KL/std': 449.5928039550781, 'logits/chosen': -0.8759046792984009, 'logits/rejected': -0.9078898429870605, 'epoch': 0.73} + 73%|███████▎ | 500/681 [21:12<07:46, 2.58s/it] 74%|███████▎ | 501/681 [21:15<07:41, 2.56s/it] {'loss': 1.1294, 'grad_norm': 37.05495834350586, 'learning_rate': 1.0036744111882672e-07, 'fcm_dpo/beta': 0.001185835339128971, 'fcm_dpo/q_t': 0.4099680185317993, 'fcm_dpo/delta': -0.005243198946118355, 'fcm_dpo/margin': 341.43072509765625, 'margin_dpo/margin_mean': 341.4306640625, 'margin_dpo/margin_std': 574.979248046875, 'logps/chosen': -637.1468505859375, 'logps/rejected': -1004.3746948242188, 'logps/ref_chosen': -54.359527587890625, 'logps/ref_rejected': -80.15670013427734, 'KL/chosen_KL_mean': -582.7872924804688, 'KL/rejected_KL_mean': -924.218017578125, 'KL/mean': -753.5026245117188, 'KL/std': 446.186767578125, 'logits/chosen': -0.8284963369369507, 'logits/rejected': -0.8059309720993042, 'epoch': 0.74} + 74%|███████▎ | 501/681 [21:15<07:41, 2.56s/it] 74%|███████▎ | 502/681 [21:17<07:40, 2.57s/it] {'loss': 1.0823, 'grad_norm': 26.800430297851562, 'learning_rate': 9.934134090518592e-08, 'fcm_dpo/beta': 0.0011830935254693031, 'fcm_dpo/q_t': 0.40695464611053467, 'fcm_dpo/delta': -0.006993459537625313, 'fcm_dpo/margin': 343.71856689453125, 'margin_dpo/margin_mean': 343.71856689453125, 'margin_dpo/margin_std': 436.3647155761719, 'logps/chosen': -619.2677001953125, 'logps/rejected': -978.33447265625, 'logps/ref_chosen': -67.60050964355469, 'logps/ref_rejected': -82.94876098632812, 'KL/chosen_KL_mean': -551.6671142578125, 'KL/rejected_KL_mean': -895.3857421875, 'KL/mean': -723.5264282226562, 'KL/std': 402.4056091308594, 'logits/chosen': -0.8308136463165283, 'logits/rejected': -0.8038866519927979, 'epoch': 0.74} + 74%|███████▎ | 502/681 [21:17<07:40, 2.57s/it] 74%|███████▍ | 503/681 [21:20<07:46, 2.62s/it] {'loss': 1.1058, 'grad_norm': 32.493919372558594, 'learning_rate': 9.831921068732571e-08, 'fcm_dpo/beta': 0.001180183608084917, 'fcm_dpo/q_t': 0.41350266337394714, 'fcm_dpo/delta': 0.017100892961025238, 'fcm_dpo/margin': 324.90771484375, 'margin_dpo/margin_mean': 324.90771484375, 'margin_dpo/margin_std': 457.2688293457031, 'logps/chosen': -602.74169921875, 'logps/rejected': -955.0765380859375, 'logps/ref_chosen': -55.078407287597656, 'logps/ref_rejected': -82.50544738769531, 'KL/chosen_KL_mean': -547.663330078125, 'KL/rejected_KL_mean': -872.571044921875, 'KL/mean': -710.1171875, 'KL/std': 404.2541809082031, 'logits/chosen': -0.7998204231262207, 'logits/rejected': -0.7804523706436157, 'epoch': 0.74} + 74%|███████▍ | 503/681 [21:20<07:46, 2.62s/it] 74%|███████▍ | 504/681 [21:22<07:41, 2.60s/it] {'loss': 1.0663, 'grad_norm': 34.34120178222656, 'learning_rate': 9.730107739932805e-08, 'fcm_dpo/beta': 0.0011717536253854632, 'fcm_dpo/q_t': 0.39360561966896057, 'fcm_dpo/delta': -0.07761284708976746, 'fcm_dpo/margin': 404.5033874511719, 'margin_dpo/margin_mean': 404.50335693359375, 'margin_dpo/margin_std': 555.4103393554688, 'logps/chosen': -654.0133666992188, 'logps/rejected': -1102.3131103515625, 'logps/ref_chosen': -59.96575164794922, 'logps/ref_rejected': -103.76212310791016, 'KL/chosen_KL_mean': -594.047607421875, 'KL/rejected_KL_mean': -998.551025390625, 'KL/mean': -796.29931640625, 'KL/std': 500.2449645996094, 'logits/chosen': -0.849440336227417, 'logits/rejected': -0.8721122741699219, 'epoch': 0.74} + 74%|███████▍ | 504/681 [21:22<07:41, 2.60s/it] 74%|███████▍ | 505/681 [21:25<07:36, 2.60s/it] {'loss': 1.2136, 'grad_norm': 45.66891098022461, 'learning_rate': 9.628696786995188e-08, 'fcm_dpo/beta': 0.0011943629942834377, 'fcm_dpo/q_t': 0.4423941969871521, 'fcm_dpo/delta': 0.15151187777519226, 'fcm_dpo/margin': 211.07833862304688, 'margin_dpo/margin_mean': 211.07835388183594, 'margin_dpo/margin_std': 428.6888427734375, 'logps/chosen': -739.5933227539062, 'logps/rejected': -963.1021728515625, 'logps/ref_chosen': -76.1549072265625, 'logps/ref_rejected': -88.58537292480469, 'KL/chosen_KL_mean': -663.4384155273438, 'KL/rejected_KL_mean': -874.516845703125, 'KL/mean': -768.9776611328125, 'KL/std': 459.5237731933594, 'logits/chosen': -0.8771257400512695, 'logits/rejected': -0.8454539775848389, 'epoch': 0.74} + 74%|███████▍ | 505/681 [21:25<07:36, 2.60s/it] 74%|███████▍ | 506/681 [21:27<07:23, 2.54s/it] {'loss': 1.0838, 'grad_norm': 24.074623107910156, 'learning_rate': 9.527690882192635e-08, 'fcm_dpo/beta': 0.00119347358122468, 'fcm_dpo/q_t': 0.40311557054519653, 'fcm_dpo/delta': -0.025928327813744545, 'fcm_dpo/margin': 355.7054748535156, 'margin_dpo/margin_mean': 355.7054748535156, 'margin_dpo/margin_std': 483.68572998046875, 'logps/chosen': -568.5978393554688, 'logps/rejected': -953.7578125, 'logps/ref_chosen': -48.96050262451172, 'logps/ref_rejected': -78.41505432128906, 'KL/chosen_KL_mean': -519.6373291015625, 'KL/rejected_KL_mean': -875.3427734375, 'KL/mean': -697.4900512695312, 'KL/std': 440.137939453125, 'logits/chosen': -0.8389706611633301, 'logits/rejected': -0.8414362668991089, 'epoch': 0.74} + 74%|███████▍ | 506/681 [21:27<07:23, 2.54s/it] 74%|███████▍ | 507/681 [21:30<07:25, 2.56s/it] {'loss': 1.1265, 'grad_norm': 34.941505432128906, 'learning_rate': 9.427092687124691e-08, 'fcm_dpo/beta': 0.001195873599499464, 'fcm_dpo/q_t': 0.4149119257926941, 'fcm_dpo/delta': 0.011642876081168652, 'fcm_dpo/margin': 325.09228515625, 'margin_dpo/margin_mean': 325.09228515625, 'margin_dpo/margin_std': 534.9813232421875, 'logps/chosen': -627.6681518554688, 'logps/rejected': -981.3319091796875, 'logps/ref_chosen': -66.80149841308594, 'logps/ref_rejected': -95.37289428710938, 'KL/chosen_KL_mean': -560.86669921875, 'KL/rejected_KL_mean': -885.958984375, 'KL/mean': -723.412841796875, 'KL/std': 489.5423583984375, 'logits/chosen': -0.8597081899642944, 'logits/rejected': -0.8631129264831543, 'epoch': 0.74} + 74%|███████▍ | 507/681 [21:30<07:25, 2.56s/it] 75%|███████▍ | 508/681 [21:33<07:32, 2.61s/it] {'loss': 1.2015, 'grad_norm': 38.527687072753906, 'learning_rate': 9.326904852647344e-08, 'fcm_dpo/beta': 0.0012166362721472979, 'fcm_dpo/q_t': 0.4304507374763489, 'fcm_dpo/delta': 0.08571073412895203, 'fcm_dpo/margin': 260.345703125, 'margin_dpo/margin_mean': 260.345703125, 'margin_dpo/margin_std': 558.1546630859375, 'logps/chosen': -676.5274658203125, 'logps/rejected': -961.197265625, 'logps/ref_chosen': -71.303466796875, 'logps/ref_rejected': -95.6275405883789, 'KL/chosen_KL_mean': -605.2239990234375, 'KL/rejected_KL_mean': -865.5697021484375, 'KL/mean': -735.3968505859375, 'KL/std': 466.6279296875, 'logits/chosen': -0.8243334889411926, 'logits/rejected': -0.8081272840499878, 'epoch': 0.75} + 75%|███████▍ | 508/681 [21:33<07:32, 2.61s/it] 75%|███████▍ | 509/681 [21:35<07:29, 2.61s/it] {'loss': 1.1423, 'grad_norm': 32.919189453125, 'learning_rate': 9.227130018803195e-08, 'fcm_dpo/beta': 0.001233469694852829, 'fcm_dpo/q_t': 0.4210923910140991, 'fcm_dpo/delta': 0.05139687657356262, 'fcm_dpo/margin': 283.694091796875, 'margin_dpo/margin_mean': 283.6940612792969, 'margin_dpo/margin_std': 457.988525390625, 'logps/chosen': -558.4744262695312, 'logps/rejected': -861.60595703125, 'logps/ref_chosen': -63.81895065307617, 'logps/ref_rejected': -83.25643920898438, 'KL/chosen_KL_mean': -494.65545654296875, 'KL/rejected_KL_mean': -778.3495483398438, 'KL/mean': -636.5025024414062, 'KL/std': 360.523193359375, 'logits/chosen': -0.7569400668144226, 'logits/rejected': -0.7431646585464478, 'epoch': 0.75} + 75%|███████▍ | 509/681 [21:35<07:29, 2.61s/it] 75%|███████▍ | 510/681 [21:38<07:31, 2.64s/it] {'loss': 1.0368, 'grad_norm': 34.82098388671875, 'learning_rate': 9.127770814751932e-08, 'fcm_dpo/beta': 0.001223585568368435, 'fcm_dpo/q_t': 0.39347004890441895, 'fcm_dpo/delta': -0.05761527270078659, 'fcm_dpo/margin': 371.8828125, 'margin_dpo/margin_mean': 371.8828125, 'margin_dpo/margin_std': 393.40496826171875, 'logps/chosen': -636.17138671875, 'logps/rejected': -1058.94091796875, 'logps/ref_chosen': -51.878448486328125, 'logps/ref_rejected': -102.7651596069336, 'KL/chosen_KL_mean': -584.29296875, 'KL/rejected_KL_mean': -956.17578125, 'KL/mean': -770.234375, 'KL/std': 406.34039306640625, 'logits/chosen': -0.7776767611503601, 'logits/rejected': -0.7976189851760864, 'epoch': 0.75} + 75%|███████▍ | 510/681 [21:38<07:31, 2.64s/it] 75%|███████▌ | 511/681 [21:41<07:26, 2.62s/it] {'loss': 1.1419, 'grad_norm': 30.70000648498535, 'learning_rate': 9.028829858700973e-08, 'fcm_dpo/beta': 0.0012225550599396229, 'fcm_dpo/q_t': 0.41429954767227173, 'fcm_dpo/delta': 0.0234109815210104, 'fcm_dpo/margin': 308.7491455078125, 'margin_dpo/margin_mean': 308.7491455078125, 'margin_dpo/margin_std': 537.5291748046875, 'logps/chosen': -604.494140625, 'logps/rejected': -945.8619384765625, 'logps/ref_chosen': -60.23811721801758, 'logps/ref_rejected': -92.85676574707031, 'KL/chosen_KL_mean': -544.2560424804688, 'KL/rejected_KL_mean': -853.0051879882812, 'KL/mean': -698.630615234375, 'KL/std': 448.10504150390625, 'logits/chosen': -0.8361300230026245, 'logits/rejected': -0.8324530124664307, 'epoch': 0.75} + 75%|███████▌ | 511/681 [21:41<07:26, 2.62s/it] 75%|███████▌ | 512/681 [21:43<07:05, 2.52s/it] {'loss': 1.0147, 'grad_norm': 36.188262939453125, 'learning_rate': 8.930309757836516e-08, 'fcm_dpo/beta': 0.0012053523678332567, 'fcm_dpo/q_t': 0.3863917589187622, 'fcm_dpo/delta': -0.09257997572422028, 'fcm_dpo/margin': 404.79510498046875, 'margin_dpo/margin_mean': 404.79510498046875, 'margin_dpo/margin_std': 416.42584228515625, 'logps/chosen': -505.7539978027344, 'logps/rejected': -937.51953125, 'logps/ref_chosen': -54.905494689941406, 'logps/ref_rejected': -81.87586975097656, 'KL/chosen_KL_mean': -450.8485107421875, 'KL/rejected_KL_mean': -855.6436157226562, 'KL/mean': -653.24609375, 'KL/std': 413.8804931640625, 'logits/chosen': -0.8050196170806885, 'logits/rejected': -0.8159193396568298, 'epoch': 0.75} + 75%|███████▌ | 512/681 [21:43<07:05, 2.52s/it] 75%|███████▌ | 513/681 [21:46<07:13, 2.58s/it] {'loss': 1.1508, 'grad_norm': 37.652217864990234, 'learning_rate': 8.832213108254863e-08, 'fcm_dpo/beta': 0.0011912956833839417, 'fcm_dpo/q_t': 0.42267322540283203, 'fcm_dpo/delta': -0.04120471701025963, 'fcm_dpo/margin': 281.6656799316406, 'margin_dpo/margin_mean': 281.6656494140625, 'margin_dpo/margin_std': 448.5667724609375, 'logps/chosen': -624.47119140625, 'logps/rejected': -917.2828369140625, 'logps/ref_chosen': -64.91644287109375, 'logps/ref_rejected': -76.06245422363281, 'KL/chosen_KL_mean': -559.5546875, 'KL/rejected_KL_mean': -841.2203979492188, 'KL/mean': -700.3875732421875, 'KL/std': 382.072021484375, 'logits/chosen': -0.8725820183753967, 'logits/rejected': -0.8468344211578369, 'epoch': 0.75} + 75%|███████▌ | 513/681 [21:46<07:13, 2.58s/it] 75%|███████▌ | 514/681 [21:48<07:15, 2.61s/it] {'loss': 1.1703, 'grad_norm': 31.276504516601562, 'learning_rate': 8.734542494893954e-08, 'fcm_dpo/beta': 0.0012082626344636083, 'fcm_dpo/q_t': 0.4297820031642914, 'fcm_dpo/delta': 0.08919873833656311, 'fcm_dpo/margin': 259.47412109375, 'margin_dpo/margin_mean': 259.4741516113281, 'margin_dpo/margin_std': 465.32330322265625, 'logps/chosen': -663.6689453125, 'logps/rejected': -927.8590698242188, 'logps/ref_chosen': -74.22957611083984, 'logps/ref_rejected': -78.945556640625, 'KL/chosen_KL_mean': -589.4393920898438, 'KL/rejected_KL_mean': -848.9135131835938, 'KL/mean': -719.176513671875, 'KL/std': 411.193359375, 'logits/chosen': -0.8355896472930908, 'logits/rejected': -0.809748649597168, 'epoch': 0.75} + 75%|███████▌ | 514/681 [21:48<07:15, 2.61s/it] 76%|███████▌ | 515/681 [21:51<07:13, 2.61s/it] {'loss': 1.1746, 'grad_norm': 41.97957992553711, 'learning_rate': 8.637300491465272e-08, 'fcm_dpo/beta': 0.0012302729301154613, 'fcm_dpo/q_t': 0.4292389154434204, 'fcm_dpo/delta': 0.09674014896154404, 'fcm_dpo/margin': 248.9129638671875, 'margin_dpo/margin_mean': 248.9129638671875, 'margin_dpo/margin_std': 451.3743896484375, 'logps/chosen': -552.275146484375, 'logps/rejected': -837.88427734375, 'logps/ref_chosen': -50.40156555175781, 'logps/ref_rejected': -87.09774780273438, 'KL/chosen_KL_mean': -501.87353515625, 'KL/rejected_KL_mean': -750.7864990234375, 'KL/mean': -626.3300170898438, 'KL/std': 381.322021484375, 'logits/chosen': -0.8100356459617615, 'logits/rejected': -0.8176305294036865, 'epoch': 0.76} + 76%|███████▌ | 515/681 [21:51<07:13, 2.61s/it] 76%|███████▌ | 516/681 [21:53<06:50, 2.49s/it] {'loss': 1.0631, 'grad_norm': 31.350547790527344, 'learning_rate': 8.540489660386064e-08, 'fcm_dpo/beta': 0.0012345185969024897, 'fcm_dpo/q_t': 0.3999173939228058, 'fcm_dpo/delta': -0.031035784631967545, 'fcm_dpo/margin': 347.9979248046875, 'margin_dpo/margin_mean': 347.9979553222656, 'margin_dpo/margin_std': 409.755126953125, 'logps/chosen': -577.350830078125, 'logps/rejected': -972.4215698242188, 'logps/ref_chosen': -64.64956665039062, 'logps/ref_rejected': -111.72237396240234, 'KL/chosen_KL_mean': -512.7012939453125, 'KL/rejected_KL_mean': -860.69921875, 'KL/mean': -686.7001953125, 'KL/std': 412.843017578125, 'logits/chosen': -0.883063018321991, 'logits/rejected': -0.9014164209365845, 'epoch': 0.76} + 76%|███████▌ | 516/681 [21:53<06:50, 2.49s/it] 76%|███████▌ | 517/681 [21:56<06:49, 2.50s/it] {'loss': 1.0379, 'grad_norm': 25.801740646362305, 'learning_rate': 8.444112552711752e-08, 'fcm_dpo/beta': 0.0012100373860448599, 'fcm_dpo/q_t': 0.39081257581710815, 'fcm_dpo/delta': -0.09078750759363174, 'fcm_dpo/margin': 401.75970458984375, 'margin_dpo/margin_mean': 401.759765625, 'margin_dpo/margin_std': 496.04901123046875, 'logps/chosen': -599.474609375, 'logps/rejected': -1029.4039306640625, 'logps/ref_chosen': -60.913551330566406, 'logps/ref_rejected': -89.08308410644531, 'KL/chosen_KL_mean': -538.5610961914062, 'KL/rejected_KL_mean': -940.3209228515625, 'KL/mean': -739.44091796875, 'KL/std': 451.02313232421875, 'logits/chosen': -0.8275068402290344, 'logits/rejected': -0.8201382160186768, 'epoch': 0.76} + 76%|███████▌ | 517/681 [21:56<06:49, 2.50s/it] 76%|███████▌ | 518/681 [21:58<06:58, 2.56s/it] {'loss': 1.0897, 'grad_norm': 30.128890991210938, 'learning_rate': 8.348171708068747e-08, 'fcm_dpo/beta': 0.0012026941403746605, 'fcm_dpo/q_t': 0.4073007106781006, 'fcm_dpo/delta': 0.0016661733388900757, 'fcm_dpo/margin': 331.0551452636719, 'margin_dpo/margin_mean': 331.05511474609375, 'margin_dpo/margin_std': 424.0645751953125, 'logps/chosen': -577.3682861328125, 'logps/rejected': -936.2802124023438, 'logps/ref_chosen': -57.45589065551758, 'logps/ref_rejected': -85.31269836425781, 'KL/chosen_KL_mean': -519.912353515625, 'KL/rejected_KL_mean': -850.967529296875, 'KL/mean': -685.43994140625, 'KL/std': 387.7268371582031, 'logits/chosen': -0.8605848550796509, 'logits/rejected': -0.8738881945610046, 'epoch': 0.76} + 76%|███████▌ | 518/681 [21:58<06:58, 2.56s/it] 76%|███████▌ | 519/681 [22:01<07:15, 2.69s/it] {'loss': 1.193, 'grad_norm': 43.568424224853516, 'learning_rate': 8.25266965458755e-08, 'fcm_dpo/beta': 0.0012314484920352697, 'fcm_dpo/q_t': 0.4326602518558502, 'fcm_dpo/delta': 0.1115046888589859, 'fcm_dpo/margin': 236.4358367919922, 'margin_dpo/margin_mean': 236.4358367919922, 'margin_dpo/margin_std': 456.2001953125, 'logps/chosen': -595.201171875, 'logps/rejected': -862.0178833007812, 'logps/ref_chosen': -74.06331634521484, 'logps/ref_rejected': -104.44416809082031, 'KL/chosen_KL_mean': -521.1378784179688, 'KL/rejected_KL_mean': -757.57373046875, 'KL/mean': -639.3558349609375, 'KL/std': 362.4400939941406, 'logits/chosen': -0.8680436015129089, 'logits/rejected': -0.847466230392456, 'epoch': 0.76} + 76%|███████▌ | 519/681 [22:01<07:15, 2.69s/it] 76%|███████▋ | 520/681 [22:04<07:11, 2.68s/it] {'loss': 1.1323, 'grad_norm': 27.024534225463867, 'learning_rate': 8.15760890883607e-08, 'fcm_dpo/beta': 0.001238158904016018, 'fcm_dpo/q_t': 0.41828474402427673, 'fcm_dpo/delta': 0.03627597913146019, 'fcm_dpo/margin': 294.8129577636719, 'margin_dpo/margin_mean': 294.8129577636719, 'margin_dpo/margin_std': 456.40606689453125, 'logps/chosen': -639.4136962890625, 'logps/rejected': -963.908203125, 'logps/ref_chosen': -70.2998275756836, 'logps/ref_rejected': -99.98133850097656, 'KL/chosen_KL_mean': -569.1138916015625, 'KL/rejected_KL_mean': -863.9268188476562, 'KL/mean': -716.5203857421875, 'KL/std': 410.4988098144531, 'logits/chosen': -0.779923677444458, 'logits/rejected': -0.7774548530578613, 'epoch': 0.76} + 76%|███████▋ | 520/681 [22:04<07:11, 2.68s/it] 77%|███████▋ | 521/681 [22:07<07:07, 2.67s/it] {'loss': 1.089, 'grad_norm': 31.882793426513672, 'learning_rate': 8.062991975753378e-08, 'fcm_dpo/beta': 0.0012491261586546898, 'fcm_dpo/q_t': 0.40750136971473694, 'fcm_dpo/delta': -0.0038999132812023163, 'fcm_dpo/margin': 322.8023681640625, 'margin_dpo/margin_mean': 322.8023681640625, 'margin_dpo/margin_std': 418.15899658203125, 'logps/chosen': -554.1742553710938, 'logps/rejected': -902.1143188476562, 'logps/ref_chosen': -58.14292526245117, 'logps/ref_rejected': -83.28060913085938, 'KL/chosen_KL_mean': -496.0313415527344, 'KL/rejected_KL_mean': -818.833740234375, 'KL/mean': -657.4324951171875, 'KL/std': 417.3710021972656, 'logits/chosen': -0.8779969215393066, 'logits/rejected': -0.8690969944000244, 'epoch': 0.77} + 77%|███████▋ | 521/681 [22:07<07:07, 2.67s/it] 77%|███████▋ | 522/681 [22:09<07:01, 2.65s/it] {'loss': 1.1203, 'grad_norm': 31.201757431030273, 'learning_rate': 7.968821348583643e-08, 'fcm_dpo/beta': 0.001243784325197339, 'fcm_dpo/q_t': 0.4114811420440674, 'fcm_dpo/delta': 0.009488995186984539, 'fcm_dpo/margin': 314.26910400390625, 'margin_dpo/margin_mean': 314.26910400390625, 'margin_dpo/margin_std': 489.2512512207031, 'logps/chosen': -630.123046875, 'logps/rejected': -963.8583984375, 'logps/ref_chosen': -46.54766845703125, 'logps/ref_rejected': -66.01388549804688, 'KL/chosen_KL_mean': -583.5753784179688, 'KL/rejected_KL_mean': -897.844482421875, 'KL/mean': -740.7099609375, 'KL/std': 467.26300048828125, 'logits/chosen': -0.8858053088188171, 'logits/rejected': -0.8832094669342041, 'epoch': 0.77} + 77%|███████▋ | 522/681 [22:09<07:01, 2.65s/it] 77%|███████▋ | 523/681 [22:12<07:02, 2.67s/it] {'loss': 1.1124, 'grad_norm': 33.407474517822266, 'learning_rate': 7.875099508810484e-08, 'fcm_dpo/beta': 0.0012416969984769821, 'fcm_dpo/q_t': 0.40742552280426025, 'fcm_dpo/delta': -0.012975066900253296, 'fcm_dpo/margin': 332.15032958984375, 'margin_dpo/margin_mean': 332.15032958984375, 'margin_dpo/margin_std': 522.383056640625, 'logps/chosen': -649.0885009765625, 'logps/rejected': -1003.2305908203125, 'logps/ref_chosen': -61.76960372924805, 'logps/ref_rejected': -83.76141357421875, 'KL/chosen_KL_mean': -587.31884765625, 'KL/rejected_KL_mean': -919.4691772460938, 'KL/mean': -753.39404296875, 'KL/std': 486.9326477050781, 'logits/chosen': -0.9253371953964233, 'logits/rejected': -0.9181835055351257, 'epoch': 0.77} + 77%|███████▋ | 523/681 [22:12<07:02, 2.67s/it] 77%|███████▋ | 524/681 [22:15<06:57, 2.66s/it] {'loss': 1.114, 'grad_norm': 39.42531967163086, 'learning_rate': 7.781828926091535e-08, 'fcm_dpo/beta': 0.0012377724051475525, 'fcm_dpo/q_t': 0.40860164165496826, 'fcm_dpo/delta': 0.009213726967573166, 'fcm_dpo/margin': 315.73992919921875, 'margin_dpo/margin_mean': 315.7398986816406, 'margin_dpo/margin_std': 464.941162109375, 'logps/chosen': -671.448974609375, 'logps/rejected': -990.4188232421875, 'logps/ref_chosen': -78.0720443725586, 'logps/ref_rejected': -81.30198669433594, 'KL/chosen_KL_mean': -593.376953125, 'KL/rejected_KL_mean': -909.1168212890625, 'KL/mean': -751.2468872070312, 'KL/std': 469.7861633300781, 'logits/chosen': -0.9645393490791321, 'logits/rejected': -0.9483359456062317, 'epoch': 0.77} + 77%|███████▋ | 524/681 [22:15<06:57, 2.66s/it] 77%|███████▋ | 525/681 [22:17<06:57, 2.67s/it] {'loss': 1.0139, 'grad_norm': 28.412078857421875, 'learning_rate': 7.689012058193384e-08, 'fcm_dpo/beta': 0.0012156711891293526, 'fcm_dpo/q_t': 0.3830450177192688, 'fcm_dpo/delta': -0.13079476356506348, 'fcm_dpo/margin': 430.468017578125, 'margin_dpo/margin_mean': 430.468017578125, 'margin_dpo/margin_std': 507.2789306640625, 'logps/chosen': -588.2077026367188, 'logps/rejected': -1067.90087890625, 'logps/ref_chosen': -50.827857971191406, 'logps/ref_rejected': -100.05294036865234, 'KL/chosen_KL_mean': -537.3798828125, 'KL/rejected_KL_mean': -967.847900390625, 'KL/mean': -752.6138305664062, 'KL/std': 474.93743896484375, 'logits/chosen': -0.8482464551925659, 'logits/rejected': -0.8838850259780884, 'epoch': 0.77} + 77%|███████▋ | 525/681 [22:17<06:57, 2.67s/it] 77%|███████▋ | 526/681 [22:20<06:48, 2.63s/it] {'loss': 1.0352, 'grad_norm': 30.65778923034668, 'learning_rate': 7.596651350926836e-08, 'fcm_dpo/beta': 0.0011959581170231104, 'fcm_dpo/q_t': 0.384555459022522, 'fcm_dpo/delta': -0.11271873861551285, 'fcm_dpo/margin': 424.0378112792969, 'margin_dpo/margin_mean': 424.0378112792969, 'margin_dpo/margin_std': 526.3082885742188, 'logps/chosen': -656.988525390625, 'logps/rejected': -1104.16845703125, 'logps/ref_chosen': -63.167236328125, 'logps/ref_rejected': -86.30934143066406, 'KL/chosen_KL_mean': -593.8212890625, 'KL/rejected_KL_mean': -1017.8590087890625, 'KL/mean': -805.8401489257812, 'KL/std': 471.7684020996094, 'logits/chosen': -0.906538724899292, 'logits/rejected': -0.895421028137207, 'epoch': 0.77} + 77%|███████▋ | 526/681 [22:20<06:48, 2.63s/it] 77%|███████▋ | 527/681 [22:22<06:40, 2.60s/it] {'loss': 1.138, 'grad_norm': 31.139324188232422, 'learning_rate': 7.504749238082414e-08, 'fcm_dpo/beta': 0.0011956689413636923, 'fcm_dpo/q_t': 0.4224342703819275, 'fcm_dpo/delta': 0.07093732804059982, 'fcm_dpo/margin': 277.2255859375, 'margin_dpo/margin_mean': 277.2255859375, 'margin_dpo/margin_std': 403.221435546875, 'logps/chosen': -691.8319702148438, 'logps/rejected': -976.2713623046875, 'logps/ref_chosen': -71.12867736816406, 'logps/ref_rejected': -78.3425521850586, 'KL/chosen_KL_mean': -620.7032470703125, 'KL/rejected_KL_mean': -897.9288330078125, 'KL/mean': -759.3160400390625, 'KL/std': 481.2168273925781, 'logits/chosen': -1.0832974910736084, 'logits/rejected': -1.045976996421814, 'epoch': 0.77} + 77%|███████▋ | 527/681 [22:22<06:40, 2.60s/it] 78%|███████▊ | 528/681 [22:25<06:42, 2.63s/it] {'loss': 1.1349, 'grad_norm': 40.499786376953125, 'learning_rate': 7.413308141366254e-08, 'fcm_dpo/beta': 0.0012017192784696817, 'fcm_dpo/q_t': 0.41412389278411865, 'fcm_dpo/delta': 0.007483818102627993, 'fcm_dpo/margin': 326.864013671875, 'margin_dpo/margin_mean': 326.864013671875, 'margin_dpo/margin_std': 558.260986328125, 'logps/chosen': -692.6829833984375, 'logps/rejected': -1045.3675537109375, 'logps/ref_chosen': -68.0894546508789, 'logps/ref_rejected': -93.91006469726562, 'KL/chosen_KL_mean': -624.593505859375, 'KL/rejected_KL_mean': -951.45751953125, 'KL/mean': -788.0255126953125, 'KL/std': 467.02947998046875, 'logits/chosen': -0.9295456409454346, 'logits/rejected': -0.9090088605880737, 'epoch': 0.78} + 78%|███████▊ | 528/681 [22:25<06:42, 2.63s/it] 78%|███████▊ | 529/681 [22:28<06:42, 2.65s/it] {'loss': 1.2262, 'grad_norm': 66.41038513183594, 'learning_rate': 7.322330470336313e-08, 'fcm_dpo/beta': 0.0012046921765431762, 'fcm_dpo/q_t': 0.43421024084091187, 'fcm_dpo/delta': 0.005615768022835255, 'fcm_dpo/margin': 233.68368530273438, 'margin_dpo/margin_mean': 233.68368530273438, 'margin_dpo/margin_std': 538.73583984375, 'logps/chosen': -778.487060546875, 'logps/rejected': -1045.804931640625, 'logps/ref_chosen': -55.57495880126953, 'logps/ref_rejected': -89.20909118652344, 'KL/chosen_KL_mean': -722.912109375, 'KL/rejected_KL_mean': -956.5958251953125, 'KL/mean': -839.7539672851562, 'KL/std': 438.76708984375, 'logits/chosen': -0.9495022296905518, 'logits/rejected': -0.9608061909675598, 'epoch': 0.78} + 78%|███████▊ | 529/681 [22:28<06:42, 2.65s/it] 78%|███████▊ | 530/681 [22:31<06:49, 2.71s/it] {'loss': 1.1169, 'grad_norm': 55.660064697265625, 'learning_rate': 7.231818622338822e-08, 'fcm_dpo/beta': 0.001198010751977563, 'fcm_dpo/q_t': 0.40217673778533936, 'fcm_dpo/delta': -0.04596946761012077, 'fcm_dpo/margin': 370.57501220703125, 'margin_dpo/margin_mean': 370.574951171875, 'margin_dpo/margin_std': 630.6913452148438, 'logps/chosen': -636.2828369140625, 'logps/rejected': -1046.5408935546875, 'logps/ref_chosen': -47.601417541503906, 'logps/ref_rejected': -87.2845230102539, 'KL/chosen_KL_mean': -588.6814575195312, 'KL/rejected_KL_mean': -959.25634765625, 'KL/mean': -773.9688720703125, 'KL/std': 510.5151672363281, 'logits/chosen': -0.8760310411453247, 'logits/rejected': -0.8702448606491089, 'epoch': 0.78} + 78%|███████▊ | 530/681 [22:31<06:49, 2.71s/it] 78%|███████▊ | 531/681 [22:33<06:27, 2.58s/it] {'loss': 1.1404, 'grad_norm': 38.57360076904297, 'learning_rate': 7.141774982445147e-08, 'fcm_dpo/beta': 0.0012029436184093356, 'fcm_dpo/q_t': 0.4182535409927368, 'fcm_dpo/delta': 0.03640556335449219, 'fcm_dpo/margin': 303.2028503417969, 'margin_dpo/margin_mean': 303.2028503417969, 'margin_dpo/margin_std': 503.85516357421875, 'logps/chosen': -713.08544921875, 'logps/rejected': -1031.648193359375, 'logps/ref_chosen': -55.246063232421875, 'logps/ref_rejected': -70.60598754882812, 'KL/chosen_KL_mean': -657.83935546875, 'KL/rejected_KL_mean': -961.042236328125, 'KL/mean': -809.4407958984375, 'KL/std': 517.9159545898438, 'logits/chosen': -0.9681419134140015, 'logits/rejected': -0.9443149566650391, 'epoch': 0.78} + 78%|███████▊ | 531/681 [22:33<06:27, 2.58s/it] 78%|███████▊ | 532/681 [22:36<06:30, 2.62s/it] {'loss': 1.12, 'grad_norm': 54.94654846191406, 'learning_rate': 7.052201923388953e-08, 'fcm_dpo/beta': 0.0011894925264641643, 'fcm_dpo/q_t': 0.405168741941452, 'fcm_dpo/delta': -0.026376843452453613, 'fcm_dpo/margin': 356.58917236328125, 'margin_dpo/margin_mean': 356.58917236328125, 'margin_dpo/margin_std': 579.14501953125, 'logps/chosen': -706.236083984375, 'logps/rejected': -1079.1304931640625, 'logps/ref_chosen': -70.28601837158203, 'logps/ref_rejected': -86.5913314819336, 'KL/chosen_KL_mean': -635.9500732421875, 'KL/rejected_KL_mean': -992.5391845703125, 'KL/mean': -814.24462890625, 'KL/std': 492.651123046875, 'logits/chosen': -0.8919358849525452, 'logits/rejected': -0.8668221235275269, 'epoch': 0.78} + 78%|███████▊ | 532/681 [22:36<06:30, 2.62s/it] 78%|███████▊ | 533/681 [22:38<06:25, 2.61s/it] {'loss': 1.1921, 'grad_norm': 44.284828186035156, 'learning_rate': 6.963101805503646e-08, 'fcm_dpo/beta': 0.0012182076461613178, 'fcm_dpo/q_t': 0.4302961230278015, 'fcm_dpo/delta': 0.09259242564439774, 'fcm_dpo/margin': 254.03118896484375, 'margin_dpo/margin_mean': 254.03115844726562, 'margin_dpo/margin_std': 512.8229370117188, 'logps/chosen': -643.0594482421875, 'logps/rejected': -908.8236083984375, 'logps/ref_chosen': -64.8551025390625, 'logps/ref_rejected': -76.58805847167969, 'KL/chosen_KL_mean': -578.204345703125, 'KL/rejected_KL_mean': -832.2355346679688, 'KL/mean': -705.219970703125, 'KL/std': 433.0811767578125, 'logits/chosen': -0.8935759663581848, 'logits/rejected': -0.8628044128417969, 'epoch': 0.78} + 78%|███████▊ | 533/681 [22:38<06:25, 2.61s/it] 78%|███████▊ | 534/681 [22:41<06:21, 2.60s/it] {'loss': 1.0979, 'grad_norm': 33.67475509643555, 'learning_rate': 6.874476976660184e-08, 'fcm_dpo/beta': 0.0012101430911570787, 'fcm_dpo/q_t': 0.4076574742794037, 'fcm_dpo/delta': -0.0032868273556232452, 'fcm_dpo/margin': 332.4776611328125, 'margin_dpo/margin_mean': 332.4776611328125, 'margin_dpo/margin_std': 454.71746826171875, 'logps/chosen': -653.4230346679688, 'logps/rejected': -1004.3247680664062, 'logps/ref_chosen': -60.119388580322266, 'logps/ref_rejected': -78.54347229003906, 'KL/chosen_KL_mean': -593.3036499023438, 'KL/rejected_KL_mean': -925.7813110351562, 'KL/mean': -759.54248046875, 'KL/std': 448.52166748046875, 'logits/chosen': -0.8921518325805664, 'logits/rejected': -0.8880842328071594, 'epoch': 0.78} + 78%|███████▊ | 534/681 [22:41<06:21, 2.60s/it] 79%|███████▊ | 535/681 [22:43<06:18, 2.59s/it] {'loss': 1.0524, 'grad_norm': 31.497711181640625, 'learning_rate': 6.786329772205246e-08, 'fcm_dpo/beta': 0.0012131070252507925, 'fcm_dpo/q_t': 0.3943653106689453, 'fcm_dpo/delta': -0.06780680269002914, 'fcm_dpo/margin': 382.72406005859375, 'margin_dpo/margin_mean': 382.72406005859375, 'margin_dpo/margin_std': 464.69305419921875, 'logps/chosen': -550.4375, 'logps/rejected': -975.138916015625, 'logps/ref_chosen': -54.330238342285156, 'logps/ref_rejected': -96.30763244628906, 'KL/chosen_KL_mean': -496.10723876953125, 'KL/rejected_KL_mean': -878.831298828125, 'KL/mean': -687.46923828125, 'KL/std': 442.2324523925781, 'logits/chosen': -0.8254178166389465, 'logits/rejected': -0.8272514343261719, 'epoch': 0.79} + 79%|███████▊ | 535/681 [22:43<06:18, 2.59s/it] 79%|███████▊ | 536/681 [22:46<06:18, 2.61s/it] {'loss': 1.0291, 'grad_norm': 41.1716194152832, 'learning_rate': 6.698662514899638e-08, 'fcm_dpo/beta': 0.001176601741462946, 'fcm_dpo/q_t': 0.3865072429180145, 'fcm_dpo/delta': -0.13552269339561462, 'fcm_dpo/margin': 448.9898681640625, 'margin_dpo/margin_mean': 448.9898376464844, 'margin_dpo/margin_std': 610.5261840820312, 'logps/chosen': -507.267822265625, 'logps/rejected': -998.2750244140625, 'logps/ref_chosen': -47.08053207397461, 'logps/ref_rejected': -89.09783935546875, 'KL/chosen_KL_mean': -460.1872863769531, 'KL/rejected_KL_mean': -909.1771240234375, 'KL/mean': -684.6822509765625, 'KL/std': 509.9305114746094, 'logits/chosen': -0.8181933164596558, 'logits/rejected': -0.8456603288650513, 'epoch': 0.79} + 79%|███████▊ | 536/681 [22:46<06:18, 2.61s/it] 79%|███████▉ | 537/681 [22:49<06:13, 2.59s/it] {'loss': 1.1176, 'grad_norm': 32.44279861450195, 'learning_rate': 6.611477514857114e-08, 'fcm_dpo/beta': 0.0011767192045226693, 'fcm_dpo/q_t': 0.4111158847808838, 'fcm_dpo/delta': 0.013456817716360092, 'fcm_dpo/margin': 328.549560546875, 'margin_dpo/margin_mean': 328.5495910644531, 'margin_dpo/margin_std': 493.1300048828125, 'logps/chosen': -506.9363708496094, 'logps/rejected': -848.1768188476562, 'logps/ref_chosen': -57.747467041015625, 'logps/ref_rejected': -70.43838500976562, 'KL/chosen_KL_mean': -449.18890380859375, 'KL/rejected_KL_mean': -777.7384033203125, 'KL/mean': -613.4636840820312, 'KL/std': 419.79339599609375, 'logits/chosen': -0.8254159688949585, 'logits/rejected': -0.7971071004867554, 'epoch': 0.79} + 79%|███████▉ | 537/681 [22:49<06:13, 2.59s/it] 79%|███████▉ | 538/681 [22:51<06:12, 2.61s/it] {'loss': 1.0812, 'grad_norm': 33.99517059326172, 'learning_rate': 6.524777069483525e-08, 'fcm_dpo/beta': 0.0011678216978907585, 'fcm_dpo/q_t': 0.4062436521053314, 'fcm_dpo/delta': -0.011409275233745575, 'fcm_dpo/margin': 351.79400634765625, 'margin_dpo/margin_mean': 351.7939758300781, 'margin_dpo/margin_std': 454.0805358886719, 'logps/chosen': -658.2581787109375, 'logps/rejected': -1027.8642578125, 'logps/ref_chosen': -66.41594696044922, 'logps/ref_rejected': -84.22808837890625, 'KL/chosen_KL_mean': -591.84228515625, 'KL/rejected_KL_mean': -943.63623046875, 'KL/mean': -767.7392578125, 'KL/std': 430.4447021484375, 'logits/chosen': -0.8461936712265015, 'logits/rejected': -0.826158881187439, 'epoch': 0.79} + 79%|███████▉ | 538/681 [22:51<06:12, 2.61s/it] 79%|███████▉ | 539/681 [22:54<06:14, 2.64s/it] {'loss': 1.1041, 'grad_norm': 38.721561431884766, 'learning_rate': 6.438563463416221e-08, 'fcm_dpo/beta': 0.0011770533164963126, 'fcm_dpo/q_t': 0.41412806510925293, 'fcm_dpo/delta': 0.031044667586684227, 'fcm_dpo/margin': 314.33740234375, 'margin_dpo/margin_mean': 314.33740234375, 'margin_dpo/margin_std': 406.0032958984375, 'logps/chosen': -573.2938232421875, 'logps/rejected': -920.9923095703125, 'logps/ref_chosen': -58.492855072021484, 'logps/ref_rejected': -91.85395050048828, 'KL/chosen_KL_mean': -514.8009643554688, 'KL/rejected_KL_mean': -829.1383666992188, 'KL/mean': -671.9696655273438, 'KL/std': 363.4195861816406, 'logits/chosen': -0.9151267409324646, 'logits/rejected': -0.8990967273712158, 'epoch': 0.79} + 79%|███████▉ | 539/681 [22:54<06:14, 2.64s/it] 79%|███████▉ | 540/681 [22:56<06:09, 2.62s/it] {'loss': 1.0355, 'grad_norm': 41.09721374511719, 'learning_rate': 6.352838968463919e-08, 'fcm_dpo/beta': 0.0011629726504907012, 'fcm_dpo/q_t': 0.3882465958595276, 'fcm_dpo/delta': -0.10943492501974106, 'fcm_dpo/margin': 433.374267578125, 'margin_dpo/margin_mean': 433.374267578125, 'margin_dpo/margin_std': 532.08251953125, 'logps/chosen': -577.1629638671875, 'logps/rejected': -1063.4847412109375, 'logps/ref_chosen': -63.482513427734375, 'logps/ref_rejected': -116.42999267578125, 'KL/chosen_KL_mean': -513.6804809570312, 'KL/rejected_KL_mean': -947.0548095703125, 'KL/mean': -730.3675537109375, 'KL/std': 456.95489501953125, 'logits/chosen': -0.8337998390197754, 'logits/rejected': -0.8539774417877197, 'epoch': 0.79} + 79%|███████▉ | 540/681 [22:57<06:09, 2.62s/it] 79%|███████▉ | 541/681 [22:59<05:59, 2.56s/it] {'loss': 1.206, 'grad_norm': 53.4821662902832, 'learning_rate': 6.267605843546767e-08, 'fcm_dpo/beta': 0.0011470350436866283, 'fcm_dpo/q_t': 0.43513962626457214, 'fcm_dpo/delta': -0.01891069859266281, 'fcm_dpo/margin': 246.7801513671875, 'margin_dpo/margin_mean': 246.7801513671875, 'margin_dpo/margin_std': 501.5079040527344, 'logps/chosen': -699.38720703125, 'logps/rejected': -971.1607666015625, 'logps/ref_chosen': -78.28036499023438, 'logps/ref_rejected': -103.273681640625, 'KL/chosen_KL_mean': -621.1068725585938, 'KL/rejected_KL_mean': -867.8870849609375, 'KL/mean': -744.4969482421875, 'KL/std': 408.0279846191406, 'logits/chosen': -0.9593532085418701, 'logits/rejected': -0.9463214874267578, 'epoch': 0.79} + 79%|███████▉ | 541/681 [22:59<05:59, 2.56s/it] 80%|███████▉ | 542/681 [23:02<06:07, 2.64s/it] {'loss': 1.0499, 'grad_norm': 61.64540100097656, 'learning_rate': 6.182866334636888e-08, 'fcm_dpo/beta': 0.0011208573123440146, 'fcm_dpo/q_t': 0.38958051800727844, 'fcm_dpo/delta': -0.09979057312011719, 'fcm_dpo/margin': 439.63116455078125, 'margin_dpo/margin_mean': 439.631103515625, 'margin_dpo/margin_std': 572.5598754882812, 'logps/chosen': -590.4954223632812, 'logps/rejected': -1069.11669921875, 'logps/ref_chosen': -57.48497009277344, 'logps/ref_rejected': -96.47506713867188, 'KL/chosen_KL_mean': -533.010498046875, 'KL/rejected_KL_mean': -972.6415405273438, 'KL/mean': -752.8260498046875, 'KL/std': 486.2320556640625, 'logits/chosen': -0.9522601962089539, 'logits/rejected': -0.9825873374938965, 'epoch': 0.8} + 80%|███████▉ | 542/681 [23:02<06:07, 2.64s/it] 80%|███████▉ | 543/681 [23:04<06:02, 2.63s/it] {'loss': 1.207, 'grad_norm': 34.3123893737793, 'learning_rate': 6.098622674699147e-08, 'fcm_dpo/beta': 0.001132916659116745, 'fcm_dpo/q_t': 0.4342951774597168, 'fcm_dpo/delta': 0.06954119354486465, 'fcm_dpo/margin': 293.7606506347656, 'margin_dpo/margin_mean': 293.7606506347656, 'margin_dpo/margin_std': 677.75341796875, 'logps/chosen': -676.989013671875, 'logps/rejected': -1015.7310791015625, 'logps/ref_chosen': -60.61750793457031, 'logps/ref_rejected': -105.59896850585938, 'KL/chosen_KL_mean': -616.3714599609375, 'KL/rejected_KL_mean': -910.132080078125, 'KL/mean': -763.2518310546875, 'KL/std': 572.7112426757812, 'logits/chosen': -0.8914676904678345, 'logits/rejected': -0.9207860231399536, 'epoch': 0.8} + 80%|███████▉ | 543/681 [23:04<06:02, 2.63s/it] 80%|███████▉ | 544/681 [23:07<05:55, 2.59s/it] {'loss': 1.1041, 'grad_norm': 31.13963508605957, 'learning_rate': 6.01487708363232e-08, 'fcm_dpo/beta': 0.0011378147173672915, 'fcm_dpo/q_t': 0.40781134366989136, 'fcm_dpo/delta': -0.007483053486794233, 'fcm_dpo/margin': 357.8401184082031, 'margin_dpo/margin_mean': 357.8401184082031, 'margin_dpo/margin_std': 532.9642333984375, 'logps/chosen': -680.517822265625, 'logps/rejected': -1079.67041015625, 'logps/ref_chosen': -59.642303466796875, 'logps/ref_rejected': -100.95469665527344, 'KL/chosen_KL_mean': -620.8755493164062, 'KL/rejected_KL_mean': -978.7156372070312, 'KL/mean': -799.795654296875, 'KL/std': 466.3764343261719, 'logits/chosen': -0.8915605545043945, 'logits/rejected': -0.9122974872589111, 'epoch': 0.8} + 80%|███████▉ | 544/681 [23:07<05:55, 2.59s/it] 80%|████████ | 545/681 [23:09<05:50, 2.57s/it] {'loss': 1.0526, 'grad_norm': 26.347267150878906, 'learning_rate': 5.9316317682106294e-08, 'fcm_dpo/beta': 0.0011245384812355042, 'fcm_dpo/q_t': 0.39294663071632385, 'fcm_dpo/delta': -0.08103551715612411, 'fcm_dpo/margin': 424.308837890625, 'margin_dpo/margin_mean': 424.308837890625, 'margin_dpo/margin_std': 552.90673828125, 'logps/chosen': -660.8782958984375, 'logps/rejected': -1113.446533203125, 'logps/ref_chosen': -67.64859771728516, 'logps/ref_rejected': -95.90800476074219, 'KL/chosen_KL_mean': -593.229736328125, 'KL/rejected_KL_mean': -1017.53857421875, 'KL/mean': -805.3840942382812, 'KL/std': 472.28790283203125, 'logits/chosen': -0.8452168703079224, 'logits/rejected': -0.8719925284385681, 'epoch': 0.8} + 80%|████████ | 545/681 [23:09<05:50, 2.57s/it] 80%|████████ | 546/681 [23:12<05:47, 2.57s/it] {'loss': 1.1546, 'grad_norm': 36.055816650390625, 'learning_rate': 5.848888922025552e-08, 'fcm_dpo/beta': 0.0011334663722664118, 'fcm_dpo/q_t': 0.4246765971183777, 'fcm_dpo/delta': 0.08203422278165817, 'fcm_dpo/margin': 282.87054443359375, 'margin_dpo/margin_mean': 282.87054443359375, 'margin_dpo/margin_std': 451.3182373046875, 'logps/chosen': -617.5347900390625, 'logps/rejected': -931.52734375, 'logps/ref_chosen': -50.744232177734375, 'logps/ref_rejected': -81.86622619628906, 'KL/chosen_KL_mean': -566.79052734375, 'KL/rejected_KL_mean': -849.6610717773438, 'KL/mean': -708.225830078125, 'KL/std': 417.33697509765625, 'logits/chosen': -0.8745754957199097, 'logits/rejected': -0.8525873422622681, 'epoch': 0.8} + 80%|████████ | 546/681 [23:12<05:47, 2.57s/it] 80%|████████ | 547/681 [23:15<05:51, 2.62s/it] {'loss': 1.0906, 'grad_norm': 41.295894622802734, 'learning_rate': 5.7666507254280265e-08, 'fcm_dpo/beta': 0.0011374622117727995, 'fcm_dpo/q_t': 0.4062024652957916, 'fcm_dpo/delta': -0.013586894609034061, 'fcm_dpo/margin': 363.1061096191406, 'margin_dpo/margin_mean': 363.1061096191406, 'margin_dpo/margin_std': 503.5409240722656, 'logps/chosen': -645.0142211914062, 'logps/rejected': -1025.19384765625, 'logps/ref_chosen': -73.6877212524414, 'logps/ref_rejected': -90.76136779785156, 'KL/chosen_KL_mean': -571.3265380859375, 'KL/rejected_KL_mean': -934.4325561523438, 'KL/mean': -752.8795166015625, 'KL/std': 474.61773681640625, 'logits/chosen': -0.863783597946167, 'logits/rejected': -0.8649648427963257, 'epoch': 0.8} + 80%|████████ | 547/681 [23:15<05:51, 2.62s/it] 80%|████████ | 548/681 [23:17<05:43, 2.58s/it] {'loss': 1.1081, 'grad_norm': 29.72757339477539, 'learning_rate': 5.684919345471029e-08, 'fcm_dpo/beta': 0.0011355069000273943, 'fcm_dpo/q_t': 0.41354212164878845, 'fcm_dpo/delta': 0.006264576222747564, 'fcm_dpo/margin': 346.9548034667969, 'margin_dpo/margin_mean': 346.9548034667969, 'margin_dpo/margin_std': 528.34912109375, 'logps/chosen': -637.6410522460938, 'logps/rejected': -1013.467529296875, 'logps/ref_chosen': -65.24634552001953, 'logps/ref_rejected': -94.11807250976562, 'KL/chosen_KL_mean': -572.3947143554688, 'KL/rejected_KL_mean': -919.3494873046875, 'KL/mean': -745.8720703125, 'KL/std': 484.0094909667969, 'logits/chosen': -0.9228535294532776, 'logits/rejected': -0.922650933265686, 'epoch': 0.8} + 80%|████████ | 548/681 [23:17<05:43, 2.58s/it] 81%|████████ | 549/681 [23:20<05:36, 2.55s/it] {'loss': 1.1825, 'grad_norm': 34.956844329833984, 'learning_rate': 5.603696935852426e-08, 'fcm_dpo/beta': 0.0011380038922652602, 'fcm_dpo/q_t': 0.4329938292503357, 'fcm_dpo/delta': 0.00906070601195097, 'fcm_dpo/margin': 256.88873291015625, 'margin_dpo/margin_mean': 256.88873291015625, 'margin_dpo/margin_std': 461.2471008300781, 'logps/chosen': -637.5601806640625, 'logps/rejected': -919.1468505859375, 'logps/ref_chosen': -49.21235656738281, 'logps/ref_rejected': -73.91031646728516, 'KL/chosen_KL_mean': -588.3477783203125, 'KL/rejected_KL_mean': -845.236572265625, 'KL/mean': -716.7921142578125, 'KL/std': 395.0334777832031, 'logits/chosen': -0.9303746223449707, 'logits/rejected': -0.9102168083190918, 'epoch': 0.81} + 81%|████████ | 549/681 [23:20<05:36, 2.55s/it] 81%|████████ | 550/681 [23:22<05:35, 2.56s/it] {'loss': 1.1245, 'grad_norm': 33.49467468261719, 'learning_rate': 5.5229856368582376e-08, 'fcm_dpo/beta': 0.001144929206930101, 'fcm_dpo/q_t': 0.41719305515289307, 'fcm_dpo/delta': 0.04066295921802521, 'fcm_dpo/margin': 315.1492919921875, 'margin_dpo/margin_mean': 315.1493225097656, 'margin_dpo/margin_std': 469.3822021484375, 'logps/chosen': -642.4039306640625, 'logps/rejected': -995.8721313476562, 'logps/ref_chosen': -56.80695343017578, 'logps/ref_rejected': -95.12580871582031, 'KL/chosen_KL_mean': -585.5970458984375, 'KL/rejected_KL_mean': -900.746337890625, 'KL/mean': -743.171630859375, 'KL/std': 431.36700439453125, 'logits/chosen': -0.8681415319442749, 'logits/rejected': -0.8837727308273315, 'epoch': 0.81} + 81%|████████ | 550/681 [23:22<05:35, 2.56s/it] 81%|████████ | 551/681 [23:25<05:28, 2.53s/it] {'loss': 0.9776, 'grad_norm': 60.243988037109375, 'learning_rate': 5.4427875753062734e-08, 'fcm_dpo/beta': 0.0011208320502191782, 'fcm_dpo/q_t': 0.37404024600982666, 'fcm_dpo/delta': -0.15413454174995422, 'fcm_dpo/margin': 486.6854248046875, 'margin_dpo/margin_mean': 486.6854248046875, 'margin_dpo/margin_std': 475.30499267578125, 'logps/chosen': -587.1467895507812, 'logps/rejected': -1126.398681640625, 'logps/ref_chosen': -59.10633087158203, 'logps/ref_rejected': -111.67280578613281, 'KL/chosen_KL_mean': -528.0404663085938, 'KL/rejected_KL_mean': -1014.7258911132812, 'KL/mean': -771.3831787109375, 'KL/std': 475.05487060546875, 'logits/chosen': -0.8686560392379761, 'logits/rejected': -0.9161897301673889, 'epoch': 0.81} + 81%|████████ | 551/681 [23:25<05:28, 2.53s/it] 81%|████████ | 552/681 [23:27<05:16, 2.45s/it] {'loss': 0.9757, 'grad_norm': 43.864139556884766, 'learning_rate': 5.363104864490034e-08, 'fcm_dpo/beta': 0.0010729696368798614, 'fcm_dpo/q_t': 0.37025463581085205, 'fcm_dpo/delta': -0.1995118260383606, 'fcm_dpo/margin': 545.5556030273438, 'margin_dpo/margin_mean': 545.5556030273438, 'margin_dpo/margin_std': 603.31982421875, 'logps/chosen': -555.8642578125, 'logps/rejected': -1143.6273193359375, 'logps/ref_chosen': -62.35459899902344, 'logps/ref_rejected': -104.56210327148438, 'KL/chosen_KL_mean': -493.5096435546875, 'KL/rejected_KL_mean': -1039.065185546875, 'KL/mean': -766.2874145507812, 'KL/std': 549.2742919921875, 'logits/chosen': -0.917883574962616, 'logits/rejected': -0.9444681406021118, 'epoch': 0.81} + 81%|████████ | 552/681 [23:27<05:16, 2.45s/it] 81%|████████ | 553/681 [23:30<05:19, 2.49s/it] {'loss': 1.1534, 'grad_norm': 31.865938186645508, 'learning_rate': 5.2839396041230415e-08, 'fcm_dpo/beta': 0.0010809717932716012, 'fcm_dpo/q_t': 0.4275718331336975, 'fcm_dpo/delta': 0.0810445249080658, 'fcm_dpo/margin': 297.5469055175781, 'margin_dpo/margin_mean': 297.54693603515625, 'margin_dpo/margin_std': 481.7157897949219, 'logps/chosen': -645.8818359375, 'logps/rejected': -973.26708984375, 'logps/ref_chosen': -68.25881958007812, 'logps/ref_rejected': -98.0971450805664, 'KL/chosen_KL_mean': -577.623046875, 'KL/rejected_KL_mean': -875.169921875, 'KL/mean': -726.396484375, 'KL/std': 449.4180603027344, 'logits/chosen': -0.9152529835700989, 'logits/rejected': -0.9069106578826904, 'epoch': 0.81} + 81%|████████ | 553/681 [23:30<05:19, 2.49s/it] 81%|████████▏ | 554/681 [23:32<05:29, 2.59s/it] {'loss': 1.0927, 'grad_norm': 33.93936538696289, 'learning_rate': 5.205293880283551e-08, 'fcm_dpo/beta': 0.001084424089640379, 'fcm_dpo/q_t': 0.4000872075557709, 'fcm_dpo/delta': -0.05283275246620178, 'fcm_dpo/margin': 415.1555480957031, 'margin_dpo/margin_mean': 415.1555480957031, 'margin_dpo/margin_std': 624.768798828125, 'logps/chosen': -663.7021484375, 'logps/rejected': -1100.6927490234375, 'logps/ref_chosen': -67.94767761230469, 'logps/ref_rejected': -89.78272247314453, 'KL/chosen_KL_mean': -595.7544555664062, 'KL/rejected_KL_mean': -1010.9100341796875, 'KL/mean': -803.332275390625, 'KL/std': 496.8535461425781, 'logits/chosen': -0.8999141454696655, 'logits/rejected': -0.862054705619812, 'epoch': 0.81} + 81%|████████▏ | 554/681 [23:32<05:29, 2.59s/it] 81%|████████▏ | 555/681 [23:35<05:18, 2.53s/it] {'loss': 1.0645, 'grad_norm': 35.487144470214844, 'learning_rate': 5.127169765359515e-08, 'fcm_dpo/beta': 0.0010615733917802572, 'fcm_dpo/q_t': 0.3936702311038971, 'fcm_dpo/delta': -0.09408356249332428, 'fcm_dpo/margin': 461.125244140625, 'margin_dpo/margin_mean': 461.125244140625, 'margin_dpo/margin_std': 665.6109008789062, 'logps/chosen': -668.35986328125, 'logps/rejected': -1184.634033203125, 'logps/ref_chosen': -53.33049011230469, 'logps/ref_rejected': -108.47937774658203, 'KL/chosen_KL_mean': -615.0294189453125, 'KL/rejected_KL_mean': -1076.154541015625, 'KL/mean': -845.592041015625, 'KL/std': 520.6309814453125, 'logits/chosen': -0.9481945037841797, 'logits/rejected': -0.9943474531173706, 'epoch': 0.81} + 81%|████████▏ | 555/681 [23:35<05:18, 2.53s/it] 82%|████████▏ | 556/681 [23:37<05:22, 2.58s/it] {'loss': 1.1538, 'grad_norm': 31.20359230041504, 'learning_rate': 5.049569317994012e-08, 'fcm_dpo/beta': 0.0010712645016610622, 'fcm_dpo/q_t': 0.43035784363746643, 'fcm_dpo/delta': 0.10165860503911972, 'fcm_dpo/margin': 281.4146728515625, 'margin_dpo/margin_mean': 281.4146728515625, 'margin_dpo/margin_std': 414.3664245605469, 'logps/chosen': -628.6192016601562, 'logps/rejected': -952.7298583984375, 'logps/ref_chosen': -58.64447021484375, 'logps/ref_rejected': -101.34040832519531, 'KL/chosen_KL_mean': -569.9747314453125, 'KL/rejected_KL_mean': -851.389404296875, 'KL/mean': -710.6820678710938, 'KL/std': 405.1685791015625, 'logits/chosen': -0.9110164642333984, 'logits/rejected': -0.8961154222488403, 'epoch': 0.82} + 82%|████████▏ | 556/681 [23:38<05:22, 2.58s/it] 82%|████████▏ | 557/681 [23:40<05:21, 2.59s/it] {'loss': 1.0935, 'grad_norm': 51.41856384277344, 'learning_rate': 4.9724945830310144e-08, 'fcm_dpo/beta': 0.0010692158248275518, 'fcm_dpo/q_t': 0.40289121866226196, 'fcm_dpo/delta': -0.033952295780181885, 'fcm_dpo/margin': 404.26434326171875, 'margin_dpo/margin_mean': 404.2643127441406, 'margin_dpo/margin_std': 592.6957397460938, 'logps/chosen': -699.2506103515625, 'logps/rejected': -1145.614013671875, 'logps/ref_chosen': -67.84066009521484, 'logps/ref_rejected': -109.93965911865234, 'KL/chosen_KL_mean': -631.4099731445312, 'KL/rejected_KL_mean': -1035.67431640625, 'KL/mean': -833.5421142578125, 'KL/std': 523.793701171875, 'logits/chosen': -0.9860169887542725, 'logits/rejected': -1.0132906436920166, 'epoch': 0.82} + 82%|████████▏ | 557/681 [23:40<05:21, 2.59s/it] 82%|████████▏ | 558/681 [23:43<05:16, 2.57s/it] {'loss': 0.9804, 'grad_norm': 24.727500915527344, 'learning_rate': 4.8959475914614554e-08, 'fcm_dpo/beta': 0.0010408093221485615, 'fcm_dpo/q_t': 0.3689645528793335, 'fcm_dpo/delta': -0.17697550356388092, 'fcm_dpo/margin': 544.393798828125, 'margin_dpo/margin_mean': 544.393798828125, 'margin_dpo/margin_std': 575.1395874023438, 'logps/chosen': -630.277099609375, 'logps/rejected': -1214.463623046875, 'logps/ref_chosen': -62.36824035644531, 'logps/ref_rejected': -102.16102600097656, 'KL/chosen_KL_mean': -567.9088745117188, 'KL/rejected_KL_mean': -1112.302490234375, 'KL/mean': -840.1057739257812, 'KL/std': 511.2911376953125, 'logits/chosen': -1.0073204040527344, 'logits/rejected': -1.016093373298645, 'epoch': 0.82} + 82%|████████▏ | 558/681 [23:43<05:16, 2.57s/it] 82%|████████▏ | 559/681 [23:45<05:12, 2.56s/it] {'loss': 1.0467, 'grad_norm': 27.729312896728516, 'learning_rate': 4.8199303603697614e-08, 'fcm_dpo/beta': 0.0010242098942399025, 'fcm_dpo/q_t': 0.3939628005027771, 'fcm_dpo/delta': -0.06484264880418777, 'fcm_dpo/margin': 450.97210693359375, 'margin_dpo/margin_mean': 450.9721374511719, 'margin_dpo/margin_std': 538.5919799804688, 'logps/chosen': -686.5985717773438, 'logps/rejected': -1170.2607421875, 'logps/ref_chosen': -60.752323150634766, 'logps/ref_rejected': -93.44229125976562, 'KL/chosen_KL_mean': -625.84619140625, 'KL/rejected_KL_mean': -1076.818359375, 'KL/mean': -851.332275390625, 'KL/std': 508.06561279296875, 'logits/chosen': -1.085421085357666, 'logits/rejected': -1.071927547454834, 'epoch': 0.82} + 82%|████████▏ | 559/681 [23:45<05:12, 2.56s/it] 82%|████████▏ | 560/681 [23:48<05:17, 2.62s/it] {'loss': 1.165, 'grad_norm': 32.959224700927734, 'learning_rate': 4.7444448928806615e-08, 'fcm_dpo/beta': 0.0010198511881753802, 'fcm_dpo/q_t': 0.42941996455192566, 'fcm_dpo/delta': 0.00034468769445084035, 'fcm_dpo/margin': 295.4377746582031, 'margin_dpo/margin_mean': 295.4377746582031, 'margin_dpo/margin_std': 473.3067321777344, 'logps/chosen': -658.5667724609375, 'logps/rejected': -975.8919677734375, 'logps/ref_chosen': -58.10382080078125, 'logps/ref_rejected': -79.99122619628906, 'KL/chosen_KL_mean': -600.4629516601562, 'KL/rejected_KL_mean': -895.9007568359375, 'KL/mean': -748.181884765625, 'KL/std': 419.6651306152344, 'logits/chosen': -0.8413786888122559, 'logits/rejected': -0.8068987131118774, 'epoch': 0.82} + 82%|████████▏ | 560/681 [23:48<05:17, 2.62s/it] 82%|████████▏ | 561/681 [23:50<05:08, 2.57s/it] {'loss': 1.1824, 'grad_norm': 33.256561279296875, 'learning_rate': 4.669493178106432e-08, 'fcm_dpo/beta': 0.001038446556776762, 'fcm_dpo/q_t': 0.4279605746269226, 'fcm_dpo/delta': 0.09292855858802795, 'fcm_dpo/margin': 297.7703857421875, 'margin_dpo/margin_mean': 297.7703857421875, 'margin_dpo/margin_std': 566.9799194335938, 'logps/chosen': -727.47216796875, 'logps/rejected': -1073.398193359375, 'logps/ref_chosen': -50.912879943847656, 'logps/ref_rejected': -99.06856536865234, 'KL/chosen_KL_mean': -676.5592651367188, 'KL/rejected_KL_mean': -974.32958984375, 'KL/mean': -825.4444580078125, 'KL/std': 466.03192138671875, 'logits/chosen': -0.957642674446106, 'logits/rejected': -0.9741103649139404, 'epoch': 0.82} + 82%|████████▏ | 561/681 [23:50<05:08, 2.57s/it] 83%|████████▎ | 562/681 [23:53<05:09, 2.60s/it] {'loss': 1.1057, 'grad_norm': 27.82461929321289, 'learning_rate': 4.5950771910944596e-08, 'fcm_dpo/beta': 0.0010351063683629036, 'fcm_dpo/q_t': 0.4100903272628784, 'fcm_dpo/delta': 0.005036838352680206, 'fcm_dpo/margin': 381.3973388671875, 'margin_dpo/margin_mean': 381.3973083496094, 'margin_dpo/margin_std': 549.9033203125, 'logps/chosen': -697.1715087890625, 'logps/rejected': -1115.64697265625, 'logps/ref_chosen': -59.46440124511719, 'logps/ref_rejected': -96.54266357421875, 'KL/chosen_KL_mean': -637.7071533203125, 'KL/rejected_KL_mean': -1019.1043701171875, 'KL/mean': -828.40576171875, 'KL/std': 509.77227783203125, 'logits/chosen': -0.9611387848854065, 'logits/rejected': -0.9594268798828125, 'epoch': 0.83} + 83%|████████▎ | 562/681 [23:53<05:09, 2.60s/it] 83%|████████▎ | 563/681 [23:55<04:59, 2.54s/it] {'loss': 1.2024, 'grad_norm': 37.00297164916992, 'learning_rate': 4.521198892775202e-08, 'fcm_dpo/beta': 0.0010367175564169884, 'fcm_dpo/q_t': 0.4237147569656372, 'fcm_dpo/delta': -0.031953129917383194, 'fcm_dpo/margin': 304.4427490234375, 'margin_dpo/margin_mean': 304.4427490234375, 'margin_dpo/margin_std': 631.2129516601562, 'logps/chosen': -743.1197509765625, 'logps/rejected': -1081.52197265625, 'logps/ref_chosen': -60.60819625854492, 'logps/ref_rejected': -94.56770324707031, 'KL/chosen_KL_mean': -682.5115966796875, 'KL/rejected_KL_mean': -986.954345703125, 'KL/mean': -834.73291015625, 'KL/std': 534.4413452148438, 'logits/chosen': -0.9149300456047058, 'logits/rejected': -0.9164028167724609, 'epoch': 0.83} + 83%|████████▎ | 563/681 [23:55<04:59, 2.54s/it] 83%|████████▎ | 564/681 [23:58<04:54, 2.52s/it] {'loss': 1.1179, 'grad_norm': 37.090126037597656, 'learning_rate': 4.447860229910544e-08, 'fcm_dpo/beta': 0.0010415834840387106, 'fcm_dpo/q_t': 0.41654127836227417, 'fcm_dpo/delta': 0.03815501928329468, 'fcm_dpo/margin': 348.56048583984375, 'margin_dpo/margin_mean': 348.56048583984375, 'margin_dpo/margin_std': 479.8955078125, 'logps/chosen': -728.1798095703125, 'logps/rejected': -1095.7100830078125, 'logps/ref_chosen': -74.26837921142578, 'logps/ref_rejected': -93.23818969726562, 'KL/chosen_KL_mean': -653.9114990234375, 'KL/rejected_KL_mean': -1002.471923828125, 'KL/mean': -828.191650390625, 'KL/std': 475.3956298828125, 'logits/chosen': -1.0703301429748535, 'logits/rejected': -1.0528302192687988, 'epoch': 0.83} + 83%|████████▎ | 564/681 [23:58<04:54, 2.52s/it] 83%|████████▎ | 565/681 [24:01<04:55, 2.55s/it] {'loss': 1.1223, 'grad_norm': 30.26292610168457, 'learning_rate': 4.375063135042445e-08, 'fcm_dpo/beta': 0.0010392372496426105, 'fcm_dpo/q_t': 0.4108823537826538, 'fcm_dpo/delta': -0.005461537279188633, 'fcm_dpo/margin': 389.9073486328125, 'margin_dpo/margin_mean': 389.9073486328125, 'margin_dpo/margin_std': 643.8388671875, 'logps/chosen': -724.4274291992188, 'logps/rejected': -1131.09375, 'logps/ref_chosen': -69.0199203491211, 'logps/ref_rejected': -85.7789306640625, 'KL/chosen_KL_mean': -655.407470703125, 'KL/rejected_KL_mean': -1045.3148193359375, 'KL/mean': -850.3612060546875, 'KL/std': 540.4480590820312, 'logits/chosen': -0.9375953674316406, 'logits/rejected': -0.9292128086090088, 'epoch': 0.83} + 83%|████████▎ | 565/681 [24:01<04:55, 2.55s/it] 83%|████████▎ | 566/681 [24:03<05:01, 2.62s/it] {'loss': 1.094, 'grad_norm': 29.393285751342773, 'learning_rate': 4.3028095264420525e-08, 'fcm_dpo/beta': 0.0010359040461480618, 'fcm_dpo/q_t': 0.3965566158294678, 'fcm_dpo/delta': -0.0708489641547203, 'fcm_dpo/margin': 450.98480224609375, 'margin_dpo/margin_mean': 450.98480224609375, 'margin_dpo/margin_std': 689.2914428710938, 'logps/chosen': -700.9684448242188, 'logps/rejected': -1189.2772216796875, 'logps/ref_chosen': -66.5453109741211, 'logps/ref_rejected': -103.86932373046875, 'KL/chosen_KL_mean': -634.423095703125, 'KL/rejected_KL_mean': -1085.407958984375, 'KL/mean': -859.91552734375, 'KL/std': 575.044189453125, 'logits/chosen': -0.9996108412742615, 'logits/rejected': -1.016195297241211, 'epoch': 0.83} + 83%|████████▎ | 566/681 [24:03<05:01, 2.62s/it] 83%|████████▎ | 567/681 [24:06<04:53, 2.58s/it] {'loss': 1.1274, 'grad_norm': 36.57158660888672, 'learning_rate': 4.231101308059165e-08, 'fcm_dpo/beta': 0.0010342567693442106, 'fcm_dpo/q_t': 0.41991090774536133, 'fcm_dpo/delta': 0.05805526301264763, 'fcm_dpo/margin': 332.55987548828125, 'margin_dpo/margin_mean': 332.55987548828125, 'margin_dpo/margin_std': 469.06707763671875, 'logps/chosen': -683.5215454101562, 'logps/rejected': -1048.5941162109375, 'logps/ref_chosen': -52.85829544067383, 'logps/ref_rejected': -85.37095642089844, 'KL/chosen_KL_mean': -630.6632690429688, 'KL/rejected_KL_mean': -963.22314453125, 'KL/mean': -796.9432373046875, 'KL/std': 397.6266174316406, 'logits/chosen': -1.0474796295166016, 'logits/rejected': -1.0378050804138184, 'epoch': 0.83} + 83%|████████▎ | 567/681 [24:06<04:53, 2.58s/it] 83%|████████▎ | 568/681 [24:08<04:44, 2.52s/it] {'loss': 1.0315, 'grad_norm': 33.18759536743164, 'learning_rate': 4.1599403694720145e-08, 'fcm_dpo/beta': 0.0010257186368107796, 'fcm_dpo/q_t': 0.39098745584487915, 'fcm_dpo/delta': -0.06917618960142136, 'fcm_dpo/margin': 454.1707763671875, 'margin_dpo/margin_mean': 454.1707763671875, 'margin_dpo/margin_std': 485.6314697265625, 'logps/chosen': -644.1751708984375, 'logps/rejected': -1142.2459716796875, 'logps/ref_chosen': -45.1923828125, 'logps/ref_rejected': -89.09236907958984, 'KL/chosen_KL_mean': -598.9827880859375, 'KL/rejected_KL_mean': -1053.153564453125, 'KL/mean': -826.0681762695312, 'KL/std': 466.5391845703125, 'logits/chosen': -0.9316244125366211, 'logits/rejected': -0.962453305721283, 'epoch': 0.83} + 83%|████████▎ | 568/681 [24:08<04:44, 2.52s/it] 84%|████████▎ | 569/681 [24:11<04:45, 2.55s/it] {'loss': 1.1275, 'grad_norm': 40.002628326416016, 'learning_rate': 4.089328585837512e-08, 'fcm_dpo/beta': 0.0010291270446032286, 'fcm_dpo/q_t': 0.41084566712379456, 'fcm_dpo/delta': 0.003299180418252945, 'fcm_dpo/margin': 385.0684814453125, 'margin_dpo/margin_mean': 385.0685119628906, 'margin_dpo/margin_std': 624.4798583984375, 'logps/chosen': -712.5262451171875, 'logps/rejected': -1112.9775390625, 'logps/ref_chosen': -63.72056198120117, 'logps/ref_rejected': -79.10325622558594, 'KL/chosen_KL_mean': -648.8056640625, 'KL/rejected_KL_mean': -1033.874267578125, 'KL/mean': -841.3399658203125, 'KL/std': 573.2427978515625, 'logits/chosen': -0.9903547763824463, 'logits/rejected': -0.9887925982475281, 'epoch': 0.84} + 84%|████████▎ | 569/681 [24:11<04:45, 2.55s/it] 84%|████████▎ | 570/681 [24:13<04:44, 2.56s/it] {'loss': 1.1299, 'grad_norm': 34.861881256103516, 'learning_rate': 4.019267817841834e-08, 'fcm_dpo/beta': 0.0010310852667316794, 'fcm_dpo/q_t': 0.41983291506767273, 'fcm_dpo/delta': 0.053242240101099014, 'fcm_dpo/margin': 338.1347351074219, 'margin_dpo/margin_mean': 338.134765625, 'margin_dpo/margin_std': 491.9954833984375, 'logps/chosen': -696.466552734375, 'logps/rejected': -1055.128662109375, 'logps/ref_chosen': -61.61454391479492, 'logps/ref_rejected': -82.14186096191406, 'KL/chosen_KL_mean': -634.85205078125, 'KL/rejected_KL_mean': -972.98681640625, 'KL/mean': -803.91943359375, 'KL/std': 460.9179992675781, 'logits/chosen': -1.04459547996521, 'logits/rejected': -1.023921251296997, 'epoch': 0.84} + 84%|████████▎ | 570/681 [24:13<04:44, 2.56s/it] 84%|████████▍ | 571/681 [24:16<04:42, 2.57s/it] {'loss': 1.0896, 'grad_norm': 31.51184844970703, 'learning_rate': 3.9497599116513705e-08, 'fcm_dpo/beta': 0.0010264207376167178, 'fcm_dpo/q_t': 0.4029013514518738, 'fcm_dpo/delta': -0.03282318636775017, 'fcm_dpo/margin': 419.9873046875, 'margin_dpo/margin_mean': 419.9873352050781, 'margin_dpo/margin_std': 602.700439453125, 'logps/chosen': -683.38671875, 'logps/rejected': -1141.6568603515625, 'logps/ref_chosen': -53.05406188964844, 'logps/ref_rejected': -91.33682250976562, 'KL/chosen_KL_mean': -630.3326416015625, 'KL/rejected_KL_mean': -1050.320068359375, 'KL/mean': -840.3262939453125, 'KL/std': 493.2159423828125, 'logits/chosen': -0.9481862187385559, 'logits/rejected': -0.9578366279602051, 'epoch': 0.84} + 84%|████████▍ | 571/681 [24:16<04:42, 2.57s/it] 84%|████████▍ | 572/681 [24:18<04:33, 2.51s/it] {'loss': 1.1143, 'grad_norm': 28.058713912963867, 'learning_rate': 3.880806698864086e-08, 'fcm_dpo/beta': 0.0010215662186965346, 'fcm_dpo/q_t': 0.40701764822006226, 'fcm_dpo/delta': -0.026851139962673187, 'fcm_dpo/margin': 416.5491943359375, 'margin_dpo/margin_mean': 416.5491943359375, 'margin_dpo/margin_std': 682.91455078125, 'logps/chosen': -741.0962524414062, 'logps/rejected': -1192.7431640625, 'logps/ref_chosen': -48.45928955078125, 'logps/ref_rejected': -83.55703735351562, 'KL/chosen_KL_mean': -692.636962890625, 'KL/rejected_KL_mean': -1109.18603515625, 'KL/mean': -900.9115600585938, 'KL/std': 563.3121337890625, 'logits/chosen': -0.9600076675415039, 'logits/rejected': -0.9866325855255127, 'epoch': 0.84} + 84%|████████▍ | 572/681 [24:18<04:33, 2.51s/it] 84%|████████▍ | 573/681 [24:21<04:22, 2.43s/it] {'loss': 1.1042, 'grad_norm': 24.622867584228516, 'learning_rate': 3.812409996461275e-08, 'fcm_dpo/beta': 0.0010271357605233788, 'fcm_dpo/q_t': 0.4132155776023865, 'fcm_dpo/delta': 0.01777409017086029, 'fcm_dpo/margin': 372.70294189453125, 'margin_dpo/margin_mean': 372.70294189453125, 'margin_dpo/margin_std': 519.7007446289062, 'logps/chosen': -716.833984375, 'logps/rejected': -1123.2392578125, 'logps/ref_chosen': -51.62262725830078, 'logps/ref_rejected': -85.32499694824219, 'KL/chosen_KL_mean': -665.21142578125, 'KL/rejected_KL_mean': -1037.914306640625, 'KL/mean': -851.5628662109375, 'KL/std': 504.3909606933594, 'logits/chosen': -1.040392518043518, 'logits/rejected': -1.0433319807052612, 'epoch': 0.84} + 84%|████████▍ | 573/681 [24:21<04:22, 2.43s/it] 84%|████████▍ | 574/681 [24:23<04:29, 2.52s/it] {'loss': 1.0857, 'grad_norm': 31.015390396118164, 'learning_rate': 3.74457160675965e-08, 'fcm_dpo/beta': 0.0010248222388327122, 'fcm_dpo/q_t': 0.40481486916542053, 'fcm_dpo/delta': -0.017912685871124268, 'fcm_dpo/margin': 407.05230712890625, 'margin_dpo/margin_mean': 407.05230712890625, 'margin_dpo/margin_std': 548.0139770507812, 'logps/chosen': -672.5933837890625, 'logps/rejected': -1121.40771484375, 'logps/ref_chosen': -51.04446029663086, 'logps/ref_rejected': -92.80640411376953, 'KL/chosen_KL_mean': -621.5488891601562, 'KL/rejected_KL_mean': -1028.601318359375, 'KL/mean': -825.0750732421875, 'KL/std': 480.80010986328125, 'logits/chosen': -1.0330562591552734, 'logits/rejected': -1.0494093894958496, 'epoch': 0.84} + 84%|████████▍ | 574/681 [24:23<04:29, 2.52s/it] 84%|████████▍ | 575/681 [24:26<04:30, 2.55s/it] {'loss': 1.1541, 'grad_norm': 37.037967681884766, 'learning_rate': 3.677293317363864e-08, 'fcm_dpo/beta': 0.0010078808991238475, 'fcm_dpo/q_t': 0.41705384850502014, 'fcm_dpo/delta': -0.0724976509809494, 'fcm_dpo/margin': 359.38861083984375, 'margin_dpo/margin_mean': 359.38861083984375, 'margin_dpo/margin_std': 618.3295288085938, 'logps/chosen': -750.8197021484375, 'logps/rejected': -1133.804443359375, 'logps/ref_chosen': -71.7901382446289, 'logps/ref_rejected': -95.38619995117188, 'KL/chosen_KL_mean': -679.0296020507812, 'KL/rejected_KL_mean': -1038.418212890625, 'KL/mean': -858.723876953125, 'KL/std': 490.94537353515625, 'logits/chosen': -0.8891516923904419, 'logits/rejected': -0.8854223489761353, 'epoch': 0.84} + 84%|████████▍ | 575/681 [24:26<04:30, 2.55s/it] 85%|████████▍ | 576/681 [24:28<04:25, 2.53s/it] {'loss': 1.1671, 'grad_norm': 26.034320831298828, 'learning_rate': 3.6105769011194224e-08, 'fcm_dpo/beta': 0.0010182232363149524, 'fcm_dpo/q_t': 0.4291686415672302, 'fcm_dpo/delta': 0.09279034286737442, 'fcm_dpo/margin': 304.60137939453125, 'margin_dpo/margin_mean': 304.60137939453125, 'margin_dpo/margin_std': 517.416748046875, 'logps/chosen': -684.1763916015625, 'logps/rejected': -1035.26904296875, 'logps/ref_chosen': -54.262962341308594, 'logps/ref_rejected': -100.75428009033203, 'KL/chosen_KL_mean': -629.9134521484375, 'KL/rejected_KL_mean': -934.5147705078125, 'KL/mean': -782.214111328125, 'KL/std': 449.57354736328125, 'logits/chosen': -0.9969866275787354, 'logits/rejected': -1.025536298751831, 'epoch': 0.85} + 85%|████████▍ | 576/681 [24:28<04:25, 2.53s/it] 85%|████████▍ | 577/681 [24:31<04:19, 2.49s/it] {'loss': 1.1134, 'grad_norm': 29.45041275024414, 'learning_rate': 3.5444241160659304e-08, 'fcm_dpo/beta': 0.001027634833008051, 'fcm_dpo/q_t': 0.4120475649833679, 'fcm_dpo/delta': 0.014742329716682434, 'fcm_dpo/margin': 375.43487548828125, 'margin_dpo/margin_mean': 375.43487548828125, 'margin_dpo/margin_std': 524.5450439453125, 'logps/chosen': -634.8232421875, 'logps/rejected': -1032.419189453125, 'logps/ref_chosen': -61.909706115722656, 'logps/ref_rejected': -84.07069396972656, 'KL/chosen_KL_mean': -572.9135131835938, 'KL/rejected_KL_mean': -948.348388671875, 'KL/mean': -760.6309814453125, 'KL/std': 488.9225158691406, 'logits/chosen': -0.9848508834838867, 'logits/rejected': -0.9585464000701904, 'epoch': 0.85} + 85%|████████▍ | 577/681 [24:31<04:19, 2.49s/it] 85%|████████▍ | 578/681 [24:33<04:20, 2.53s/it] {'loss': 1.0665, 'grad_norm': 34.02935791015625, 'learning_rate': 3.478836705390808e-08, 'fcm_dpo/beta': 0.001021248521283269, 'fcm_dpo/q_t': 0.40228772163391113, 'fcm_dpo/delta': -0.021578624844551086, 'fcm_dpo/margin': 411.31219482421875, 'margin_dpo/margin_mean': 411.31219482421875, 'margin_dpo/margin_std': 471.1173095703125, 'logps/chosen': -612.96923828125, 'logps/rejected': -1058.4541015625, 'logps/ref_chosen': -49.26368713378906, 'logps/ref_rejected': -83.4362564086914, 'KL/chosen_KL_mean': -563.70556640625, 'KL/rejected_KL_mean': -975.017822265625, 'KL/mean': -769.3616943359375, 'KL/std': 475.5242919921875, 'logits/chosen': -0.8643592596054077, 'logits/rejected': -0.8935543894767761, 'epoch': 0.85} + 85%|████████▍ | 578/681 [24:33<04:20, 2.53s/it] 85%|████████▌ | 579/681 [24:36<04:25, 2.60s/it] {'loss': 1.194, 'grad_norm': 47.86115646362305, 'learning_rate': 3.41381639738331e-08, 'fcm_dpo/beta': 0.0010419844184070826, 'fcm_dpo/q_t': 0.43572354316711426, 'fcm_dpo/delta': 0.12221585214138031, 'fcm_dpo/margin': 270.0508728027344, 'margin_dpo/margin_mean': 270.0508728027344, 'margin_dpo/margin_std': 516.8199462890625, 'logps/chosen': -678.638427734375, 'logps/rejected': -984.591064453125, 'logps/ref_chosen': -58.88581848144531, 'logps/ref_rejected': -94.78762817382812, 'KL/chosen_KL_mean': -619.7525634765625, 'KL/rejected_KL_mean': -889.803466796875, 'KL/mean': -754.7780151367188, 'KL/std': 469.8000793457031, 'logits/chosen': -0.9247668981552124, 'logits/rejected': -0.918264627456665, 'epoch': 0.85} + 85%|████████▌ | 579/681 [24:36<04:25, 2.60s/it] 85%|████████▌ | 580/681 [24:39<04:22, 2.60s/it] {'loss': 1.0653, 'grad_norm': 30.88446617126465, 'learning_rate': 3.349364905389032e-08, 'fcm_dpo/beta': 0.0010376223362982273, 'fcm_dpo/q_t': 0.3978680372238159, 'fcm_dpo/delta': -0.06723435968160629, 'fcm_dpo/margin': 447.0083312988281, 'margin_dpo/margin_mean': 447.0083312988281, 'margin_dpo/margin_std': 623.1065673828125, 'logps/chosen': -546.7160034179688, 'logps/rejected': -1026.77587890625, 'logps/ref_chosen': -48.70683670043945, 'logps/ref_rejected': -81.7583999633789, 'KL/chosen_KL_mean': -498.0091857910156, 'KL/rejected_KL_mean': -945.0174560546875, 'KL/mean': -721.5133056640625, 'KL/std': 533.5467529296875, 'logits/chosen': -0.8265971541404724, 'logits/rejected': -0.8474031686782837, 'epoch': 0.85} + 85%|████████▌ | 580/681 [24:39<04:22, 2.60s/it] 85%|████████▌ | 581/681 [24:41<04:15, 2.56s/it] {'loss': 1.133, 'grad_norm': 28.153030395507812, 'learning_rate': 3.285483927764726e-08, 'fcm_dpo/beta': 0.001045089797116816, 'fcm_dpo/q_t': 0.4190484881401062, 'fcm_dpo/delta': 0.046971119940280914, 'fcm_dpo/margin': 339.32427978515625, 'margin_dpo/margin_mean': 339.3243103027344, 'margin_dpo/margin_std': 528.5376586914062, 'logps/chosen': -681.4271240234375, 'logps/rejected': -1050.2647705078125, 'logps/ref_chosen': -62.22235107421875, 'logps/ref_rejected': -91.73568725585938, 'KL/chosen_KL_mean': -619.2047729492188, 'KL/rejected_KL_mean': -958.529052734375, 'KL/mean': -788.866943359375, 'KL/std': 473.9400634765625, 'logits/chosen': -1.0129151344299316, 'logits/rejected': -1.0199846029281616, 'epoch': 0.85} + 85%|████████▌ | 581/681 [24:41<04:15, 2.56s/it] 85%|████████▌ | 582/681 [24:44<04:14, 2.57s/it] {'loss': 1.0821, 'grad_norm': 30.396282196044922, 'learning_rate': 3.222175147833556e-08, 'fcm_dpo/beta': 0.001052438747137785, 'fcm_dpo/q_t': 0.40300631523132324, 'fcm_dpo/delta': -0.019892334938049316, 'fcm_dpo/margin': 396.9622802734375, 'margin_dpo/margin_mean': 396.96234130859375, 'margin_dpo/margin_std': 493.5679931640625, 'logps/chosen': -628.058349609375, 'logps/rejected': -1076.861572265625, 'logps/ref_chosen': -58.228660583496094, 'logps/ref_rejected': -110.06959533691406, 'KL/chosen_KL_mean': -569.8297119140625, 'KL/rejected_KL_mean': -966.7920532226562, 'KL/mean': -768.3109130859375, 'KL/std': 444.11328125, 'logits/chosen': -0.9671785831451416, 'logits/rejected': -0.9903292059898376, 'epoch': 0.85} + 85%|████████▌ | 582/681 [24:44<04:14, 2.57s/it] 86%|████████▌ | 583/681 [24:47<04:14, 2.60s/it] {'loss': 1.2385, 'grad_norm': 31.7283878326416, 'learning_rate': 3.159440233840763e-08, 'fcm_dpo/beta': 0.0010401608888059855, 'fcm_dpo/q_t': 0.4429190754890442, 'fcm_dpo/delta': -0.01038757897913456, 'fcm_dpo/margin': 246.67337036132812, 'margin_dpo/margin_mean': 246.6733856201172, 'margin_dpo/margin_std': 584.5450439453125, 'logps/chosen': -701.1990966796875, 'logps/rejected': -979.41357421875, 'logps/ref_chosen': -56.86286163330078, 'logps/ref_rejected': -88.4039306640625, 'KL/chosen_KL_mean': -644.336181640625, 'KL/rejected_KL_mean': -891.0096435546875, 'KL/mean': -767.6728515625, 'KL/std': 475.7613830566406, 'logits/chosen': -0.9231404066085815, 'logits/rejected': -0.9159576892852783, 'epoch': 0.86} + 86%|████████▌ | 583/681 [24:47<04:14, 2.60s/it] 86%|████████▌ | 584/681 [24:49<04:07, 2.55s/it] {'loss': 1.0207, 'grad_norm': 27.133150100708008, 'learning_rate': 3.0972808389096635e-08, 'fcm_dpo/beta': 0.001025655074045062, 'fcm_dpo/q_t': 0.3882429897785187, 'fcm_dpo/delta': -0.0933271199464798, 'fcm_dpo/margin': 476.524658203125, 'margin_dpo/margin_mean': 476.524658203125, 'margin_dpo/margin_std': 514.8095703125, 'logps/chosen': -605.3631591796875, 'logps/rejected': -1122.623291015625, 'logps/ref_chosen': -56.90068054199219, 'logps/ref_rejected': -97.63606262207031, 'KL/chosen_KL_mean': -548.4625244140625, 'KL/rejected_KL_mean': -1024.9871826171875, 'KL/mean': -786.724853515625, 'KL/std': 508.10198974609375, 'logits/chosen': -0.9409053921699524, 'logits/rejected': -0.9479919672012329, 'epoch': 0.86} + 86%|████████▌ | 584/681 [24:49<04:07, 2.55s/it] 86%|████████▌ | 585/681 [24:51<04:02, 2.53s/it] {'loss': 1.1049, 'grad_norm': 28.67152976989746, 'learning_rate': 3.035698600998121e-08, 'fcm_dpo/beta': 0.001014050329104066, 'fcm_dpo/q_t': 0.40475040674209595, 'fcm_dpo/delta': -0.029842915013432503, 'fcm_dpo/margin': 422.4579162597656, 'margin_dpo/margin_mean': 422.4578857421875, 'margin_dpo/margin_std': 662.7896728515625, 'logps/chosen': -717.804443359375, 'logps/rejected': -1163.4578857421875, 'logps/ref_chosen': -60.973968505859375, 'logps/ref_rejected': -84.16952514648438, 'KL/chosen_KL_mean': -656.8305053710938, 'KL/rejected_KL_mean': -1079.288330078125, 'KL/mean': -868.0594482421875, 'KL/std': 551.96435546875, 'logits/chosen': -0.9976698756217957, 'logits/rejected': -1.021203875541687, 'epoch': 0.86} + 86%|████████▌ | 585/681 [24:51<04:02, 2.53s/it] 86%|████████▌ | 586/681 [24:54<03:59, 2.52s/it] {'loss': 1.1906, 'grad_norm': 29.24315071105957, 'learning_rate': 2.974695142855388e-08, 'fcm_dpo/beta': 0.0010314470855519176, 'fcm_dpo/q_t': 0.433902382850647, 'fcm_dpo/delta': 0.11326177418231964, 'fcm_dpo/margin': 281.2471618652344, 'margin_dpo/margin_mean': 281.2471618652344, 'margin_dpo/margin_std': 540.845458984375, 'logps/chosen': -720.5540161132812, 'logps/rejected': -1036.748291015625, 'logps/ref_chosen': -56.85559844970703, 'logps/ref_rejected': -91.80261993408203, 'KL/chosen_KL_mean': -663.6984252929688, 'KL/rejected_KL_mean': -944.9456176757812, 'KL/mean': -804.322021484375, 'KL/std': 479.632568359375, 'logits/chosen': -0.9781264066696167, 'logits/rejected': -0.9948530197143555, 'epoch': 0.86} + 86%|████████▌ | 586/681 [24:54<03:59, 2.52s/it] 86%|████████▌ | 587/681 [24:57<04:00, 2.56s/it] {'loss': 1.0723, 'grad_norm': 31.405168533325195, 'learning_rate': 2.9142720719793122e-08, 'fcm_dpo/beta': 0.0010362120810896158, 'fcm_dpo/q_t': 0.40200504660606384, 'fcm_dpo/delta': -0.02832759916782379, 'fcm_dpo/margin': 412.146240234375, 'margin_dpo/margin_mean': 412.146240234375, 'margin_dpo/margin_std': 520.8572998046875, 'logps/chosen': -502.47320556640625, 'logps/rejected': -952.5516967773438, 'logps/ref_chosen': -44.69159698486328, 'logps/ref_rejected': -82.62385559082031, 'KL/chosen_KL_mean': -457.7816162109375, 'KL/rejected_KL_mean': -869.9278564453125, 'KL/mean': -663.854736328125, 'KL/std': 509.3709411621094, 'logits/chosen': -1.012029767036438, 'logits/rejected': -1.0366127490997314, 'epoch': 0.86} + 86%|████████▌ | 587/681 [24:57<04:00, 2.56s/it] 86%|████████▋ | 588/681 [24:59<03:51, 2.49s/it] {'loss': 1.1261, 'grad_norm': 30.63210678100586, 'learning_rate': 2.8544309805740018e-08, 'fcm_dpo/beta': 0.001035462599247694, 'fcm_dpo/q_t': 0.4189639091491699, 'fcm_dpo/delta': 0.05226360261440277, 'fcm_dpo/margin': 337.400146484375, 'margin_dpo/margin_mean': 337.4001770019531, 'margin_dpo/margin_std': 472.3351745605469, 'logps/chosen': -684.7158203125, 'logps/rejected': -1079.19091796875, 'logps/ref_chosen': -50.29494857788086, 'logps/ref_rejected': -107.36988067626953, 'KL/chosen_KL_mean': -634.4208984375, 'KL/rejected_KL_mean': -971.821044921875, 'KL/mean': -803.1209716796875, 'KL/std': 458.4549560546875, 'logits/chosen': -0.9633903503417969, 'logits/rejected': -0.9861800670623779, 'epoch': 0.86} + 86%|████████▋ | 588/681 [24:59<03:51, 2.49s/it] 86%|████████▋ | 589/681 [25:01<03:48, 2.49s/it] {'loss': 1.0514, 'grad_norm': 27.6535587310791, 'learning_rate': 2.7951734455078786e-08, 'fcm_dpo/beta': 0.001036192523315549, 'fcm_dpo/q_t': 0.39515334367752075, 'fcm_dpo/delta': -0.057322654873132706, 'fcm_dpo/margin': 438.8658142089844, 'margin_dpo/margin_mean': 438.8658142089844, 'margin_dpo/margin_std': 524.8668823242188, 'logps/chosen': -691.3133544921875, 'logps/rejected': -1181.904541015625, 'logps/ref_chosen': -59.929908752441406, 'logps/ref_rejected': -111.65534973144531, 'KL/chosen_KL_mean': -631.3834838867188, 'KL/rejected_KL_mean': -1070.249267578125, 'KL/mean': -850.8163452148438, 'KL/std': 498.56488037109375, 'logits/chosen': -0.9325675964355469, 'logits/rejected': -0.9372642636299133, 'epoch': 0.86} + 86%|████████▋ | 589/681 [25:01<03:48, 2.49s/it] 87%|████████▋ | 590/681 [25:04<03:43, 2.46s/it] {'loss': 1.0636, 'grad_norm': 29.574190139770508, 'learning_rate': 2.736501028272095e-08, 'fcm_dpo/beta': 0.0010251689236611128, 'fcm_dpo/q_t': 0.40004873275756836, 'fcm_dpo/delta': -0.04011045768857002, 'fcm_dpo/margin': 427.59405517578125, 'margin_dpo/margin_mean': 427.59405517578125, 'margin_dpo/margin_std': 530.9572143554688, 'logps/chosen': -619.2021484375, 'logps/rejected': -1097.0491943359375, 'logps/ref_chosen': -55.80979537963867, 'logps/ref_rejected': -106.06282043457031, 'KL/chosen_KL_mean': -563.392333984375, 'KL/rejected_KL_mean': -990.9863891601562, 'KL/mean': -777.1893310546875, 'KL/std': 485.23968505859375, 'logits/chosen': -0.9315773248672485, 'logits/rejected': -0.9519675970077515, 'epoch': 0.87} + 87%|████████▋ | 590/681 [25:04<03:43, 2.46s/it] 87%|████████▋ | 591/681 [25:06<03:33, 2.37s/it] {'loss': 1.0985, 'grad_norm': 27.988250732421875, 'learning_rate': 2.678415274939408e-08, 'fcm_dpo/beta': 0.0010215968359261751, 'fcm_dpo/q_t': 0.40842732787132263, 'fcm_dpo/delta': 0.0033291950821876526, 'fcm_dpo/margin': 388.35614013671875, 'margin_dpo/margin_mean': 388.35614013671875, 'margin_dpo/margin_std': 533.7264404296875, 'logps/chosen': -660.4113159179688, 'logps/rejected': -1076.3131103515625, 'logps/ref_chosen': -56.24061965942383, 'logps/ref_rejected': -83.78629302978516, 'KL/chosen_KL_mean': -604.170654296875, 'KL/rejected_KL_mean': -992.52685546875, 'KL/mean': -798.3487548828125, 'KL/std': 467.26275634765625, 'logits/chosen': -0.9853817224502563, 'logits/rejected': -0.9650702476501465, 'epoch': 0.87} + 87%|████████▋ | 591/681 [25:06<03:33, 2.37s/it] 87%|████████▋ | 592/681 [25:09<03:36, 2.43s/it] {'loss': 1.1678, 'grad_norm': 25.876665115356445, 'learning_rate': 2.6209177161234442e-08, 'fcm_dpo/beta': 0.0010291507933288813, 'fcm_dpo/q_t': 0.4191494584083557, 'fcm_dpo/delta': 0.04319499433040619, 'fcm_dpo/margin': 348.22406005859375, 'margin_dpo/margin_mean': 348.22406005859375, 'margin_dpo/margin_std': 647.0997314453125, 'logps/chosen': -671.6214599609375, 'logps/rejected': -1047.63818359375, 'logps/ref_chosen': -47.94025421142578, 'logps/ref_rejected': -75.73287963867188, 'KL/chosen_KL_mean': -623.6812133789062, 'KL/rejected_KL_mean': -971.9052734375, 'KL/mean': -797.793212890625, 'KL/std': 479.56719970703125, 'logits/chosen': -0.9463398456573486, 'logits/rejected': -0.9439194798469543, 'epoch': 0.87} + 87%|████████▋ | 592/681 [25:09<03:36, 2.43s/it] 87%|████████▋ | 593/681 [25:11<03:37, 2.47s/it] {'loss': 1.2072, 'grad_norm': 34.479095458984375, 'learning_rate': 2.564009866938349e-08, 'fcm_dpo/beta': 0.0010393188567832112, 'fcm_dpo/q_t': 0.43733033537864685, 'fcm_dpo/delta': 0.03283300623297691, 'fcm_dpo/margin': 266.99151611328125, 'margin_dpo/margin_mean': 266.99151611328125, 'margin_dpo/margin_std': 553.5676879882812, 'logps/chosen': -661.2098388671875, 'logps/rejected': -940.4185791015625, 'logps/ref_chosen': -48.690757751464844, 'logps/ref_rejected': -60.90800094604492, 'KL/chosen_KL_mean': -612.51904296875, 'KL/rejected_KL_mean': -879.5105590820312, 'KL/mean': -746.0147705078125, 'KL/std': 498.0760498046875, 'logits/chosen': -0.8614813089370728, 'logits/rejected': -0.849102258682251, 'epoch': 0.87} + 87%|████████▋ | 593/681 [25:11<03:37, 2.47s/it] 87%|████████▋ | 594/681 [25:14<03:35, 2.48s/it] {'loss': 1.1371, 'grad_norm': 31.0626220703125, 'learning_rate': 2.5076932269588708e-08, 'fcm_dpo/beta': 0.0010341550223529339, 'fcm_dpo/q_t': 0.4182147681713104, 'fcm_dpo/delta': -0.04993312805891037, 'fcm_dpo/margin': 343.2637939453125, 'margin_dpo/margin_mean': 343.2638244628906, 'margin_dpo/margin_std': 524.0340576171875, 'logps/chosen': -622.3153076171875, 'logps/rejected': -996.743896484375, 'logps/ref_chosen': -54.93488693237305, 'logps/ref_rejected': -86.09967803955078, 'KL/chosen_KL_mean': -567.38037109375, 'KL/rejected_KL_mean': -910.6441650390625, 'KL/mean': -739.0123291015625, 'KL/std': 496.5671691894531, 'logits/chosen': -0.9724768400192261, 'logits/rejected': -0.9574602246284485, 'epoch': 0.87} + 87%|████████▋ | 594/681 [25:14<03:35, 2.48s/it] 87%|████████▋ | 595/681 [25:16<03:36, 2.52s/it] {'loss': 1.1078, 'grad_norm': 31.87458610534668, 'learning_rate': 2.451969280180849e-08, 'fcm_dpo/beta': 0.0010293896775692701, 'fcm_dpo/q_t': 0.41420266032218933, 'fcm_dpo/delta': 0.013885049149394035, 'fcm_dpo/margin': 375.58038330078125, 'margin_dpo/margin_mean': 375.58038330078125, 'margin_dpo/margin_std': 548.4652099609375, 'logps/chosen': -624.103271484375, 'logps/rejected': -1030.890625, 'logps/ref_chosen': -49.4204216003418, 'logps/ref_rejected': -80.62731170654297, 'KL/chosen_KL_mean': -574.682861328125, 'KL/rejected_KL_mean': -950.2633056640625, 'KL/mean': -762.4730224609375, 'KL/std': 482.28497314453125, 'logits/chosen': -0.928321361541748, 'logits/rejected': -0.9355098009109497, 'epoch': 0.87} + 87%|████████▋ | 595/681 [25:16<03:36, 2.52s/it] 88%|████████▊ | 596/681 [25:19<03:36, 2.55s/it] {'loss': 1.1853, 'grad_norm': 49.97145080566406, 'learning_rate': 2.396839494982103e-08, 'fcm_dpo/beta': 0.0010479073971509933, 'fcm_dpo/q_t': 0.431826651096344, 'fcm_dpo/delta': 0.09277448803186417, 'fcm_dpo/margin': 295.7396240234375, 'margin_dpo/margin_mean': 295.7396240234375, 'margin_dpo/margin_std': 582.1591796875, 'logps/chosen': -697.196533203125, 'logps/rejected': -1013.235595703125, 'logps/ref_chosen': -59.791683197021484, 'logps/ref_rejected': -80.09111785888672, 'KL/chosen_KL_mean': -637.4048461914062, 'KL/rejected_KL_mean': -933.1444091796875, 'KL/mean': -785.274658203125, 'KL/std': 496.126708984375, 'logits/chosen': -0.942278265953064, 'logits/rejected': -0.9016916155815125, 'epoch': 0.88} + 88%|████████▊ | 596/681 [25:19<03:36, 2.55s/it] 88%|████████▊ | 597/681 [25:21<03:35, 2.56s/it] {'loss': 1.0513, 'grad_norm': 33.3725471496582, 'learning_rate': 2.3423053240837514e-08, 'fcm_dpo/beta': 0.0010269451886415482, 'fcm_dpo/q_t': 0.39144447445869446, 'fcm_dpo/delta': -0.09347677230834961, 'fcm_dpo/margin': 473.59698486328125, 'margin_dpo/margin_mean': 473.59698486328125, 'margin_dpo/margin_std': 604.17333984375, 'logps/chosen': -650.3740844726562, 'logps/rejected': -1167.404052734375, 'logps/ref_chosen': -57.26078796386719, 'logps/ref_rejected': -100.6937255859375, 'KL/chosen_KL_mean': -593.11328125, 'KL/rejected_KL_mean': -1066.7103271484375, 'KL/mean': -829.9117431640625, 'KL/std': 540.2069091796875, 'logits/chosen': -0.8926633596420288, 'logits/rejected': -0.9352051019668579, 'epoch': 0.88} + 88%|████████▊ | 597/681 [25:21<03:35, 2.56s/it] 88%|████████▊ | 598/681 [25:24<03:28, 2.51s/it] {'loss': 1.1243, 'grad_norm': 26.78837013244629, 'learning_rate': 2.2883682045119062e-08, 'fcm_dpo/beta': 0.0010294051608070731, 'fcm_dpo/q_t': 0.4142112135887146, 'fcm_dpo/delta': 0.03294781595468521, 'fcm_dpo/margin': 356.9803771972656, 'margin_dpo/margin_mean': 356.98040771484375, 'margin_dpo/margin_std': 516.56591796875, 'logps/chosen': -661.4014892578125, 'logps/rejected': -1055.3072509765625, 'logps/ref_chosen': -52.51850509643555, 'logps/ref_rejected': -89.44385528564453, 'KL/chosen_KL_mean': -608.8829345703125, 'KL/rejected_KL_mean': -965.8634033203125, 'KL/mean': -787.3731689453125, 'KL/std': 469.7054138183594, 'logits/chosen': -0.9758745431900024, 'logits/rejected': -0.9783375859260559, 'epoch': 0.88} + 88%|████████▊ | 598/681 [25:24<03:28, 2.51s/it] 88%|████████▊ | 599/681 [25:27<03:32, 2.60s/it] {'loss': 1.129, 'grad_norm': 28.11864471435547, 'learning_rate': 2.2350295575598367e-08, 'fcm_dpo/beta': 0.0010303169256076217, 'fcm_dpo/q_t': 0.4193369150161743, 'fcm_dpo/delta': -0.039691608399152756, 'fcm_dpo/margin': 322.57403564453125, 'margin_dpo/margin_mean': 322.57403564453125, 'margin_dpo/margin_std': 408.63031005859375, 'logps/chosen': -662.695068359375, 'logps/rejected': -1018.4449462890625, 'logps/ref_chosen': -49.802677154541016, 'logps/ref_rejected': -82.978515625, 'KL/chosen_KL_mean': -612.8923950195312, 'KL/rejected_KL_mean': -935.4663696289062, 'KL/mean': -774.1793823242188, 'KL/std': 445.364501953125, 'logits/chosen': -0.9431838989257812, 'logits/rejected': -0.9493868350982666, 'epoch': 0.88} + 88%|████████▊ | 599/681 [25:27<03:32, 2.60s/it] 88%|████████▊ | 600/681 [25:29<03:32, 2.62s/it] {'loss': 1.1772, 'grad_norm': 35.54128646850586, 'learning_rate': 2.1822907887504932e-08, 'fcm_dpo/beta': 0.001043910626322031, 'fcm_dpo/q_t': 0.4305538535118103, 'fcm_dpo/delta': 0.09467366337776184, 'fcm_dpo/margin': 295.3502197265625, 'margin_dpo/margin_mean': 295.3502197265625, 'margin_dpo/margin_std': 545.6597290039062, 'logps/chosen': -724.086669921875, 'logps/rejected': -1038.45849609375, 'logps/ref_chosen': -66.43487548828125, 'logps/ref_rejected': -85.45649719238281, 'KL/chosen_KL_mean': -657.6517333984375, 'KL/rejected_KL_mean': -953.001953125, 'KL/mean': -805.3268432617188, 'KL/std': 460.2043151855469, 'logits/chosen': -1.0306495428085327, 'logits/rejected': -1.0190434455871582, 'epoch': 0.88} + 88%|████████▊ | 600/681 [25:29<03:32, 2.62s/it] 88%|████████▊ | 601/681 [25:32<03:23, 2.55s/it] {'loss': 1.088, 'grad_norm': 29.669403076171875, 'learning_rate': 2.1301532877994742e-08, 'fcm_dpo/beta': 0.0010490333661437035, 'fcm_dpo/q_t': 0.40675991773605347, 'fcm_dpo/delta': 0.0012606056407094002, 'fcm_dpo/margin': 380.12744140625, 'margin_dpo/margin_mean': 380.12744140625, 'margin_dpo/margin_std': 485.5259704589844, 'logps/chosen': -728.4661254882812, 'logps/rejected': -1144.15087890625, 'logps/ref_chosen': -59.13361358642578, 'logps/ref_rejected': -94.69093322753906, 'KL/chosen_KL_mean': -669.33251953125, 'KL/rejected_KL_mean': -1049.4599609375, 'KL/mean': -859.396240234375, 'KL/std': 509.2434997558594, 'logits/chosen': -0.9606672525405884, 'logits/rejected': -0.9689534306526184, 'epoch': 0.88} + 88%|████████▊ | 601/681 [25:32<03:23, 2.55s/it] 88%|████████▊ | 602/681 [25:34<03:20, 2.54s/it] {'loss': 1.0357, 'grad_norm': 48.49148178100586, 'learning_rate': 2.0786184285784298e-08, 'fcm_dpo/beta': 0.001045595621690154, 'fcm_dpo/q_t': 0.39179527759552, 'fcm_dpo/delta': -0.07681306451559067, 'fcm_dpo/margin': 452.1407470703125, 'margin_dpo/margin_mean': 452.1407470703125, 'margin_dpo/margin_std': 503.97027587890625, 'logps/chosen': -528.1539306640625, 'logps/rejected': -1019.3697509765625, 'logps/ref_chosen': -48.59352111816406, 'logps/ref_rejected': -87.6685562133789, 'KL/chosen_KL_mean': -479.5604248046875, 'KL/rejected_KL_mean': -931.701171875, 'KL/mean': -705.6307373046875, 'KL/std': 458.986328125, 'logits/chosen': -0.9890528917312622, 'logits/rejected': -1.0126900672912598, 'epoch': 0.88} + 88%|████████▊ | 602/681 [25:34<03:20, 2.54s/it] 89%|████████▊ | 603/681 [25:37<03:14, 2.50s/it] {'loss': 1.0773, 'grad_norm': 47.26500701904297, 'learning_rate': 2.0276875690788204e-08, 'fcm_dpo/beta': 0.0010269982740283012, 'fcm_dpo/q_t': 0.40095192193984985, 'fcm_dpo/delta': -0.04837334156036377, 'fcm_dpo/margin': 434.49066162109375, 'margin_dpo/margin_mean': 434.49066162109375, 'margin_dpo/margin_std': 614.062744140625, 'logps/chosen': -633.0875244140625, 'logps/rejected': -1097.4892578125, 'logps/ref_chosen': -70.41461944580078, 'logps/ref_rejected': -100.32559967041016, 'KL/chosen_KL_mean': -562.6729125976562, 'KL/rejected_KL_mean': -997.16357421875, 'KL/mean': -779.9182739257812, 'KL/std': 511.3848876953125, 'logits/chosen': -0.9793489575386047, 'logits/rejected': -0.9629038572311401, 'epoch': 0.89} + 89%|████████▊ | 603/681 [25:37<03:14, 2.50s/it] 89%|████████▊ | 604/681 [25:39<03:20, 2.60s/it] {'loss': 1.075, 'grad_norm': 33.27207565307617, 'learning_rate': 1.977362051376158e-08, 'fcm_dpo/beta': 0.0010149029549211264, 'fcm_dpo/q_t': 0.39813223481178284, 'fcm_dpo/delta': -0.056748565286397934, 'fcm_dpo/margin': 447.44915771484375, 'margin_dpo/margin_mean': 447.44915771484375, 'margin_dpo/margin_std': 625.2024536132812, 'logps/chosen': -640.7083129882812, 'logps/rejected': -1133.553955078125, 'logps/ref_chosen': -46.45808029174805, 'logps/ref_rejected': -91.8544921875, 'KL/chosen_KL_mean': -594.250244140625, 'KL/rejected_KL_mean': -1041.699462890625, 'KL/mean': -817.974853515625, 'KL/std': 504.0755615234375, 'logits/chosen': -0.9312797784805298, 'logits/rejected': -0.957409143447876, 'epoch': 0.89} + 89%|████████▊ | 604/681 [25:39<03:20, 2.60s/it] 89%|████████▉ | 605/681 [25:42<03:18, 2.61s/it] {'loss': 1.1146, 'grad_norm': 35.06957244873047, 'learning_rate': 1.9276432015946446e-08, 'fcm_dpo/beta': 0.0010130970040336251, 'fcm_dpo/q_t': 0.4142388701438904, 'fcm_dpo/delta': 0.021622149273753166, 'fcm_dpo/margin': 374.1934509277344, 'margin_dpo/margin_mean': 374.19342041015625, 'margin_dpo/margin_std': 554.1260986328125, 'logps/chosen': -679.8536376953125, 'logps/rejected': -1090.1026611328125, 'logps/ref_chosen': -66.24933624267578, 'logps/ref_rejected': -102.30496978759766, 'KL/chosen_KL_mean': -613.604248046875, 'KL/rejected_KL_mean': -987.7977294921875, 'KL/mean': -800.700927734375, 'KL/std': 475.6051025390625, 'logits/chosen': -0.926941990852356, 'logits/rejected': -0.9346826672554016, 'epoch': 0.89} + 89%|████████▉ | 605/681 [25:42<03:18, 2.61s/it] 89%|████████▉ | 606/681 [25:44<03:11, 2.55s/it] {'loss': 1.0817, 'grad_norm': 25.421344757080078, 'learning_rate': 1.8785323298722093e-08, 'fcm_dpo/beta': 0.001017784932628274, 'fcm_dpo/q_t': 0.4054357707500458, 'fcm_dpo/delta': -0.01989796943962574, 'fcm_dpo/margin': 411.5608825683594, 'margin_dpo/margin_mean': 411.5608825683594, 'margin_dpo/margin_std': 548.4053955078125, 'logps/chosen': -670.047607421875, 'logps/rejected': -1125.160888671875, 'logps/ref_chosen': -54.819122314453125, 'logps/ref_rejected': -98.37146759033203, 'KL/chosen_KL_mean': -615.2284545898438, 'KL/rejected_KL_mean': -1026.7894287109375, 'KL/mean': -821.0089111328125, 'KL/std': 506.0755615234375, 'logits/chosen': -0.9341834783554077, 'logits/rejected': -0.9450877904891968, 'epoch': 0.89} + 89%|████████▉ | 606/681 [25:45<03:11, 2.55s/it] 89%|████████▉ | 607/681 [25:47<03:10, 2.58s/it] {'loss': 1.1683, 'grad_norm': 29.748842239379883, 'learning_rate': 1.8300307303259904e-08, 'fcm_dpo/beta': 0.0010304426541551948, 'fcm_dpo/q_t': 0.4281555414199829, 'fcm_dpo/delta': 0.09002204239368439, 'fcm_dpo/margin': 303.0076904296875, 'margin_dpo/margin_mean': 303.0076904296875, 'margin_dpo/margin_std': 525.4556274414062, 'logps/chosen': -710.1298828125, 'logps/rejected': -1034.83056640625, 'logps/ref_chosen': -58.08403778076172, 'logps/ref_rejected': -79.777099609375, 'KL/chosen_KL_mean': -652.0457763671875, 'KL/rejected_KL_mean': -955.053466796875, 'KL/mean': -803.5496826171875, 'KL/std': 466.5636291503906, 'logits/chosen': -0.9252548217773438, 'logits/rejected': -0.9004162549972534, 'epoch': 0.89} + 89%|████████▉ | 607/681 [25:47<03:10, 2.58s/it] 89%|████████▉ | 608/681 [25:50<03:04, 2.52s/it] {'loss': 1.0818, 'grad_norm': 38.95619583129883, 'learning_rate': 1.7821396810182437e-08, 'fcm_dpo/beta': 0.001034360844641924, 'fcm_dpo/q_t': 0.4076157808303833, 'fcm_dpo/delta': 0.0031163040548563004, 'fcm_dpo/margin': 383.7120056152344, 'margin_dpo/margin_mean': 383.71197509765625, 'margin_dpo/margin_std': 460.55084228515625, 'logps/chosen': -646.8289184570312, 'logps/rejected': -1067.863525390625, 'logps/ref_chosen': -57.450836181640625, 'logps/ref_rejected': -94.77339172363281, 'KL/chosen_KL_mean': -589.3780517578125, 'KL/rejected_KL_mean': -973.090087890625, 'KL/mean': -781.2340698242188, 'KL/std': 448.39385986328125, 'logits/chosen': -0.9842853546142578, 'logits/rejected': -0.9917802810668945, 'epoch': 0.89} + 89%|████████▉ | 608/681 [25:50<03:04, 2.52s/it] 89%|████████▉ | 609/681 [25:52<02:56, 2.46s/it] {'loss': 1.0751, 'grad_norm': 29.039478302001953, 'learning_rate': 1.7348604439226617e-08, 'fcm_dpo/beta': 0.0010232683271169662, 'fcm_dpo/q_t': 0.3996211588382721, 'fcm_dpo/delta': -0.06700144708156586, 'fcm_dpo/margin': 453.39361572265625, 'margin_dpo/margin_mean': 453.39361572265625, 'margin_dpo/margin_std': 680.756103515625, 'logps/chosen': -668.6868896484375, 'logps/rejected': -1152.091064453125, 'logps/ref_chosen': -58.805355072021484, 'logps/ref_rejected': -88.81600952148438, 'KL/chosen_KL_mean': -609.8814697265625, 'KL/rejected_KL_mean': -1063.2750244140625, 'KL/mean': -836.5782470703125, 'KL/std': 583.9754638671875, 'logits/chosen': -1.0189104080200195, 'logits/rejected': -1.0311899185180664, 'epoch': 0.89} + 89%|████████▉ | 609/681 [25:52<02:56, 2.46s/it] 90%|████████▉ | 610/681 [25:54<02:52, 2.43s/it] {'loss': 1.1705, 'grad_norm': 47.39312744140625, 'learning_rate': 1.6881942648911074e-08, 'fcm_dpo/beta': 0.0010335487313568592, 'fcm_dpo/q_t': 0.42828184366226196, 'fcm_dpo/delta': 0.09469583630561829, 'fcm_dpo/margin': 298.15496826171875, 'margin_dpo/margin_mean': 298.15496826171875, 'margin_dpo/margin_std': 523.1708984375, 'logps/chosen': -681.43603515625, 'logps/rejected': -997.3013305664062, 'logps/ref_chosen': -65.69503784179688, 'logps/ref_rejected': -83.40538787841797, 'KL/chosen_KL_mean': -615.740966796875, 'KL/rejected_KL_mean': -913.8959350585938, 'KL/mean': -764.8184814453125, 'KL/std': 459.981201171875, 'logits/chosen': -0.9286304712295532, 'logits/rejected': -0.8889775276184082, 'epoch': 0.9} + 90%|████████▉ | 610/681 [25:54<02:52, 2.43s/it] 90%|████████▉ | 611/681 [25:57<02:48, 2.40s/it] {'loss': 1.026, 'grad_norm': 33.80018615722656, 'learning_rate': 1.6421423736208e-08, 'fcm_dpo/beta': 0.001017481554299593, 'fcm_dpo/q_t': 0.38256320357322693, 'fcm_dpo/delta': -0.14123646914958954, 'fcm_dpo/margin': 524.6498413085938, 'margin_dpo/margin_mean': 524.6498413085938, 'margin_dpo/margin_std': 674.8773193359375, 'logps/chosen': -669.985595703125, 'logps/rejected': -1228.366943359375, 'logps/ref_chosen': -52.59946823120117, 'logps/ref_rejected': -86.33099365234375, 'KL/chosen_KL_mean': -617.3861694335938, 'KL/rejected_KL_mean': -1142.035888671875, 'KL/mean': -879.7110595703125, 'KL/std': 586.503173828125, 'logits/chosen': -0.9945396184921265, 'logits/rejected': -1.0387227535247803, 'epoch': 0.9} + 90%|████████▉ | 611/681 [25:57<02:48, 2.40s/it] 90%|████████▉ | 612/681 [25:59<02:44, 2.38s/it] {'loss': 1.0831, 'grad_norm': 24.813846588134766, 'learning_rate': 1.5967059836219042e-08, 'fcm_dpo/beta': 0.001010039821267128, 'fcm_dpo/q_t': 0.40539655089378357, 'fcm_dpo/delta': -0.015054378658533096, 'fcm_dpo/margin': 410.1479797363281, 'margin_dpo/margin_mean': 410.1479797363281, 'margin_dpo/margin_std': 539.570556640625, 'logps/chosen': -742.338623046875, 'logps/rejected': -1181.475341796875, 'logps/ref_chosen': -59.32372283935547, 'logps/ref_rejected': -88.31239318847656, 'KL/chosen_KL_mean': -683.014892578125, 'KL/rejected_KL_mean': -1093.162841796875, 'KL/mean': -888.0888671875, 'KL/std': 499.14178466796875, 'logits/chosen': -1.0098485946655273, 'logits/rejected': -1.000733733177185, 'epoch': 0.9} + 90%|████████▉ | 612/681 [25:59<02:44, 2.38s/it] 90%|█████████ | 613/681 [26:01<02:44, 2.42s/it] {'loss': 1.0269, 'grad_norm': 35.546573638916016, 'learning_rate': 1.551886292185553e-08, 'fcm_dpo/beta': 0.0009931058157235384, 'fcm_dpo/q_t': 0.38921403884887695, 'fcm_dpo/delta': -0.08487021923065186, 'fcm_dpo/margin': 484.1087646484375, 'margin_dpo/margin_mean': 484.1087646484375, 'margin_dpo/margin_std': 536.681884765625, 'logps/chosen': -651.31201171875, 'logps/rejected': -1180.79833984375, 'logps/ref_chosen': -59.72996520996094, 'logps/ref_rejected': -105.10752868652344, 'KL/chosen_KL_mean': -591.58203125, 'KL/rejected_KL_mean': -1075.690673828125, 'KL/mean': -833.6363525390625, 'KL/std': 527.3922119140625, 'logits/chosen': -1.0186982154846191, 'logits/rejected': -1.0692498683929443, 'epoch': 0.9} + 90%|█████████ | 613/681 [26:01<02:44, 2.42s/it] 90%|█████████ | 614/681 [26:04<02:43, 2.45s/it] {'loss': 1.0948, 'grad_norm': 34.93425750732422, 'learning_rate': 1.507684480352292e-08, 'fcm_dpo/beta': 0.0009876348776742816, 'fcm_dpo/q_t': 0.4031580984592438, 'fcm_dpo/delta': -0.028807081282138824, 'fcm_dpo/margin': 432.84381103515625, 'margin_dpo/margin_mean': 432.84381103515625, 'margin_dpo/margin_std': 632.5867919921875, 'logps/chosen': -710.541259765625, 'logps/rejected': -1195.12548828125, 'logps/ref_chosen': -52.93898010253906, 'logps/ref_rejected': -104.67938232421875, 'KL/chosen_KL_mean': -657.602294921875, 'KL/rejected_KL_mean': -1090.4461669921875, 'KL/mean': -874.024169921875, 'KL/std': 522.123046875, 'logits/chosen': -0.9571743011474609, 'logits/rejected': -1.0247644186019897, 'epoch': 0.9} + 90%|█████████ | 614/681 [26:04<02:43, 2.45s/it] 90%|█████████ | 615/681 [26:07<02:47, 2.53s/it] {'loss': 1.1292, 'grad_norm': 25.01183319091797, 'learning_rate': 1.4641017128809801e-08, 'fcm_dpo/beta': 0.000991692766547203, 'fcm_dpo/q_t': 0.41585174202919006, 'fcm_dpo/delta': 0.02896309643983841, 'fcm_dpo/margin': 374.772216796875, 'margin_dpo/margin_mean': 374.772216796875, 'margin_dpo/margin_std': 597.46826171875, 'logps/chosen': -691.8067016601562, 'logps/rejected': -1095.939208984375, 'logps/ref_chosen': -65.81727600097656, 'logps/ref_rejected': -95.17749786376953, 'KL/chosen_KL_mean': -625.9893798828125, 'KL/rejected_KL_mean': -1000.7615966796875, 'KL/mean': -813.37548828125, 'KL/std': 563.71142578125, 'logits/chosen': -0.9686431884765625, 'logits/rejected': -0.9801833033561707, 'epoch': 0.9} + 90%|█████████ | 615/681 [26:07<02:47, 2.53s/it] 90%|█████████ | 616/681 [26:09<02:49, 2.61s/it] {'loss': 1.1674, 'grad_norm': 39.332359313964844, 'learning_rate': 1.4211391382180637e-08, 'fcm_dpo/beta': 0.0010039603803306818, 'fcm_dpo/q_t': 0.42835602164268494, 'fcm_dpo/delta': 0.08676433563232422, 'fcm_dpo/margin': 314.47821044921875, 'margin_dpo/margin_mean': 314.478271484375, 'margin_dpo/margin_std': 547.7131958007812, 'logps/chosen': -810.03515625, 'logps/rejected': -1134.0810546875, 'logps/ref_chosen': -65.13285827636719, 'logps/ref_rejected': -74.70050048828125, 'KL/chosen_KL_mean': -744.90234375, 'KL/rejected_KL_mean': -1059.3804931640625, 'KL/mean': -902.1414184570312, 'KL/std': 491.1352233886719, 'logits/chosen': -1.029462218284607, 'logits/rejected': -1.003951907157898, 'epoch': 0.9} + 90%|█████████ | 616/681 [26:09<02:49, 2.61s/it] 91%|█████████ | 617/681 [26:12<02:46, 2.61s/it] {'loss': 1.2256, 'grad_norm': 54.0463981628418, 'learning_rate': 1.378797888467345e-08, 'fcm_dpo/beta': 0.001032671658322215, 'fcm_dpo/q_t': 0.44537049531936646, 'fcm_dpo/delta': 0.16463825106620789, 'fcm_dpo/margin': 231.5129852294922, 'margin_dpo/margin_mean': 231.51300048828125, 'margin_dpo/margin_std': 500.28045654296875, 'logps/chosen': -744.61328125, 'logps/rejected': -977.3547973632812, 'logps/ref_chosen': -63.005550384521484, 'logps/ref_rejected': -64.234130859375, 'KL/chosen_KL_mean': -681.607666015625, 'KL/rejected_KL_mean': -913.1207275390625, 'KL/mean': -797.3641967773438, 'KL/std': 436.149169921875, 'logits/chosen': -0.9412636756896973, 'logits/rejected': -0.8970128297805786, 'epoch': 0.91} + 91%|█████████ | 617/681 [26:12<02:46, 2.61s/it] 91%|█████████ | 618/681 [26:15<02:44, 2.61s/it] {'loss': 1.0778, 'grad_norm': 36.161380767822266, 'learning_rate': 1.3370790793601371e-08, 'fcm_dpo/beta': 0.0010278007248416543, 'fcm_dpo/q_t': 0.39031103253364563, 'fcm_dpo/delta': -0.1088884249329567, 'fcm_dpo/margin': 489.87103271484375, 'margin_dpo/margin_mean': 489.87103271484375, 'margin_dpo/margin_std': 753.9830322265625, 'logps/chosen': -785.822509765625, 'logps/rejected': -1300.74560546875, 'logps/ref_chosen': -67.10134887695312, 'logps/ref_rejected': -92.15340423583984, 'KL/chosen_KL_mean': -718.72119140625, 'KL/rejected_KL_mean': -1208.59228515625, 'KL/mean': -963.6566772460938, 'KL/std': 597.5335693359375, 'logits/chosen': -0.9905341267585754, 'logits/rejected': -1.0219985246658325, 'epoch': 0.91} + 91%|█████████ | 618/681 [26:15<02:44, 2.61s/it] 91%|█████████ | 619/681 [26:17<02:41, 2.60s/it] {'loss': 1.1572, 'grad_norm': 38.189334869384766, 'learning_rate': 1.2959838102258535e-08, 'fcm_dpo/beta': 0.0010191791225224733, 'fcm_dpo/q_t': 0.4178283214569092, 'fcm_dpo/delta': 0.00997202843427658, 'fcm_dpo/margin': 383.0700378417969, 'margin_dpo/margin_mean': 383.070068359375, 'margin_dpo/margin_std': 730.9571533203125, 'logps/chosen': -750.713134765625, 'logps/rejected': -1170.990234375, 'logps/ref_chosen': -55.978233337402344, 'logps/ref_rejected': -93.1854019165039, 'KL/chosen_KL_mean': -694.73486328125, 'KL/rejected_KL_mean': -1077.804931640625, 'KL/mean': -886.2698974609375, 'KL/std': 564.9559326171875, 'logits/chosen': -0.9747885465621948, 'logits/rejected': -0.9790507555007935, 'epoch': 0.91} + 91%|█████████ | 619/681 [26:17<02:41, 2.60s/it] 91%|█████████ | 620/681 [26:20<02:37, 2.58s/it] {'loss': 1.1238, 'grad_norm': 30.40621566772461, 'learning_rate': 1.2555131639630567e-08, 'fcm_dpo/beta': 0.0010204799473285675, 'fcm_dpo/q_t': 0.41603296995162964, 'fcm_dpo/delta': 0.029505692422389984, 'fcm_dpo/margin': 363.9824523925781, 'margin_dpo/margin_mean': 363.982421875, 'margin_dpo/margin_std': 553.198974609375, 'logps/chosen': -707.4935302734375, 'logps/rejected': -1090.089111328125, 'logps/ref_chosen': -59.79750061035156, 'logps/ref_rejected': -78.41075134277344, 'KL/chosen_KL_mean': -647.696044921875, 'KL/rejected_KL_mean': -1011.6784057617188, 'KL/mean': -829.687255859375, 'KL/std': 487.70135498046875, 'logits/chosen': -1.0277998447418213, 'logits/rejected': -1.0267902612686157, 'epoch': 0.91} + 91%|█████████ | 620/681 [26:20<02:37, 2.58s/it] 91%|█████████ | 621/681 [26:22<02:34, 2.57s/it] {'loss': 1.0065, 'grad_norm': 46.220027923583984, 'learning_rate': 1.2156682070109086e-08, 'fcm_dpo/beta': 0.001008342718705535, 'fcm_dpo/q_t': 0.37482962012290955, 'fcm_dpo/delta': -0.16588960587978363, 'fcm_dpo/margin': 552.135986328125, 'margin_dpo/margin_mean': 552.135986328125, 'margin_dpo/margin_std': 646.8200073242188, 'logps/chosen': -683.976806640625, 'logps/rejected': -1270.548583984375, 'logps/ref_chosen': -53.93375778198242, 'logps/ref_rejected': -88.36951446533203, 'KL/chosen_KL_mean': -630.0430908203125, 'KL/rejected_KL_mean': -1182.178955078125, 'KL/mean': -906.111083984375, 'KL/std': 607.4686279296875, 'logits/chosen': -1.044553518295288, 'logits/rejected': -1.0921435356140137, 'epoch': 0.91} + 91%|█████████ | 621/681 [26:22<02:34, 2.57s/it] 91%|█████████▏| 622/681 [26:25<02:30, 2.56s/it] {'loss': 1.1149, 'grad_norm': 29.612194061279297, 'learning_rate': 1.1764499893210878e-08, 'fcm_dpo/beta': 0.0009906619088724256, 'fcm_dpo/q_t': 0.4140721559524536, 'fcm_dpo/delta': 0.01709701120853424, 'fcm_dpo/margin': 386.9031677246094, 'margin_dpo/margin_mean': 386.90313720703125, 'margin_dpo/margin_std': 587.5882568359375, 'logps/chosen': -698.4732666015625, 'logps/rejected': -1110.609375, 'logps/ref_chosen': -60.28582000732422, 'logps/ref_rejected': -85.51873779296875, 'KL/chosen_KL_mean': -638.1873779296875, 'KL/rejected_KL_mean': -1025.090576171875, 'KL/mean': -831.6390380859375, 'KL/std': 474.5973815917969, 'logits/chosen': -0.9218890070915222, 'logits/rejected': -0.8999383449554443, 'epoch': 0.91} + 91%|█████████▏| 622/681 [26:25<02:30, 2.56s/it] 91%|█████████▏| 623/681 [26:27<02:21, 2.44s/it] {'loss': 1.1834, 'grad_norm': 41.82392883300781, 'learning_rate': 1.1378595443300998e-08, 'fcm_dpo/beta': 0.0010092295706272125, 'fcm_dpo/q_t': 0.431568443775177, 'fcm_dpo/delta': 0.09511934220790863, 'fcm_dpo/margin': 305.0443420410156, 'margin_dpo/margin_mean': 305.0443115234375, 'margin_dpo/margin_std': 590.980224609375, 'logps/chosen': -773.57763671875, 'logps/rejected': -1099.548095703125, 'logps/ref_chosen': -64.1569595336914, 'logps/ref_rejected': -85.08304595947266, 'KL/chosen_KL_mean': -709.4207153320312, 'KL/rejected_KL_mean': -1014.465087890625, 'KL/mean': -861.94287109375, 'KL/std': 493.4505920410156, 'logits/chosen': -1.078977346420288, 'logits/rejected': -1.0663626194000244, 'epoch': 0.91} + 91%|█████████▏| 623/681 [26:27<02:21, 2.44s/it] 92%|█████████▏| 624/681 [26:30<02:21, 2.48s/it] {'loss': 1.0517, 'grad_norm': 32.26824951171875, 'learning_rate': 1.0998978889320582e-08, 'fcm_dpo/beta': 0.0010058375773951411, 'fcm_dpo/q_t': 0.39310479164123535, 'fcm_dpo/delta': -0.06120828539133072, 'fcm_dpo/margin': 455.75689697265625, 'margin_dpo/margin_mean': 455.75689697265625, 'margin_dpo/margin_std': 550.7293701171875, 'logps/chosen': -763.052734375, 'logps/rejected': -1244.02294921875, 'logps/ref_chosen': -71.91862487792969, 'logps/ref_rejected': -97.13203430175781, 'KL/chosen_KL_mean': -691.134033203125, 'KL/rejected_KL_mean': -1146.890869140625, 'KL/mean': -919.0125122070312, 'KL/std': 508.752685546875, 'logits/chosen': -1.066502332687378, 'logits/rejected': -1.0555529594421387, 'epoch': 0.92} + 92%|█████████▏| 624/681 [26:30<02:21, 2.48s/it] 92%|█████████▏| 625/681 [26:32<02:19, 2.49s/it] {'loss': 1.0347, 'grad_norm': 47.13850402832031, 'learning_rate': 1.0625660234518913e-08, 'fcm_dpo/beta': 0.0009930902160704136, 'fcm_dpo/q_t': 0.394126296043396, 'fcm_dpo/delta': -0.06502003967761993, 'fcm_dpo/margin': 465.24932861328125, 'margin_dpo/margin_mean': 465.24932861328125, 'margin_dpo/margin_std': 516.6107788085938, 'logps/chosen': -710.1890869140625, 'logps/rejected': -1203.186767578125, 'logps/ref_chosen': -58.342071533203125, 'logps/ref_rejected': -86.09038543701172, 'KL/chosen_KL_mean': -651.8470458984375, 'KL/rejected_KL_mean': -1117.0963134765625, 'KL/mean': -884.4716796875, 'KL/std': 530.2442626953125, 'logits/chosen': -0.9659937024116516, 'logits/rejected': -0.9809095859527588, 'epoch': 0.92} + 92%|█████████▏| 625/681 [26:32<02:19, 2.49s/it] 92%|█████████▏| 626/681 [26:35<02:22, 2.59s/it] {'loss': 1.1941, 'grad_norm': 27.3458251953125, 'learning_rate': 1.0258649316189721e-08, 'fcm_dpo/beta': 0.0010072626173496246, 'fcm_dpo/q_t': 0.4323081970214844, 'fcm_dpo/delta': 0.10651648044586182, 'fcm_dpo/margin': 294.22900390625, 'margin_dpo/margin_mean': 294.22900390625, 'margin_dpo/margin_std': 575.285400390625, 'logps/chosen': -850.9091796875, 'logps/rejected': -1169.2142333984375, 'logps/ref_chosen': -75.11260986328125, 'logps/ref_rejected': -99.188720703125, 'KL/chosen_KL_mean': -775.7965087890625, 'KL/rejected_KL_mean': -1070.0255126953125, 'KL/mean': -922.9110107421875, 'KL/std': 596.8397216796875, 'logits/chosen': -0.9275539517402649, 'logits/rejected': -0.9108865261077881, 'epoch': 0.92} + 92%|█████████▏| 626/681 [26:35<02:22, 2.59s/it] 92%|█████████▏| 627/681 [26:38<02:23, 2.65s/it] {'loss': 1.0171, 'grad_norm': 26.04566192626953, 'learning_rate': 9.897955805412e-09, 'fcm_dpo/beta': 0.0009910902008414268, 'fcm_dpo/q_t': 0.3810996115207672, 'fcm_dpo/delta': -0.176089346408844, 'fcm_dpo/margin': 571.476318359375, 'margin_dpo/margin_mean': 571.476318359375, 'margin_dpo/margin_std': 751.080810546875, 'logps/chosen': -604.9935913085938, 'logps/rejected': -1235.481201171875, 'logps/ref_chosen': -47.74314880371094, 'logps/ref_rejected': -106.75448608398438, 'KL/chosen_KL_mean': -557.2504272460938, 'KL/rejected_KL_mean': -1128.726806640625, 'KL/mean': -842.9886474609375, 'KL/std': 656.2432250976562, 'logits/chosen': -0.8594233989715576, 'logits/rejected': -0.9377764463424683, 'epoch': 0.92} + 92%|█████████▏| 627/681 [26:38<02:23, 2.65s/it] 92%|█████████▏| 628/681 [26:40<02:18, 2.61s/it] {'loss': 1.113, 'grad_norm': 27.978281021118164, 'learning_rate': 9.543589206795238e-09, 'fcm_dpo/beta': 0.0009758264059200883, 'fcm_dpo/q_t': 0.41142743825912476, 'fcm_dpo/delta': 0.007305025588721037, 'fcm_dpo/margin': 402.702880859375, 'margin_dpo/margin_mean': 402.702880859375, 'margin_dpo/margin_std': 607.0352783203125, 'logps/chosen': -783.0570068359375, 'logps/rejected': -1227.131591796875, 'logps/ref_chosen': -60.182945251464844, 'logps/ref_rejected': -101.55467224121094, 'KL/chosen_KL_mean': -722.8740844726562, 'KL/rejected_KL_mean': -1125.5770263671875, 'KL/mean': -924.2254638671875, 'KL/std': 539.3365478515625, 'logits/chosen': -1.021456003189087, 'logits/rejected': -1.0348306894302368, 'epoch': 0.92} + 92%|█████████▏| 628/681 [26:40<02:18, 2.61s/it] 92%|█████████▏| 629/681 [26:43<02:15, 2.61s/it] {'loss': 1.0973, 'grad_norm': 31.393468856811523, 'learning_rate': 9.19555885822887e-09, 'fcm_dpo/beta': 0.0009785511065274477, 'fcm_dpo/q_t': 0.4093359112739563, 'fcm_dpo/delta': 0.010228663682937622, 'fcm_dpo/margin': 398.7236328125, 'margin_dpo/margin_mean': 398.7236328125, 'margin_dpo/margin_std': 530.4059448242188, 'logps/chosen': -762.2110595703125, 'logps/rejected': -1188.374755859375, 'logps/ref_chosen': -64.21354675292969, 'logps/ref_rejected': -91.65367126464844, 'KL/chosen_KL_mean': -697.99755859375, 'KL/rejected_KL_mean': -1096.72119140625, 'KL/mean': -897.359375, 'KL/std': 534.4760131835938, 'logits/chosen': -1.0401864051818848, 'logits/rejected': -1.0507943630218506, 'epoch': 0.92} + 92%|█████████▏| 629/681 [26:43<02:15, 2.61s/it] 93%|█████████▎| 630/681 [26:45<02:13, 2.62s/it] {'loss': 1.2595, 'grad_norm': 44.8847541809082, 'learning_rate': 8.85387393063622e-09, 'fcm_dpo/beta': 0.000984629150480032, 'fcm_dpo/q_t': 0.4526089131832123, 'fcm_dpo/delta': 0.05507725104689598, 'fcm_dpo/margin': 223.2398681640625, 'margin_dpo/margin_mean': 223.2398681640625, 'margin_dpo/margin_std': 596.5005493164062, 'logps/chosen': -693.6614990234375, 'logps/rejected': -941.2086791992188, 'logps/ref_chosen': -59.29100036621094, 'logps/ref_rejected': -83.59829711914062, 'KL/chosen_KL_mean': -634.3704833984375, 'KL/rejected_KL_mean': -857.6103515625, 'KL/mean': -745.990478515625, 'KL/std': 511.8554382324219, 'logits/chosen': -0.9998958706855774, 'logits/rejected': -0.9623087644577026, 'epoch': 0.93} + 93%|█████████▎| 630/681 [26:45<02:13, 2.62s/it] 93%|█████████▎| 631/681 [26:48<02:08, 2.58s/it] {'loss': 1.1545, 'grad_norm': 27.521265029907227, 'learning_rate': 8.518543427732949e-09, 'fcm_dpo/beta': 0.0009971531108021736, 'fcm_dpo/q_t': 0.4193703532218933, 'fcm_dpo/delta': 0.04609350860118866, 'fcm_dpo/margin': 356.5467529296875, 'margin_dpo/margin_mean': 356.5467529296875, 'margin_dpo/margin_std': 633.4856567382812, 'logps/chosen': -801.511474609375, 'logps/rejected': -1179.5562744140625, 'logps/ref_chosen': -59.45360565185547, 'logps/ref_rejected': -80.95156860351562, 'KL/chosen_KL_mean': -742.057861328125, 'KL/rejected_KL_mean': -1098.604736328125, 'KL/mean': -920.331298828125, 'KL/std': 551.6812133789062, 'logits/chosen': -1.0666249990463257, 'logits/rejected': -1.0676807165145874, 'epoch': 0.93} + 93%|█████████▎| 631/681 [26:48<02:08, 2.58s/it] 93%|█████████▎| 632/681 [26:50<02:02, 2.50s/it] {'loss': 1.1364, 'grad_norm': 31.457181930541992, 'learning_rate': 8.189576185789637e-09, 'fcm_dpo/beta': 0.0009988134261220694, 'fcm_dpo/q_t': 0.4161521792411804, 'fcm_dpo/delta': 0.03309358283877373, 'fcm_dpo/margin': 368.1659240722656, 'margin_dpo/margin_mean': 368.16595458984375, 'margin_dpo/margin_std': 587.306884765625, 'logps/chosen': -722.5360717773438, 'logps/rejected': -1115.510498046875, 'logps/ref_chosen': -61.35155487060547, 'logps/ref_rejected': -86.16017150878906, 'KL/chosen_KL_mean': -661.1845092773438, 'KL/rejected_KL_mean': -1029.350341796875, 'KL/mean': -845.2674560546875, 'KL/std': 465.203369140625, 'logits/chosen': -1.0178093910217285, 'logits/rejected': -1.0131831169128418, 'epoch': 0.93} + 93%|█████████▎| 632/681 [26:50<02:02, 2.50s/it] 93%|█████████▎| 633/681 [26:53<01:59, 2.48s/it] {'loss': 1.2266, 'grad_norm': 33.3397102355957, 'learning_rate': 7.866980873399015e-09, 'fcm_dpo/beta': 0.0010103812674060464, 'fcm_dpo/q_t': 0.44230562448501587, 'fcm_dpo/delta': 0.04477893188595772, 'fcm_dpo/margin': 245.96484375, 'margin_dpo/margin_mean': 245.96484375, 'margin_dpo/margin_std': 546.748046875, 'logps/chosen': -779.2808837890625, 'logps/rejected': -1059.551513671875, 'logps/ref_chosen': -57.27816390991211, 'logps/ref_rejected': -91.58395385742188, 'KL/chosen_KL_mean': -722.002685546875, 'KL/rejected_KL_mean': -967.967529296875, 'KL/mean': -844.985107421875, 'KL/std': 474.93060302734375, 'logits/chosen': -1.0611484050750732, 'logits/rejected': -1.0687685012817383, 'epoch': 0.93} + 93%|█████████▎| 633/681 [26:53<01:59, 2.48s/it] 93%|█████████▎| 634/681 [26:55<01:58, 2.52s/it] {'loss': 1.2001, 'grad_norm': 29.091527938842773, 'learning_rate': 7.550765991247654e-09, 'fcm_dpo/beta': 0.0010201697004958987, 'fcm_dpo/q_t': 0.43637216091156006, 'fcm_dpo/delta': 0.025866778567433357, 'fcm_dpo/margin': 278.1268615722656, 'margin_dpo/margin_mean': 278.1268310546875, 'margin_dpo/margin_std': 560.8475952148438, 'logps/chosen': -861.1891479492188, 'logps/rejected': -1179.82275390625, 'logps/ref_chosen': -66.61896514892578, 'logps/ref_rejected': -107.12564849853516, 'KL/chosen_KL_mean': -794.5701904296875, 'KL/rejected_KL_mean': -1072.697021484375, 'KL/mean': -933.633544921875, 'KL/std': 556.5794067382812, 'logits/chosen': -0.9369779825210571, 'logits/rejected': -0.932574987411499, 'epoch': 0.93} + 93%|█████████▎| 634/681 [26:55<01:58, 2.52s/it] 93%|█████████▎| 635/681 [26:58<01:55, 2.51s/it] {'loss': 1.1452, 'grad_norm': 36.472694396972656, 'learning_rate': 7.240939871891699e-09, 'fcm_dpo/beta': 0.0010265845339745283, 'fcm_dpo/q_t': 0.421801894903183, 'fcm_dpo/delta': 0.04961933195590973, 'fcm_dpo/margin': 343.0375671386719, 'margin_dpo/margin_mean': 343.0375671386719, 'margin_dpo/margin_std': 582.0855712890625, 'logps/chosen': -772.060791015625, 'logps/rejected': -1123.643310546875, 'logps/ref_chosen': -73.95551300048828, 'logps/ref_rejected': -82.50045776367188, 'KL/chosen_KL_mean': -698.1052856445312, 'KL/rejected_KL_mean': -1041.142822265625, 'KL/mean': -869.6240844726562, 'KL/std': 603.190185546875, 'logits/chosen': -1.003667950630188, 'logits/rejected': -0.9820040464401245, 'epoch': 0.93} + 93%|█████████▎| 635/681 [26:58<01:55, 2.51s/it] 93%|█████████▎| 636/681 [27:01<01:55, 2.58s/it] {'loss': 1.1069, 'grad_norm': 28.409608840942383, 'learning_rate': 6.937510679537628e-09, 'fcm_dpo/beta': 0.0010364481713622808, 'fcm_dpo/q_t': 0.4110804796218872, 'fcm_dpo/delta': -0.0052045732736587524, 'fcm_dpo/margin': 389.94598388671875, 'margin_dpo/margin_mean': 389.9460144042969, 'margin_dpo/margin_std': 601.882568359375, 'logps/chosen': -730.1439208984375, 'logps/rejected': -1142.4398193359375, 'logps/ref_chosen': -59.628910064697266, 'logps/ref_rejected': -81.97883605957031, 'KL/chosen_KL_mean': -670.5150146484375, 'KL/rejected_KL_mean': -1060.4609375, 'KL/mean': -865.4879760742188, 'KL/std': 573.9064331054688, 'logits/chosen': -0.9490704536437988, 'logits/rejected': -0.9512150287628174, 'epoch': 0.93} + 93%|█████████▎| 636/681 [27:01<01:55, 2.58s/it] 94%|█████████▎| 637/681 [27:03<01:53, 2.57s/it] {'loss': 1.0734, 'grad_norm': 30.13751792907715, 'learning_rate': 6.640486409826785e-09, 'fcm_dpo/beta': 0.0010184976272284985, 'fcm_dpo/q_t': 0.4004812240600586, 'fcm_dpo/delta': -0.04213680326938629, 'fcm_dpo/margin': 431.7720947265625, 'margin_dpo/margin_mean': 431.7720947265625, 'margin_dpo/margin_std': 576.40869140625, 'logps/chosen': -702.34716796875, 'logps/rejected': -1182.871826171875, 'logps/ref_chosen': -49.652687072753906, 'logps/ref_rejected': -98.40513610839844, 'KL/chosen_KL_mean': -652.6944580078125, 'KL/rejected_KL_mean': -1084.4666748046875, 'KL/mean': -868.58056640625, 'KL/std': 552.986572265625, 'logits/chosen': -1.028292179107666, 'logits/rejected': -1.0698986053466797, 'epoch': 0.94} + 94%|█████████▎| 637/681 [27:03<01:53, 2.57s/it] 94%|█████████▎| 638/681 [27:06<01:54, 2.65s/it] {'loss': 1.1535, 'grad_norm': 40.42203140258789, 'learning_rate': 6.349874889624962e-09, 'fcm_dpo/beta': 0.001035545952618122, 'fcm_dpo/q_t': 0.41145235300064087, 'fcm_dpo/delta': 0.03378577530384064, 'fcm_dpo/margin': 352.8500671386719, 'margin_dpo/margin_mean': 352.8500671386719, 'margin_dpo/margin_std': 612.448974609375, 'logps/chosen': -712.9718017578125, 'logps/rejected': -1086.966796875, 'logps/ref_chosen': -58.156639099121094, 'logps/ref_rejected': -79.3014907836914, 'KL/chosen_KL_mean': -654.815185546875, 'KL/rejected_KL_mean': -1007.665283203125, 'KL/mean': -831.240234375, 'KL/std': 540.849609375, 'logits/chosen': -0.9185539484024048, 'logits/rejected': -0.8953431248664856, 'epoch': 0.94} + 94%|█████████▎| 638/681 [27:06<01:54, 2.65s/it] 94%|█████████▍| 639/681 [27:09<01:51, 2.65s/it] {'loss': 1.3033, 'grad_norm': 89.78936004638672, 'learning_rate': 6.065683776815933e-09, 'fcm_dpo/beta': 0.0010275545064359903, 'fcm_dpo/q_t': 0.46009236574172974, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 169.1563262939453, 'margin_dpo/margin_mean': 169.15634155273438, 'margin_dpo/margin_std': 571.2896728515625, 'logps/chosen': -913.1106567382812, 'logps/rejected': -1084.21875, 'logps/ref_chosen': -72.32319641113281, 'logps/ref_rejected': -74.2749252319336, 'KL/chosen_KL_mean': -840.7874755859375, 'KL/rejected_KL_mean': -1009.9437866210938, 'KL/mean': -925.3656005859375, 'KL/std': 470.619140625, 'logits/chosen': -0.9101927876472473, 'logits/rejected': -0.8411852121353149, 'epoch': 0.94} + 94%|█████████▍| 639/681 [27:09<01:51, 2.65s/it] 94%|█████████▍| 640/681 [27:11<01:48, 2.64s/it] {'loss': 1.0178, 'grad_norm': 40.64341354370117, 'learning_rate': 5.7879205600998296e-09, 'fcm_dpo/beta': 0.0010085678659379482, 'fcm_dpo/q_t': 0.379363477230072, 'fcm_dpo/delta': -0.16897350549697876, 'fcm_dpo/margin': 554.99072265625, 'margin_dpo/margin_mean': 554.99072265625, 'margin_dpo/margin_std': 715.995361328125, 'logps/chosen': -717.5319213867188, 'logps/rejected': -1324.9884033203125, 'logps/ref_chosen': -56.13436508178711, 'logps/ref_rejected': -108.60014343261719, 'KL/chosen_KL_mean': -661.3975830078125, 'KL/rejected_KL_mean': -1216.3883056640625, 'KL/mean': -938.8929443359375, 'KL/std': 608.6758422851562, 'logits/chosen': -0.9130121469497681, 'logits/rejected': -0.9402400255203247, 'epoch': 0.94} + 94%|█████████▍| 640/681 [27:11<01:48, 2.64s/it] 94%|█████████▍| 641/681 [27:14<01:44, 2.62s/it] {'loss': 1.1793, 'grad_norm': 30.03923797607422, 'learning_rate': 5.516592558795746e-09, 'fcm_dpo/beta': 0.0010051288409158587, 'fcm_dpo/q_t': 0.42806270718574524, 'fcm_dpo/delta': 0.07953417301177979, 'fcm_dpo/margin': 321.3974609375, 'margin_dpo/margin_mean': 321.3974609375, 'margin_dpo/margin_std': 621.0615234375, 'logps/chosen': -823.4765625, 'logps/rejected': -1166.8695068359375, 'logps/ref_chosen': -64.99689483642578, 'logps/ref_rejected': -86.99232482910156, 'KL/chosen_KL_mean': -758.479736328125, 'KL/rejected_KL_mean': -1079.877197265625, 'KL/mean': -919.178466796875, 'KL/std': 485.34954833984375, 'logits/chosen': -0.9936619997024536, 'logits/rejected': -0.9894883036613464, 'epoch': 0.94} + 94%|█████████▍| 641/681 [27:14<01:44, 2.62s/it] 94%|█████████▍| 642/681 [27:16<01:40, 2.58s/it] {'loss': 1.1415, 'grad_norm': 40.49140167236328, 'learning_rate': 5.251706922648868e-09, 'fcm_dpo/beta': 0.0010101549560204148, 'fcm_dpo/q_t': 0.4139317274093628, 'fcm_dpo/delta': -0.017909951508045197, 'fcm_dpo/margin': 412.79522705078125, 'margin_dpo/margin_mean': 412.7952575683594, 'margin_dpo/margin_std': 782.796142578125, 'logps/chosen': -820.0969848632812, 'logps/rejected': -1277.445068359375, 'logps/ref_chosen': -65.68924713134766, 'logps/ref_rejected': -110.24205017089844, 'KL/chosen_KL_mean': -754.40771484375, 'KL/rejected_KL_mean': -1167.2030029296875, 'KL/mean': -960.8053588867188, 'KL/std': 651.447021484375, 'logits/chosen': -0.9240103960037231, 'logits/rejected': -0.9492435455322266, 'epoch': 0.94} + 94%|█████████▍| 642/681 [27:16<01:40, 2.58s/it] 94%|█████████▍| 643/681 [27:19<01:39, 2.61s/it] {'loss': 1.1687, 'grad_norm': 39.44825744628906, 'learning_rate': 4.993270631642038e-09, 'fcm_dpo/beta': 0.0010031081037595868, 'fcm_dpo/q_t': 0.42987653613090515, 'fcm_dpo/delta': -0.013042459264397621, 'fcm_dpo/margin': 305.1148986816406, 'margin_dpo/margin_mean': 305.1148986816406, 'margin_dpo/margin_std': 513.481689453125, 'logps/chosen': -734.8589477539062, 'logps/rejected': -1075.4921875, 'logps/ref_chosen': -51.94999694824219, 'logps/ref_rejected': -87.46833801269531, 'KL/chosen_KL_mean': -682.908935546875, 'KL/rejected_KL_mean': -988.0238037109375, 'KL/mean': -835.4664306640625, 'KL/std': 504.5992431640625, 'logits/chosen': -1.0819464921951294, 'logits/rejected': -1.0728490352630615, 'epoch': 0.94} + 94%|█████████▍| 643/681 [27:19<01:39, 2.61s/it] 95%|█████████▍| 644/681 [27:22<01:36, 2.61s/it] {'loss': 1.1787, 'grad_norm': 36.88336181640625, 'learning_rate': 4.741290495811873e-09, 'fcm_dpo/beta': 0.0010127369314432144, 'fcm_dpo/q_t': 0.4254780113697052, 'fcm_dpo/delta': 0.06703174114227295, 'fcm_dpo/margin': 331.0244140625, 'margin_dpo/margin_mean': 331.0244140625, 'margin_dpo/margin_std': 643.7864990234375, 'logps/chosen': -718.07470703125, 'logps/rejected': -1077.2181396484375, 'logps/ref_chosen': -59.017662048339844, 'logps/ref_rejected': -87.13668823242188, 'KL/chosen_KL_mean': -659.0570068359375, 'KL/rejected_KL_mean': -990.0814208984375, 'KL/mean': -824.5692138671875, 'KL/std': 571.162841796875, 'logits/chosen': -0.9800692796707153, 'logits/rejected': -0.9834997653961182, 'epoch': 0.95} + 95%|█████████▍| 644/681 [27:22<01:36, 2.61s/it] 95%|█████████▍| 645/681 [27:24<01:33, 2.61s/it] {'loss': 1.3102, 'grad_norm': 85.91677856445312, 'learning_rate': 4.495773155069299e-09, 'fcm_dpo/beta': 0.0010233320062980056, 'fcm_dpo/q_t': 0.4574551284313202, 'fcm_dpo/delta': 0.032772209495306015, 'fcm_dpo/margin': 191.55389404296875, 'margin_dpo/margin_mean': 191.55389404296875, 'margin_dpo/margin_std': 646.7799072265625, 'logps/chosen': -750.8569946289062, 'logps/rejected': -984.315673828125, 'logps/ref_chosen': -55.87602233886719, 'logps/ref_rejected': -97.78080749511719, 'KL/chosen_KL_mean': -694.98095703125, 'KL/rejected_KL_mean': -886.534912109375, 'KL/mean': -790.7579345703125, 'KL/std': 483.12469482421875, 'logits/chosen': -0.9518178105354309, 'logits/rejected': -0.940590500831604, 'epoch': 0.95} + 95%|█████████▍| 645/681 [27:24<01:33, 2.61s/it] 95%|█████████▍| 646/681 [27:27<01:29, 2.55s/it] {'loss': 1.1861, 'grad_norm': 37.95448684692383, 'learning_rate': 4.256725079024553e-09, 'fcm_dpo/beta': 0.0010392502881586552, 'fcm_dpo/q_t': 0.4354844391345978, 'fcm_dpo/delta': 0.12298852950334549, 'fcm_dpo/margin': 270.04473876953125, 'margin_dpo/margin_mean': 270.04473876953125, 'margin_dpo/margin_std': 478.6196594238281, 'logps/chosen': -698.8125610351562, 'logps/rejected': -985.0872802734375, 'logps/ref_chosen': -61.275787353515625, 'logps/ref_rejected': -77.50580596923828, 'KL/chosen_KL_mean': -637.5367431640625, 'KL/rejected_KL_mean': -907.58154296875, 'KL/mean': -772.5591430664062, 'KL/std': 423.0977783203125, 'logits/chosen': -0.9876176118850708, 'logits/rejected': -0.9597277641296387, 'epoch': 0.95} + 95%|█████████▍| 646/681 [27:27<01:29, 2.55s/it] 95%|█████████▌| 647/681 [27:29<01:28, 2.60s/it] {'loss': 1.0951, 'grad_norm': 39.47010040283203, 'learning_rate': 4.024152566816791e-09, 'fcm_dpo/beta': 0.001051081228069961, 'fcm_dpo/q_t': 0.40922337770462036, 'fcm_dpo/delta': 0.009098398499190807, 'fcm_dpo/margin': 372.2242736816406, 'margin_dpo/margin_mean': 372.22430419921875, 'margin_dpo/margin_std': 483.755615234375, 'logps/chosen': -633.5025024414062, 'logps/rejected': -1044.393798828125, 'logps/ref_chosen': -54.8524169921875, 'logps/ref_rejected': -93.5194091796875, 'KL/chosen_KL_mean': -578.6500854492188, 'KL/rejected_KL_mean': -950.8743896484375, 'KL/mean': -764.76220703125, 'KL/std': 502.62335205078125, 'logits/chosen': -0.8613879680633545, 'logits/rejected': -0.8856191039085388, 'epoch': 0.95} + 95%|█████████▌| 647/681 [27:29<01:28, 2.60s/it] 95%|█████████▌| 648/681 [27:32<01:23, 2.53s/it] {'loss': 1.0229, 'grad_norm': 26.944942474365234, 'learning_rate': 3.798061746947995e-09, 'fcm_dpo/beta': 0.0010257565882056952, 'fcm_dpo/q_t': 0.38258910179138184, 'fcm_dpo/delta': -0.14960268139839172, 'fcm_dpo/margin': 527.59228515625, 'margin_dpo/margin_mean': 527.59228515625, 'margin_dpo/margin_std': 698.5792846679688, 'logps/chosen': -673.0101318359375, 'logps/rejected': -1245.143798828125, 'logps/ref_chosen': -54.17146682739258, 'logps/ref_rejected': -98.7127914428711, 'KL/chosen_KL_mean': -618.8386840820312, 'KL/rejected_KL_mean': -1146.430908203125, 'KL/mean': -882.6348266601562, 'KL/std': 597.5460815429688, 'logits/chosen': -1.024482011795044, 'logits/rejected': -1.0812674760818481, 'epoch': 0.95} + 95%|█████████▌| 648/681 [27:32<01:23, 2.53s/it] 95%|█████████▌| 649/681 [27:34<01:22, 2.58s/it] {'loss': 1.2132, 'grad_norm': 37.985782623291016, 'learning_rate': 3.5784585771215235e-09, 'fcm_dpo/beta': 0.00104125018697232, 'fcm_dpo/q_t': 0.43895599246025085, 'fcm_dpo/delta': 0.12126278877258301, 'fcm_dpo/margin': 270.52008056640625, 'margin_dpo/margin_mean': 270.5200500488281, 'margin_dpo/margin_std': 580.6653442382812, 'logps/chosen': -703.569091796875, 'logps/rejected': -991.68603515625, 'logps/ref_chosen': -62.480350494384766, 'logps/ref_rejected': -80.07717895507812, 'KL/chosen_KL_mean': -641.0887451171875, 'KL/rejected_KL_mean': -911.6088256835938, 'KL/mean': -776.3487548828125, 'KL/std': 461.4584045410156, 'logits/chosen': -1.062050223350525, 'logits/rejected': -1.049740195274353, 'epoch': 0.95} + 95%|█████████▌| 649/681 [27:34<01:22, 2.58s/it] 95%|█████████▌| 650/681 [27:37<01:20, 2.60s/it] {'loss': 1.1016, 'grad_norm': 34.3553352355957, 'learning_rate': 3.3653488440851253e-09, 'fcm_dpo/beta': 0.0010373436380177736, 'fcm_dpo/q_t': 0.40345823764801025, 'fcm_dpo/delta': -0.035463616251945496, 'fcm_dpo/margin': 418.1771545410156, 'margin_dpo/margin_mean': 418.1771545410156, 'margin_dpo/margin_std': 649.6616821289062, 'logps/chosen': -737.853271484375, 'logps/rejected': -1198.202392578125, 'logps/ref_chosen': -56.09281921386719, 'logps/ref_rejected': -98.26483917236328, 'KL/chosen_KL_mean': -681.760498046875, 'KL/rejected_KL_mean': -1099.9375, 'KL/mean': -890.84912109375, 'KL/std': 568.1502685546875, 'logits/chosen': -0.9683902859687805, 'logits/rejected': -0.9844076037406921, 'epoch': 0.95} + 95%|█████████▌| 650/681 [27:37<01:20, 2.60s/it] 96%|█████████▌| 651/681 [27:40<01:17, 2.58s/it] {'loss': 1.0078, 'grad_norm': 31.118337631225586, 'learning_rate': 3.158738163478475e-09, 'fcm_dpo/beta': 0.0010176938958466053, 'fcm_dpo/q_t': 0.38280242681503296, 'fcm_dpo/delta': -0.12348881363868713, 'fcm_dpo/margin': 508.0043640136719, 'margin_dpo/margin_mean': 508.0043640136719, 'margin_dpo/margin_std': 563.4285888671875, 'logps/chosen': -532.8199462890625, 'logps/rejected': -1097.356689453125, 'logps/ref_chosen': -43.42544937133789, 'logps/ref_rejected': -99.95791625976562, 'KL/chosen_KL_mean': -489.3945007324219, 'KL/rejected_KL_mean': -997.3988037109375, 'KL/mean': -743.3966674804688, 'KL/std': 551.2980346679688, 'logits/chosen': -1.0014972686767578, 'logits/rejected': -1.0536954402923584, 'epoch': 0.96} + 96%|█████████▌| 651/681 [27:40<01:17, 2.58s/it] 96%|█████████▌| 652/681 [27:42<01:14, 2.59s/it] {'loss': 1.1381, 'grad_norm': 38.289588928222656, 'learning_rate': 2.9586319796851555e-09, 'fcm_dpo/beta': 0.001018517417833209, 'fcm_dpo/q_t': 0.41681456565856934, 'fcm_dpo/delta': 0.029787715524435043, 'fcm_dpo/margin': 364.4455871582031, 'margin_dpo/margin_mean': 364.44561767578125, 'margin_dpo/margin_std': 614.3720703125, 'logps/chosen': -679.134033203125, 'logps/rejected': -1092.7706298828125, 'logps/ref_chosen': -62.57680892944336, 'logps/ref_rejected': -111.76779174804688, 'KL/chosen_KL_mean': -616.5572509765625, 'KL/rejected_KL_mean': -981.0028076171875, 'KL/mean': -798.780029296875, 'KL/std': 537.9312744140625, 'logits/chosen': -1.037517786026001, 'logits/rejected': -1.0513508319854736, 'epoch': 0.96} + 96%|█████████▌| 652/681 [27:42<01:14, 2.59s/it] 96%|█████████▌| 653/681 [27:45<01:11, 2.57s/it] {'loss': 1.1448, 'grad_norm': 32.48366165161133, 'learning_rate': 2.7650355656892166e-09, 'fcm_dpo/beta': 0.0010254649678245187, 'fcm_dpo/q_t': 0.4201071858406067, 'fcm_dpo/delta': 0.04164200276136398, 'fcm_dpo/margin': 350.8651123046875, 'margin_dpo/margin_mean': 350.8651123046875, 'margin_dpo/margin_std': 597.3900146484375, 'logps/chosen': -782.2269897460938, 'logps/rejected': -1175.228759765625, 'logps/ref_chosen': -61.11295700073242, 'logps/ref_rejected': -103.24960327148438, 'KL/chosen_KL_mean': -721.114013671875, 'KL/rejected_KL_mean': -1071.979248046875, 'KL/mean': -896.546630859375, 'KL/std': 555.6015625, 'logits/chosen': -1.0504939556121826, 'logits/rejected': -1.0696086883544922, 'epoch': 0.96} + 96%|█████████▌| 653/681 [27:45<01:11, 2.57s/it] 96%|█████████▌| 654/681 [27:47<01:09, 2.58s/it] {'loss': 1.1394, 'grad_norm': 29.535350799560547, 'learning_rate': 2.577954022936174e-09, 'fcm_dpo/beta': 0.0010335429105907679, 'fcm_dpo/q_t': 0.4215894043445587, 'fcm_dpo/delta': 0.053280387073755264, 'fcm_dpo/margin': 337.2967224121094, 'margin_dpo/margin_mean': 337.2967224121094, 'margin_dpo/margin_std': 535.9071044921875, 'logps/chosen': -712.9915771484375, 'logps/rejected': -1087.333984375, 'logps/ref_chosen': -61.7281379699707, 'logps/ref_rejected': -98.7738037109375, 'KL/chosen_KL_mean': -651.263427734375, 'KL/rejected_KL_mean': -988.5601196289062, 'KL/mean': -819.9118041992188, 'KL/std': 463.3123779296875, 'logits/chosen': -1.0323097705841064, 'logits/rejected': -1.0458433628082275, 'epoch': 0.96} + 96%|█████████▌| 654/681 [27:47<01:09, 2.58s/it] 96%|█████████▌| 655/681 [27:50<01:06, 2.55s/it] {'loss': 1.1228, 'grad_norm': 32.12811279296875, 'learning_rate': 2.397392281198729e-09, 'fcm_dpo/beta': 0.0010393604170531034, 'fcm_dpo/q_t': 0.4142192006111145, 'fcm_dpo/delta': 0.01507401093840599, 'fcm_dpo/margin': 370.9095458984375, 'margin_dpo/margin_mean': 370.9095458984375, 'margin_dpo/margin_std': 588.18408203125, 'logps/chosen': -649.7203369140625, 'logps/rejected': -1069.344970703125, 'logps/ref_chosen': -49.576812744140625, 'logps/ref_rejected': -98.29183197021484, 'KL/chosen_KL_mean': -600.1435546875, 'KL/rejected_KL_mean': -971.0531005859375, 'KL/mean': -785.5983276367188, 'KL/std': 495.2772216796875, 'logits/chosen': -0.987531304359436, 'logits/rejected': -1.030656337738037, 'epoch': 0.96} + 96%|█████████▌| 655/681 [27:50<01:06, 2.55s/it] 96%|█████████▋| 656/681 [27:52<01:05, 2.61s/it] {'loss': 0.9602, 'grad_norm': 47.409523010253906, 'learning_rate': 2.223355098446622e-09, 'fcm_dpo/beta': 0.0010131911840289831, 'fcm_dpo/q_t': 0.3669106960296631, 'fcm_dpo/delta': -0.20445646345615387, 'fcm_dpo/margin': 585.0487060546875, 'margin_dpo/margin_mean': 585.0487060546875, 'margin_dpo/margin_std': 599.3031005859375, 'logps/chosen': -688.2027587890625, 'logps/rejected': -1334.376708984375, 'logps/ref_chosen': -52.54943084716797, 'logps/ref_rejected': -113.67464447021484, 'KL/chosen_KL_mean': -635.6533203125, 'KL/rejected_KL_mean': -1220.7020263671875, 'KL/mean': -928.177734375, 'KL/std': 598.5955810546875, 'logits/chosen': -0.9014885425567627, 'logits/rejected': -0.9727605581283569, 'epoch': 0.96} + 96%|█████████▋| 656/681 [27:53<01:05, 2.61s/it] 96%|█████████▋| 657/681 [27:55<00:59, 2.50s/it] {'loss': 1.0397, 'grad_norm': 31.6498966217041, 'learning_rate': 2.055847060721566e-09, 'fcm_dpo/beta': 0.0009820859413594007, 'fcm_dpo/q_t': 0.39006322622299194, 'fcm_dpo/delta': -0.09292930364608765, 'fcm_dpo/margin': 496.7813720703125, 'margin_dpo/margin_mean': 496.7813720703125, 'margin_dpo/margin_std': 621.57470703125, 'logps/chosen': -662.6953125, 'logps/rejected': -1210.6910400390625, 'logps/ref_chosen': -46.700538635253906, 'logps/ref_rejected': -97.91487121582031, 'KL/chosen_KL_mean': -615.9948120117188, 'KL/rejected_KL_mean': -1112.776123046875, 'KL/mean': -864.385498046875, 'KL/std': 595.528564453125, 'logits/chosen': -1.0363855361938477, 'logits/rejected': -1.0796374082565308, 'epoch': 0.96} + 96%|█████████▋| 657/681 [27:55<00:59, 2.50s/it] 97%|█████████▋| 658/681 [27:57<00:56, 2.45s/it] {'loss': 1.124, 'grad_norm': 35.535884857177734, 'learning_rate': 1.8948725820160662e-09, 'fcm_dpo/beta': 0.00098237837664783, 'fcm_dpo/q_t': 0.4168873727321625, 'fcm_dpo/delta': 0.04202239215373993, 'fcm_dpo/margin': 365.70672607421875, 'margin_dpo/margin_mean': 365.70672607421875, 'margin_dpo/margin_std': 524.6923217773438, 'logps/chosen': -725.7801513671875, 'logps/rejected': -1126.46826171875, 'logps/ref_chosen': -60.95820999145508, 'logps/ref_rejected': -95.93949127197266, 'KL/chosen_KL_mean': -664.8219604492188, 'KL/rejected_KL_mean': -1030.5286865234375, 'KL/mean': -847.6753540039062, 'KL/std': 466.9921875, 'logits/chosen': -0.9736270904541016, 'logits/rejected': -0.9889022707939148, 'epoch': 0.97} + 97%|█████████▋| 658/681 [27:57<00:56, 2.45s/it] 97%|█████████▋| 659/681 [28:00<00:55, 2.54s/it] {'loss': 1.0978, 'grad_norm': 25.766517639160156, 'learning_rate': 1.7404359041573723e-09, 'fcm_dpo/beta': 0.0009883574675768614, 'fcm_dpo/q_t': 0.41146624088287354, 'fcm_dpo/delta': 0.014654016122221947, 'fcm_dpo/margin': 390.34075927734375, 'margin_dpo/margin_mean': 390.3408203125, 'margin_dpo/margin_std': 514.568359375, 'logps/chosen': -681.4256591796875, 'logps/rejected': -1082.4945068359375, 'logps/ref_chosen': -76.74298095703125, 'logps/ref_rejected': -87.4709701538086, 'KL/chosen_KL_mean': -604.6826782226562, 'KL/rejected_KL_mean': -995.0235595703125, 'KL/mean': -799.8530883789062, 'KL/std': 473.0037841796875, 'logits/chosen': -0.9188249111175537, 'logits/rejected': -0.8752338886260986, 'epoch': 0.97} + 97%|█████████▋| 659/681 [28:00<00:55, 2.54s/it] 97%|█████████▋| 660/681 [28:02<00:52, 2.50s/it] {'loss': 1.0585, 'grad_norm': 49.228233337402344, 'learning_rate': 1.592541096695571e-09, 'fcm_dpo/beta': 0.000986184342764318, 'fcm_dpo/q_t': 0.39679035544395447, 'fcm_dpo/delta': -0.0536465048789978, 'fcm_dpo/margin': 457.5646057128906, 'margin_dpo/margin_mean': 457.5645751953125, 'margin_dpo/margin_std': 571.7623291015625, 'logps/chosen': -705.294921875, 'logps/rejected': -1179.771728515625, 'logps/ref_chosen': -59.04788589477539, 'logps/ref_rejected': -75.96005249023438, 'KL/chosen_KL_mean': -646.2470703125, 'KL/rejected_KL_mean': -1103.811767578125, 'KL/mean': -875.0294189453125, 'KL/std': 534.911865234375, 'logits/chosen': -0.9931929111480713, 'logits/rejected': -0.995282769203186, 'epoch': 0.97} + 97%|█████████▋| 660/681 [28:02<00:52, 2.50s/it] 97%|█████████▋| 661/681 [28:04<00:48, 2.41s/it] {'loss': 1.0753, 'grad_norm': 37.587955474853516, 'learning_rate': 1.4511920567963908e-09, 'fcm_dpo/beta': 0.0009824851294979453, 'fcm_dpo/q_t': 0.40450412034988403, 'fcm_dpo/delta': -0.022726453840732574, 'fcm_dpo/margin': 428.9695739746094, 'margin_dpo/margin_mean': 428.96954345703125, 'margin_dpo/margin_std': 554.69189453125, 'logps/chosen': -618.14892578125, 'logps/rejected': -1082.4501953125, 'logps/ref_chosen': -50.673973083496094, 'logps/ref_rejected': -86.00569152832031, 'KL/chosen_KL_mean': -567.4749145507812, 'KL/rejected_KL_mean': -996.4445190429688, 'KL/mean': -781.959716796875, 'KL/std': 581.6163330078125, 'logits/chosen': -1.0141196250915527, 'logits/rejected': -1.0162596702575684, 'epoch': 0.97} + 97%|█████████▋| 661/681 [28:04<00:48, 2.41s/it] 97%|█████████▋| 662/681 [28:07<00:48, 2.57s/it] {'loss': 1.1943, 'grad_norm': 34.92011260986328, 'learning_rate': 1.3163925091384532e-09, 'fcm_dpo/beta': 0.0009899393189698458, 'fcm_dpo/q_t': 0.4304784834384918, 'fcm_dpo/delta': 0.09499240666627884, 'fcm_dpo/margin': 311.1284484863281, 'margin_dpo/margin_mean': 311.1284484863281, 'margin_dpo/margin_std': 634.947998046875, 'logps/chosen': -757.1141357421875, 'logps/rejected': -1088.0374755859375, 'logps/ref_chosen': -69.26106262207031, 'logps/ref_rejected': -89.05593872070312, 'KL/chosen_KL_mean': -687.85302734375, 'KL/rejected_KL_mean': -998.9815673828125, 'KL/mean': -843.4173583984375, 'KL/std': 508.81829833984375, 'logits/chosen': -0.9697600603103638, 'logits/rejected': -0.9555808901786804, 'epoch': 0.97} + 97%|█████████▋| 662/681 [28:07<00:48, 2.57s/it] 97%|█████████▋| 663/681 [28:10<00:47, 2.65s/it] {'loss': 1.1103, 'grad_norm': 28.98455047607422, 'learning_rate': 1.1881460058152382e-09, 'fcm_dpo/beta': 0.000994151458144188, 'fcm_dpo/q_t': 0.40818944573402405, 'fcm_dpo/delta': -0.01281630527228117, 'fcm_dpo/margin': 414.7147521972656, 'margin_dpo/margin_mean': 414.71478271484375, 'margin_dpo/margin_std': 667.8819580078125, 'logps/chosen': -691.5870361328125, 'logps/rejected': -1155.348388671875, 'logps/ref_chosen': -64.87890625, 'logps/ref_rejected': -113.92536926269531, 'KL/chosen_KL_mean': -626.7081298828125, 'KL/rejected_KL_mean': -1041.4229736328125, 'KL/mean': -834.0655517578125, 'KL/std': 581.6314697265625, 'logits/chosen': -1.0159096717834473, 'logits/rejected': -1.0367473363876343, 'epoch': 0.97} + 97%|█████████▋| 663/681 [28:10<00:47, 2.65s/it] 98%|█████████▊| 664/681 [28:13<00:45, 2.65s/it] {'loss': 1.0597, 'grad_norm': 26.692996978759766, 'learning_rate': 1.066455926241383e-09, 'fcm_dpo/beta': 0.000979449599981308, 'fcm_dpo/q_t': 0.3954671621322632, 'fcm_dpo/delta': -0.06696485728025436, 'fcm_dpo/margin': 473.0594482421875, 'margin_dpo/margin_mean': 473.0594177246094, 'margin_dpo/margin_std': 619.4970703125, 'logps/chosen': -706.0938110351562, 'logps/rejected': -1223.7864990234375, 'logps/ref_chosen': -60.88847351074219, 'logps/ref_rejected': -105.521728515625, 'KL/chosen_KL_mean': -645.205322265625, 'KL/rejected_KL_mean': -1118.2647705078125, 'KL/mean': -881.735107421875, 'KL/std': 575.0771484375, 'logits/chosen': -0.9881083965301514, 'logits/rejected': -1.0177645683288574, 'epoch': 0.98} + 98%|█████████▊| 664/681 [28:13<00:45, 2.65s/it] 98%|█████████▊| 665/681 [28:15<00:40, 2.55s/it] {'loss': 1.0975, 'grad_norm': 33.21379089355469, 'learning_rate': 9.513254770636137e-10, 'fcm_dpo/beta': 0.0009838908445090055, 'fcm_dpo/q_t': 0.41350919008255005, 'fcm_dpo/delta': 0.028051599860191345, 'fcm_dpo/margin': 379.10247802734375, 'margin_dpo/margin_mean': 379.10247802734375, 'margin_dpo/margin_std': 466.126220703125, 'logps/chosen': -656.0525512695312, 'logps/rejected': -1059.399658203125, 'logps/ref_chosen': -60.56413269042969, 'logps/ref_rejected': -84.80882263183594, 'KL/chosen_KL_mean': -595.4884033203125, 'KL/rejected_KL_mean': -974.5908813476562, 'KL/mean': -785.0396728515625, 'KL/std': 466.5865478515625, 'logits/chosen': -1.076425313949585, 'logits/rejected': -1.0883920192718506, 'epoch': 0.98} + 98%|█████████▊| 665/681 [28:15<00:40, 2.55s/it] 98%|█████████▊| 666/681 [28:18<00:39, 2.61s/it] {'loss': 1.0931, 'grad_norm': 28.340333938598633, 'learning_rate': 8.427576920763956e-10, 'fcm_dpo/beta': 0.00098421610891819, 'fcm_dpo/q_t': 0.4083176255226135, 'fcm_dpo/delta': -0.001031767576932907, 'fcm_dpo/margin': 407.4012451171875, 'margin_dpo/margin_mean': 407.4012451171875, 'margin_dpo/margin_std': 544.3861083984375, 'logps/chosen': -704.9820556640625, 'logps/rejected': -1143.85498046875, 'logps/ref_chosen': -64.41996002197266, 'logps/ref_rejected': -95.8916244506836, 'KL/chosen_KL_mean': -640.5621337890625, 'KL/rejected_KL_mean': -1047.96337890625, 'KL/mean': -844.2626953125, 'KL/std': 506.4635314941406, 'logits/chosen': -0.9047819375991821, 'logits/rejected': -0.9091357588768005, 'epoch': 0.98} + 98%|█████████▊| 666/681 [28:18<00:39, 2.61s/it] 98%|█████████▊| 667/681 [28:21<00:36, 2.61s/it] {'loss': 1.0786, 'grad_norm': 37.009464263916016, 'learning_rate': 7.407554321417764e-10, 'fcm_dpo/beta': 0.0009798547253012657, 'fcm_dpo/q_t': 0.40200570225715637, 'fcm_dpo/delta': -0.028992321342229843, 'fcm_dpo/margin': 436.495361328125, 'margin_dpo/margin_mean': 436.495361328125, 'margin_dpo/margin_std': 579.8436889648438, 'logps/chosen': -777.3001098632812, 'logps/rejected': -1232.35400390625, 'logps/ref_chosen': -69.27702331542969, 'logps/ref_rejected': -87.83549499511719, 'KL/chosen_KL_mean': -708.0230712890625, 'KL/rejected_KL_mean': -1144.5185546875, 'KL/mean': -926.270751953125, 'KL/std': 533.2125244140625, 'logits/chosen': -0.9339680671691895, 'logits/rejected': -0.918968677520752, 'epoch': 0.98} + 98%|█████████▊| 667/681 [28:21<00:36, 2.61s/it] 98%|█████████▊| 668/681 [28:23<00:34, 2.63s/it] {'loss': 1.1857, 'grad_norm': 33.102317810058594, 'learning_rate': 6.453213851142225e-10, 'fcm_dpo/beta': 0.0009952853433787823, 'fcm_dpo/q_t': 0.42794138193130493, 'fcm_dpo/delta': 0.07259482145309448, 'fcm_dpo/margin': 330.4443359375, 'margin_dpo/margin_mean': 330.4443359375, 'margin_dpo/margin_std': 662.1550903320312, 'logps/chosen': -814.5380859375, 'logps/rejected': -1176.117431640625, 'logps/ref_chosen': -72.60400390625, 'logps/ref_rejected': -103.73905944824219, 'KL/chosen_KL_mean': -741.93408203125, 'KL/rejected_KL_mean': -1072.37841796875, 'KL/mean': -907.15625, 'KL/std': 566.6580810546875, 'logits/chosen': -1.0173263549804688, 'logits/rejected': -1.014156460762024, 'epoch': 0.98} + 98%|█████████▊| 668/681 [28:23<00:34, 2.63s/it] 98%|█████████▊| 669/681 [28:26<00:31, 2.65s/it] {'loss': 1.0695, 'grad_norm': 25.456424713134766, 'learning_rate': 5.564580657695939e-10, 'fcm_dpo/beta': 0.000986847560852766, 'fcm_dpo/q_t': 0.3974974751472473, 'fcm_dpo/delta': -0.051722507923841476, 'fcm_dpo/margin': 455.413330078125, 'margin_dpo/margin_mean': 455.41326904296875, 'margin_dpo/margin_std': 612.3760986328125, 'logps/chosen': -637.0415649414062, 'logps/rejected': -1124.2626953125, 'logps/ref_chosen': -46.116416931152344, 'logps/ref_rejected': -77.92434692382812, 'KL/chosen_KL_mean': -590.9251708984375, 'KL/rejected_KL_mean': -1046.33837890625, 'KL/mean': -818.6317749023438, 'KL/std': 550.6939697265625, 'logits/chosen': -0.9681833982467651, 'logits/rejected': -0.9636249542236328, 'epoch': 0.98} + 98%|█████████▊| 669/681 [28:26<00:31, 2.65s/it] 98%|█████████▊| 670/681 [28:29<00:29, 2.64s/it] {'loss': 1.049, 'grad_norm': 32.25609588623047, 'learning_rate': 4.741678157389739e-10, 'fcm_dpo/beta': 0.000975792994722724, 'fcm_dpo/q_t': 0.39131563901901245, 'fcm_dpo/delta': -0.08364107459783554, 'fcm_dpo/margin': 491.48895263671875, 'margin_dpo/margin_mean': 491.48895263671875, 'margin_dpo/margin_std': 618.3614501953125, 'logps/chosen': -613.7977905273438, 'logps/rejected': -1139.881591796875, 'logps/ref_chosen': -62.34575271606445, 'logps/ref_rejected': -96.9405517578125, 'KL/chosen_KL_mean': -551.4520263671875, 'KL/rejected_KL_mean': -1042.94091796875, 'KL/mean': -797.196533203125, 'KL/std': 524.746826171875, 'logits/chosen': -0.9351658225059509, 'logits/rejected': -0.9491223096847534, 'epoch': 0.98} + 98%|█████████▊| 670/681 [28:29<00:29, 2.64s/it] 99%|█████████▊| 671/681 [28:31<00:25, 2.56s/it] {'loss': 1.1089, 'grad_norm': 28.569211959838867, 'learning_rate': 3.9845280344705245e-10, 'fcm_dpo/beta': 0.0009718855144456029, 'fcm_dpo/q_t': 0.4098934829235077, 'fcm_dpo/delta': 0.009846452623605728, 'fcm_dpo/margin': 401.5755310058594, 'margin_dpo/margin_mean': 401.5755310058594, 'margin_dpo/margin_std': 580.24072265625, 'logps/chosen': -714.833740234375, 'logps/rejected': -1152.228515625, 'logps/ref_chosen': -48.00010681152344, 'logps/ref_rejected': -83.81932067871094, 'KL/chosen_KL_mean': -666.8336181640625, 'KL/rejected_KL_mean': -1068.4091796875, 'KL/mean': -867.621337890625, 'KL/std': 490.7064514160156, 'logits/chosen': -0.9902809858322144, 'logits/rejected': -1.0138908624649048, 'epoch': 0.99} + 99%|█████████▊| 671/681 [28:31<00:25, 2.56s/it] 99%|█████████▊| 672/681 [28:33<00:22, 2.54s/it] {'loss': 1.1383, 'grad_norm': 36.741268157958984, 'learning_rate': 3.293150240547549e-10, 'fcm_dpo/beta': 0.0009703624527901411, 'fcm_dpo/q_t': 0.41550886631011963, 'fcm_dpo/delta': 0.019587505608797073, 'fcm_dpo/margin': 392.7784423828125, 'margin_dpo/margin_mean': 392.7784118652344, 'margin_dpo/margin_std': 660.264892578125, 'logps/chosen': -789.9805297851562, 'logps/rejected': -1217.3157958984375, 'logps/ref_chosen': -58.58328628540039, 'logps/ref_rejected': -93.14015197753906, 'KL/chosen_KL_mean': -731.397216796875, 'KL/rejected_KL_mean': -1124.175537109375, 'KL/mean': -927.7864990234375, 'KL/std': 593.037109375, 'logits/chosen': -1.065507411956787, 'logits/rejected': -1.0642685890197754, 'epoch': 0.99} + 99%|█████████▊| 672/681 [28:33<00:22, 2.54s/it] 99%|█████████▉| 673/681 [28:36<00:19, 2.47s/it] {'loss': 1.1215, 'grad_norm': 30.6710262298584, 'learning_rate': 2.6675629940689504e-10, 'fcm_dpo/beta': 0.0009781282860785723, 'fcm_dpo/q_t': 0.41715848445892334, 'fcm_dpo/delta': 0.03722069412469864, 'fcm_dpo/margin': 372.2838134765625, 'margin_dpo/margin_mean': 372.2838134765625, 'margin_dpo/margin_std': 559.671142578125, 'logps/chosen': -721.08203125, 'logps/rejected': -1131.9388427734375, 'logps/ref_chosen': -46.72320556640625, 'logps/ref_rejected': -85.29623413085938, 'KL/chosen_KL_mean': -674.35888671875, 'KL/rejected_KL_mean': -1046.642578125, 'KL/mean': -860.500732421875, 'KL/std': 500.42138671875, 'logits/chosen': -1.0316221714019775, 'logits/rejected': -1.03529691696167, 'epoch': 0.99} + 99%|█████████▉| 673/681 [28:36<00:19, 2.47s/it] 99%|█████████▉| 674/681 [28:38<00:17, 2.55s/it] {'loss': 1.0577, 'grad_norm': 31.129558563232422, 'learning_rate': 2.1077827798404725e-10, 'fcm_dpo/beta': 0.0009717537323012948, 'fcm_dpo/q_t': 0.39771580696105957, 'fcm_dpo/delta': -0.06616582721471786, 'fcm_dpo/margin': 476.61138916015625, 'margin_dpo/margin_mean': 476.61138916015625, 'margin_dpo/margin_std': 633.5640258789062, 'logps/chosen': -615.8072509765625, 'logps/rejected': -1117.01904296875, 'logps/ref_chosen': -45.445526123046875, 'logps/ref_rejected': -70.04593658447266, 'KL/chosen_KL_mean': -570.3616943359375, 'KL/rejected_KL_mean': -1046.97314453125, 'KL/mean': -808.6674194335938, 'KL/std': 530.7771606445312, 'logits/chosen': -0.929929792881012, 'logits/rejected': -0.9406229257583618, 'epoch': 0.99} + 99%|█████████▉| 674/681 [28:38<00:17, 2.55s/it] 99%|█████████▉| 675/681 [28:41<00:15, 2.54s/it] {'loss': 1.0522, 'grad_norm': 26.884960174560547, 'learning_rate': 1.6138243485910863e-10, 'fcm_dpo/beta': 0.0009487034403719008, 'fcm_dpo/q_t': 0.3958815634250641, 'fcm_dpo/delta': -0.07788591086864471, 'fcm_dpo/margin': 497.97161865234375, 'margin_dpo/margin_mean': 497.97161865234375, 'margin_dpo/margin_std': 613.9585571289062, 'logps/chosen': -683.0830688476562, 'logps/rejected': -1210.970458984375, 'logps/ref_chosen': -44.17628479003906, 'logps/ref_rejected': -74.09197998046875, 'KL/chosen_KL_mean': -638.90673828125, 'KL/rejected_KL_mean': -1136.87841796875, 'KL/mean': -887.892578125, 'KL/std': 577.7835693359375, 'logits/chosen': -0.9694858193397522, 'logits/rejected': -0.984051525592804, 'epoch': 0.99} + 99%|█████████▉| 675/681 [28:41<00:15, 2.54s/it] 99%|█████████▉| 676/681 [28:44<00:12, 2.57s/it] {'loss': 1.074, 'grad_norm': 30.11288833618164, 'learning_rate': 1.1857007165852472e-10, 'fcm_dpo/beta': 0.0009498898871243, 'fcm_dpo/q_t': 0.4043177366256714, 'fcm_dpo/delta': -0.01460132747888565, 'fcm_dpo/margin': 435.82708740234375, 'margin_dpo/margin_mean': 435.8271179199219, 'margin_dpo/margin_std': 524.7921142578125, 'logps/chosen': -778.4644775390625, 'logps/rejected': -1231.2518310546875, 'logps/ref_chosen': -71.39852905273438, 'logps/ref_rejected': -88.3587646484375, 'KL/chosen_KL_mean': -707.06591796875, 'KL/rejected_KL_mean': -1142.89306640625, 'KL/mean': -924.9794921875, 'KL/std': 531.7125244140625, 'logits/chosen': -0.9417062997817993, 'logits/rejected': -0.9432613849639893, 'epoch': 0.99} + 99%|█████████▉| 676/681 [28:44<00:12, 2.57s/it] 99%|█████████▉| 677/681 [28:46<00:10, 2.50s/it] {'loss': 1.0959, 'grad_norm': 33.28916931152344, 'learning_rate': 8.23423165278725e-11, 'fcm_dpo/beta': 0.0009504948975518346, 'fcm_dpo/q_t': 0.4103269577026367, 'fcm_dpo/delta': 0.0038806493394076824, 'fcm_dpo/margin': 416.90771484375, 'margin_dpo/margin_mean': 416.90771484375, 'margin_dpo/margin_std': 574.4068603515625, 'logps/chosen': -732.5531005859375, 'logps/rejected': -1171.159912109375, 'logps/ref_chosen': -56.527435302734375, 'logps/ref_rejected': -78.22654724121094, 'KL/chosen_KL_mean': -676.025634765625, 'KL/rejected_KL_mean': -1092.933349609375, 'KL/mean': -884.4794921875, 'KL/std': 490.99835205078125, 'logits/chosen': -1.00029718875885, 'logits/rejected': -0.9821897745132446, 'epoch': 0.99} + 99%|█████████▉| 677/681 [28:46<00:10, 2.50s/it] 100%|█████████▉| 678/681 [28:48<00:07, 2.48s/it] {'loss': 1.0481, 'grad_norm': 30.350744247436523, 'learning_rate': 5.270012410216185e-11, 'fcm_dpo/beta': 0.00094210309907794, 'fcm_dpo/q_t': 0.39141643047332764, 'fcm_dpo/delta': -0.08271745592355728, 'fcm_dpo/margin': 508.2114562988281, 'margin_dpo/margin_mean': 508.21142578125, 'margin_dpo/margin_std': 649.2615966796875, 'logps/chosen': -616.9215087890625, 'logps/rejected': -1159.6031494140625, 'logps/ref_chosen': -46.13447570800781, 'logps/ref_rejected': -80.60462951660156, 'KL/chosen_KL_mean': -570.7870483398438, 'KL/rejected_KL_mean': -1078.99853515625, 'KL/mean': -824.8927612304688, 'KL/std': 591.4881591796875, 'logits/chosen': -0.9413450956344604, 'logits/rejected': -0.971659243106842, 'epoch': 1.0} + 100%|█████████▉| 678/681 [28:48<00:07, 2.48s/it] 100%|█████████▉| 679/681 [28:51<00:05, 2.56s/it] {'loss': 1.1447, 'grad_norm': 31.743593215942383, 'learning_rate': 2.9644275480772416e-11, 'fcm_dpo/beta': 0.0009439511341042817, 'fcm_dpo/q_t': 0.42417770624160767, 'fcm_dpo/delta': 0.07106737792491913, 'fcm_dpo/margin': 351.002197265625, 'margin_dpo/margin_mean': 351.002197265625, 'margin_dpo/margin_std': 545.5799560546875, 'logps/chosen': -721.8994140625, 'logps/rejected': -1099.204833984375, 'logps/ref_chosen': -50.294921875, 'logps/ref_rejected': -76.59813690185547, 'KL/chosen_KL_mean': -671.6044921875, 'KL/rejected_KL_mean': -1022.606689453125, 'KL/mean': -847.1055908203125, 'KL/std': 466.68133544921875, 'logits/chosen': -0.9747291803359985, 'logits/rejected': -0.9602512717247009, 'epoch': 1.0} + 100%|█████████▉| 679/681 [28:51<00:05, 2.56s/it] 100%|█████████▉| 680/681 [28:54<00:02, 2.58s/it] {'loss': 1.097, 'grad_norm': 32.0710334777832, 'learning_rate': 1.31753782067201e-11, 'fcm_dpo/beta': 0.0009394378867000341, 'fcm_dpo/q_t': 0.4019904136657715, 'fcm_dpo/delta': -0.031213950365781784, 'fcm_dpo/margin': 457.013916015625, 'margin_dpo/margin_mean': 457.013916015625, 'margin_dpo/margin_std': 679.540283203125, 'logps/chosen': -747.4393310546875, 'logps/rejected': -1239.92236328125, 'logps/ref_chosen': -76.91569519042969, 'logps/ref_rejected': -112.384765625, 'KL/chosen_KL_mean': -670.5235595703125, 'KL/rejected_KL_mean': -1127.53759765625, 'KL/mean': -899.030517578125, 'KL/std': 594.6282958984375, 'logits/chosen': -0.9574640989303589, 'logits/rejected': -0.9788249731063843, 'epoch': 1.0} + 100%|█████████▉| 680/681 [28:54<00:02, 2.58s/it] 100%|██████████| 681/681 [28:56<00:00, 2.56s/it] {'loss': 1.1386, 'grad_norm': 24.496997833251953, 'learning_rate': 3.2938662507808745e-12, 'fcm_dpo/beta': 0.0009560231701470912, 'fcm_dpo/q_t': 0.42025691270828247, 'fcm_dpo/delta': 0.05282256752252579, 'fcm_dpo/margin': 363.79510498046875, 'margin_dpo/margin_mean': 363.79510498046875, 'margin_dpo/margin_std': 546.2330932617188, 'logps/chosen': -705.069580078125, 'logps/rejected': -1096.46533203125, 'logps/ref_chosen': -60.957279205322266, 'logps/ref_rejected': -88.55797576904297, 'KL/chosen_KL_mean': -644.1123046875, 'KL/rejected_KL_mean': -1007.9073486328125, 'KL/mean': -826.009765625, 'KL/std': 493.124755859375, 'logits/chosen': -1.0301257371902466, 'logits/rejected': -1.0402805805206299, 'epoch': 1.0} + 100%|██████████| 681/681 [28:56<00:00, 2.56s/it][INFO|trainer.py:2681] 2026-04-29 15:37:12,845 >> + +Training completed. Do not forget to share your model on huggingface.co/models =) + + + {'train_runtime': 1736.7793, 'train_samples_per_second': 25.103, 'train_steps_per_second': 0.392, 'train_loss': 1.126299982641587, 'epoch': 1.0} + 100%|██████████| 681/681 [28:56<00:00, 2.56s/it] 100%|██████████| 681/681 [28:56<00:00, 2.55s/it] +***** train metrics ***** + epoch = 1.0 + total_flos = 0GF + train_loss = 1.1263 + train_runtime = 0:28:56.77 + train_samples = 43598 + train_samples_per_second = 25.103 + train_steps_per_second = 0.392 +2026-04-29 15:37:12 - INFO - __main__ - *** Training complete *** +2026-04-29 15:37:12 - INFO - __main__ - *** Save model *** +[INFO|configuration_utils.py:419] 2026-04-29 15:37:45,911 >> Configuration saved in /workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/config.json +[INFO|configuration_utils.py:911] 2026-04-29 15:37:45,913 >> Configuration saved in /workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/generation_config.json +[INFO|modeling_utils.py:3580] 2026-04-29 15:38:58,774 >> The model is bigger than the maximum size per checkpoint (5GB) and is going to be split in 7 checkpoint shards. You can find where each parameters has been saved in the index located at /workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/model.safetensors.index.json. +[INFO|tokenization_utils_base.py:2510] 2026-04-29 15:38:58,779 >> tokenizer config file saved in /workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/tokenizer_config.json +[INFO|tokenization_utils_base.py:2519] 2026-04-29 15:38:58,781 >> Special tokens file saved in /workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/special_tokens_map.json +2026-04-29 15:38:58 - INFO - __main__ - Saved HF-compatible model artifacts to /workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449 +[INFO|modelcard.py:450] 2026-04-29 15:39:00,515 >> Dropping the following result as it does not have all the necessary fields: +{'dataset': {'name': 'Anthropic/hh-rlhf', 'type': 'Anthropic/hh-rlhf'}} +[INFO|configuration_utils.py:419] 2026-04-29 15:39:00,522 >> Configuration saved in /workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449/config.json +2026-04-29 15:39:00 - INFO - __main__ - Skipping margin dataset upload because push_margin_dataset is false. +2026-04-29 15:39:00 - INFO - __main__ - *** Training complete! *** +wandb: - 0.011 MB of 0.011 MB uploaded wandb: \ 0.011 MB of 0.011 MB uploaded wandb: | 0.011 MB of 0.011 MB uploaded wandb: / 0.011 MB of 0.643 MB uploaded wandb: - 0.483 MB of 0.643 MB uploaded wandb: \ 0.483 MB of 0.643 MB uploaded wandb: | 0.643 MB of 0.643 MB uploaded wandb: +wandb: Run history: +wandb: train/KL/chosen_KL_mean ███████▇▇▇▇▇▆▆▆▆▆▄▅▅▄▅▄▅▄▅▄▂▂▂▂▃▃▂▂▂▁▁▂▁ +wandb: train/KL/mean ███████▇▇▇▇▇▆▆▆▆▆▅▅▅▅▅▄▄▄▅▄▂▃▂▃▃▂▂▂▂▁▁▁▁ +wandb: train/KL/rejected_KL_mean ██████▇▇▇▇▇▇▆▆▆▆▆▅▅▅▅▅▅▄▄▄▄▂▃▂▃▃▂▃▂▂▁▁▁▂ +wandb: train/KL/std ▁▁▁▁▁▂▂▂▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▄▄▄▅▇▆▇▆▇▇▆▇▆▇███ +wandb: train/epoch ▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███ +wandb: train/fcm_dpo/beta ▇▇▇▇▇█▇▅▅▄▄▃▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁ +wandb: train/fcm_dpo/delta ▅▅▅▅▅▇▄▆▆▄▃▅▅█▄▅▄▅▆▅█▂▇▂▃▄▃▄▆▄▆▃▁█▆▆▄▂▁▅ +wandb: train/fcm_dpo/margin ▁▁▁▁▁▁▂▂▂▂▂▂▂▂▃▃▃▃▃▄▃▅▃▅▅▅▅▆▅▆▅▆█▅▅▆▆██▆ +wandb: train/fcm_dpo/q_t ███▇▅▄▃▄▄▃▂▃▃▅▂▃▂▄▃▃▅▁▄▁▂▂▂▃▃▂▄▂▁▄▄▃▂▂▁▃ +wandb: train/global_step ▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███ +wandb: train/grad_norm ▁▁▁▁▂▄▃▃▄▃▃▄▅▄▅▃▄▅▄▄▅▆▄▄▅▆▅▅▅▆▄▇▇▄▄▇▅▇█▅ +wandb: train/learning_rate ▂▃▅▇██████▇▇▇▇▇▆▆▆▆▅▅▅▄▄▄▄▃▃▃▂▂▂▂▂▁▁▁▁▁▁ +wandb: train/logits/chosen ▆▆▆▅▅▅▅▆▇▇███▇▇▇█▆▇▇▆▆▆▆▆▆▅▃▃▃▄▃▂▂▂▂▁▂▂▂ +wandb: train/logits/rejected ▆▆▆▅▅▅▅▆▇▇███▇▇▇█▆▇▇▇▆▆▆▆▆▅▃▃▃▄▃▂▁▂▂▁▂▂▂ +wandb: train/logps/chosen █████▇▇▇▇▇▇▆▆▆▆▆▆▄▅▅▄▅▄▅▄▅▄▁▃▂▂▃▃▂▂▂▁▁▂▁ +wandb: train/logps/ref_chosen ▅█▆▆▄▃▆▆▅▄▇▃▄▃█▅▅▄▇▅▃█▇▅▅▃▆▁▇▅▃▇▅▆▇▆▃▆▆▂ +wandb: train/logps/ref_rejected ▇▄█▅▅▃▆█▇▄▅▃▅▆▅▅▄▅█▅█▄▆▃▅▄▃▁▆▃▄▆▃▄█▅▄▂▂▂ +wandb: train/logps/rejected █████▇▇▇▇▇▇▇▆▆▆▆▅▅▅▅▅▅▅▄▄▄▄▂▃▂▃▃▂▃▃▂▂▁▁▂ +wandb: train/loss ███▇▅▅▃▄▄▃▂▄▃▅▂▃▂▅▃▃▅▁▄▁▂▂▂▃▃▃▄▂▁▄▄▃▃▂▁▃ +wandb: train/margin_dpo/margin_mean ▁▁▁▁▁▁▂▂▂▂▂▂▂▂▃▃▃▃▃▄▃▅▃▅▅▅▅▆▅▆▅▆█▅▅▆▆██▆ +wandb: train/margin_dpo/margin_std ▁▁▁▁▁▂▂▂▂▂▂▃▃▃▂▃▃▄▄▄▄▃▃▄▄▄▄▇▅▆▅▇▇▆▇▆▆█▇█ +wandb: +wandb: Run summary: +wandb: total_flos 0.0 +wandb: train/KL/chosen_KL_mean -644.1123 +wandb: train/KL/mean -826.00977 +wandb: train/KL/rejected_KL_mean -1007.90735 +wandb: train/KL/std 493.12476 +wandb: train/epoch 1.0 +wandb: train/fcm_dpo/beta 0.00096 +wandb: train/fcm_dpo/delta 0.05282 +wandb: train/fcm_dpo/margin 363.7951 +wandb: train/fcm_dpo/q_t 0.42026 +wandb: train/global_step 681 +wandb: train/grad_norm 24.497 +wandb: train/learning_rate 0.0 +wandb: train/logits/chosen -1.03013 +wandb: train/logits/rejected -1.04028 +wandb: train/logps/chosen -705.06958 +wandb: train/logps/ref_chosen -60.95728 +wandb: train/logps/ref_rejected -88.55798 +wandb: train/logps/rejected -1096.46533 +wandb: train/loss 1.1386 +wandb: train/margin_dpo/margin_mean 363.7951 +wandb: train/margin_dpo/margin_std 546.23309 +wandb: train_loss 1.1263 +wandb: train_runtime 1736.7793 +wandb: train_samples_per_second 25.103 +wandb: train_steps_per_second 0.392 +wandb: +wandb: 🚀 View run llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p01-20260429-085449 at: https://wandb.ai/can-not-fand-northeastern-university/llama3-hh-new-dpo-multi-beta-sweep/runs/76c0wk78 +wandb: ⭐️ View project at: https://wandb.ai/can-not-fand-northeastern-university/llama3-hh-new-dpo-multi-beta-sweep +wandb: Synced 5 W&B file(s), 0 media file(s), 0 artifact file(s) and 0 other file(s) +wandb: Find logs at: ./wandb/wandb/run-20260429_145753-76c0wk78/logs +wandb: WARNING The new W&B backend becomes opt-out in version 0.18.0; try it out with `wandb.require("core")`! See https://wandb.me/wandb-core for more information. diff --git a/train_results.json b/train_results.json new file mode 100644 index 0000000..ac66d6e --- /dev/null +++ b/train_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 1.0, + "total_flos": 0.0, + "train_loss": 1.126299982641587, + "train_runtime": 1736.7793, + "train_samples": 43598, + "train_samples_per_second": 25.103, + "train_steps_per_second": 0.392 +} \ No newline at end of file diff --git a/trainer_state.json b/trainer_state.json new file mode 100644 index 0000000..e178c5c --- /dev/null +++ b/trainer_state.json @@ -0,0 +1,15706 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 200, + "global_step": 681, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "KL/chosen_KL_mean": 0.00527191162109375, + "KL/mean": 0.016706019639968872, + "KL/rejected_KL_mean": 0.028141021728515625, + "KL/std": 0.272699236869812, + "epoch": 0.0014684287812041115, + "fcm_dpo/beta": 0.009999999776482582, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": -0.02287006378173828, + "fcm_dpo/q_t": 0.5000571608543396, + "grad_norm": 8.340126991271973, + "learning_rate": 0.0, + "logits/chosen": -0.4974287748336792, + "logits/rejected": -0.43299180269241333, + "logps/chosen": -50.1435661315918, + "logps/ref_chosen": -50.14883804321289, + "logps/ref_rejected": -74.1280517578125, + "logps/rejected": -74.09991455078125, + "loss": 1.3865, + "margin_dpo/margin_mean": -0.02287048101425171, + "margin_dpo/margin_std": 0.41920793056488037, + "step": 1 + }, + { + "KL/chosen_KL_mean": -0.03498649597167969, + "KL/mean": -0.00212840735912323, + "KL/rejected_KL_mean": 0.030735015869140625, + "KL/std": 0.24797174334526062, + "epoch": 0.002936857562408223, + "fcm_dpo/beta": 0.009999999776482582, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": -0.06572261452674866, + "fcm_dpo/q_t": 0.500164270401001, + "grad_norm": 7.205794811248779, + "learning_rate": 7.246376811594203e-09, + "logits/chosen": -0.49536412954330444, + "logits/rejected": -0.4594460427761078, + "logps/chosen": -52.65568923950195, + "logps/ref_chosen": -52.620704650878906, + "logps/ref_rejected": -75.30413818359375, + "logps/rejected": -75.27340698242188, + "loss": 1.387, + "margin_dpo/margin_mean": -0.06572240591049194, + "margin_dpo/margin_std": 0.35048407316207886, + "step": 2 + }, + { + "KL/chosen_KL_mean": 0.028177261352539062, + "KL/mean": 0.011634737253189087, + "KL/rejected_KL_mean": -0.00490570068359375, + "KL/std": 0.2545679211616516, + "epoch": 0.004405286343612335, + "fcm_dpo/beta": 0.009999999776482582, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.03308027982711792, + "fcm_dpo/q_t": 0.49991729855537415, + "grad_norm": 7.091545581817627, + "learning_rate": 1.4492753623188406e-08, + "logits/chosen": -0.4817052185535431, + "logits/rejected": -0.44228988885879517, + "logps/chosen": -60.95341873168945, + "logps/ref_chosen": -60.981597900390625, + "logps/ref_rejected": -68.67259216308594, + "logps/rejected": -68.67750549316406, + "loss": 1.386, + "margin_dpo/margin_mean": 0.03308090567588806, + "margin_dpo/margin_std": 0.3488999903202057, + "step": 3 + }, + { + "KL/chosen_KL_mean": 0.006183624267578125, + "KL/mean": 0.019635915756225586, + "KL/rejected_KL_mean": 0.03308868408203125, + "KL/std": 0.28558221459388733, + "epoch": 0.005873715124816446, + "fcm_dpo/beta": 0.009999999776482582, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": -0.026903212070465088, + "fcm_dpo/q_t": 0.5000672340393066, + "grad_norm": 7.214421272277832, + "learning_rate": 2.1739130434782606e-08, + "logits/chosen": -0.468106746673584, + "logits/rejected": -0.44051337242126465, + "logps/chosen": -56.76152801513672, + "logps/ref_chosen": -56.7677116394043, + "logps/ref_rejected": -86.64710998535156, + "logps/rejected": -86.61402130126953, + "loss": 1.3866, + "margin_dpo/margin_mean": -0.026903808116912842, + "margin_dpo/margin_std": 0.39421218633651733, + "step": 4 + }, + { + "KL/chosen_KL_mean": 0.01943206787109375, + "KL/mean": 0.002883225679397583, + "KL/rejected_KL_mean": -0.013660430908203125, + "KL/std": 0.2767731547355652, + "epoch": 0.007342143906020558, + "fcm_dpo/beta": 0.009999999776482582, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.03309446573257446, + "fcm_dpo/q_t": 0.49991726875305176, + "grad_norm": 8.964797019958496, + "learning_rate": 2.898550724637681e-08, + "logits/chosen": -0.5146475434303284, + "logits/rejected": -0.47093117237091064, + "logps/chosen": -53.839942932128906, + "logps/ref_chosen": -53.859375, + "logps/ref_rejected": -84.14918518066406, + "logps/rejected": -84.162841796875, + "loss": 1.386, + "margin_dpo/margin_mean": 0.033094823360443115, + "margin_dpo/margin_std": 0.38494962453842163, + "step": 5 + }, + { + "KL/chosen_KL_mean": -0.009487152099609375, + "KL/mean": -0.01324455440044403, + "KL/rejected_KL_mean": -0.01700592041015625, + "KL/std": 0.27032917737960815, + "epoch": 0.00881057268722467, + "fcm_dpo/beta": 0.009999999776482582, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.007514864206314087, + "fcm_dpo/q_t": 0.49998119473457336, + "grad_norm": 9.190613746643066, + "learning_rate": 3.6231884057971014e-08, + "logits/chosen": -0.5035334825515747, + "logits/rejected": -0.46098393201828003, + "logps/chosen": -63.016971588134766, + "logps/ref_chosen": -63.007484436035156, + "logps/ref_rejected": -92.64534759521484, + "logps/rejected": -92.662353515625, + "loss": 1.3862, + "margin_dpo/margin_mean": 0.007514625787734985, + "margin_dpo/margin_std": 0.3818962574005127, + "step": 6 + }, + { + "KL/chosen_KL_mean": 0.06007957458496094, + "KL/mean": 0.0322260856628418, + "KL/rejected_KL_mean": 0.004375457763671875, + "KL/std": 0.2890022397041321, + "epoch": 0.010279001468428781, + "fcm_dpo/beta": 0.009999999776482582, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.05570727586746216, + "fcm_dpo/q_t": 0.4998607337474823, + "grad_norm": 8.227945327758789, + "learning_rate": 4.347826086956521e-08, + "logits/chosen": -0.5052176713943481, + "logits/rejected": -0.47141021490097046, + "logps/chosen": -57.71474075317383, + "logps/ref_chosen": -57.774818420410156, + "logps/ref_rejected": -103.92059326171875, + "logps/rejected": -103.91621398925781, + "loss": 1.3857, + "margin_dpo/margin_mean": 0.055707335472106934, + "margin_dpo/margin_std": 0.38999414443969727, + "step": 7 + }, + { + "KL/chosen_KL_mean": 0.039844512939453125, + "KL/mean": 0.04513771831989288, + "KL/rejected_KL_mean": 0.050434112548828125, + "KL/std": 0.3095516264438629, + "epoch": 0.011747430249632892, + "fcm_dpo/beta": 0.009999999776482582, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": -0.010594159364700317, + "fcm_dpo/q_t": 0.5000264644622803, + "grad_norm": 7.855659008026123, + "learning_rate": 5.0724637681159424e-08, + "logits/chosen": -0.5170360803604126, + "logits/rejected": -0.492270290851593, + "logps/chosen": -58.67619323730469, + "logps/ref_chosen": -58.716033935546875, + "logps/ref_rejected": -79.3114242553711, + "logps/rejected": -79.260986328125, + "loss": 1.3864, + "margin_dpo/margin_mean": -0.010594218969345093, + "margin_dpo/margin_std": 0.42736732959747314, + "step": 8 + }, + { + "KL/chosen_KL_mean": 0.0255889892578125, + "KL/mean": 0.0015124678611755371, + "KL/rejected_KL_mean": -0.0225677490234375, + "KL/std": 0.2851980924606323, + "epoch": 0.013215859030837005, + "fcm_dpo/beta": 0.009999999776482582, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.048153460025787354, + "fcm_dpo/q_t": 0.4998795986175537, + "grad_norm": 8.50635814666748, + "learning_rate": 5.797101449275362e-08, + "logits/chosen": -0.4870206117630005, + "logits/rejected": -0.4398488402366638, + "logps/chosen": -69.84125518798828, + "logps/ref_chosen": -69.8668441772461, + "logps/ref_rejected": -99.6026611328125, + "logps/rejected": -99.62522888183594, + "loss": 1.3858, + "margin_dpo/margin_mean": 0.04815271496772766, + "margin_dpo/margin_std": 0.38030916452407837, + "step": 9 + }, + { + "KL/chosen_KL_mean": -0.014789581298828125, + "KL/mean": -0.01405847817659378, + "KL/rejected_KL_mean": -0.0133209228515625, + "KL/std": 0.2681947946548462, + "epoch": 0.014684287812041116, + "fcm_dpo/beta": 0.009999999776482582, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": -0.0014634579420089722, + "fcm_dpo/q_t": 0.5000036358833313, + "grad_norm": 7.091888427734375, + "learning_rate": 6.521739130434782e-08, + "logits/chosen": -0.4998844861984253, + "logits/rejected": -0.45695722103118896, + "logps/chosen": -48.372474670410156, + "logps/ref_chosen": -48.35768508911133, + "logps/ref_rejected": -80.37206268310547, + "logps/rejected": -80.38538360595703, + "loss": 1.3863, + "margin_dpo/margin_mean": -0.001463174819946289, + "margin_dpo/margin_std": 0.3855435252189636, + "step": 10 + }, + { + "KL/chosen_KL_mean": -0.007877349853515625, + "KL/mean": -0.010366648435592651, + "KL/rejected_KL_mean": -0.01285552978515625, + "KL/std": 0.2346026599407196, + "epoch": 0.016152716593245228, + "fcm_dpo/beta": 0.009999999776482582, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.004973113536834717, + "fcm_dpo/q_t": 0.49998754262924194, + "grad_norm": 6.8613715171813965, + "learning_rate": 7.246376811594203e-08, + "logits/chosen": -0.46066391468048096, + "logits/rejected": -0.4356629252433777, + "logps/chosen": -53.02473449707031, + "logps/ref_chosen": -53.01685333251953, + "logps/ref_rejected": -87.78038024902344, + "logps/rejected": -87.79322814941406, + "loss": 1.3862, + "margin_dpo/margin_mean": 0.0049735307693481445, + "margin_dpo/margin_std": 0.2909265458583832, + "step": 11 + }, + { + "KL/chosen_KL_mean": 0.006072998046875, + "KL/mean": -0.024121850728988647, + "KL/rejected_KL_mean": -0.054309844970703125, + "KL/std": 0.3304472863674164, + "epoch": 0.01762114537444934, + "fcm_dpo/beta": 0.009999999776482582, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.06038558483123779, + "fcm_dpo/q_t": 0.49984902143478394, + "grad_norm": 9.009154319763184, + "learning_rate": 7.971014492753623e-08, + "logits/chosen": -0.5414502620697021, + "logits/rejected": -0.5054250359535217, + "logps/chosen": -61.79936218261719, + "logps/ref_chosen": -61.80543518066406, + "logps/ref_rejected": -104.8582763671875, + "logps/rejected": -104.91258239746094, + "loss": 1.3857, + "margin_dpo/margin_mean": 0.0603850781917572, + "margin_dpo/margin_std": 0.43303191661834717, + "step": 12 + }, + { + "KL/chosen_KL_mean": -0.0059814453125, + "KL/mean": 0.00032275915145874023, + "KL/rejected_KL_mean": 0.00662994384765625, + "KL/std": 0.2694360017776489, + "epoch": 0.01908957415565345, + "fcm_dpo/beta": 0.009999999776482582, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": -0.012606263160705566, + "fcm_dpo/q_t": 0.5000314712524414, + "grad_norm": 7.9163641929626465, + "learning_rate": 8.695652173913042e-08, + "logits/chosen": -0.49102455377578735, + "logits/rejected": -0.46374207735061646, + "logps/chosen": -64.2663345336914, + "logps/ref_chosen": -64.2603530883789, + "logps/ref_rejected": -87.20307922363281, + "logps/rejected": -87.19645690917969, + "loss": 1.3864, + "margin_dpo/margin_mean": -0.012606710195541382, + "margin_dpo/margin_std": 0.3794170618057251, + "step": 13 + }, + { + "KL/chosen_KL_mean": -0.03643989562988281, + "KL/mean": -0.018927976489067078, + "KL/rejected_KL_mean": -0.0014190673828125, + "KL/std": 0.2660324275493622, + "epoch": 0.020558002936857563, + "fcm_dpo/beta": 0.009999999776482582, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": -0.03501877188682556, + "fcm_dpo/q_t": 0.5000874996185303, + "grad_norm": 8.576128005981445, + "learning_rate": 9.420289855072464e-08, + "logits/chosen": -0.49155694246292114, + "logits/rejected": -0.4527207314968109, + "logps/chosen": -58.14665222167969, + "logps/ref_chosen": -58.11021041870117, + "logps/ref_rejected": -104.04708099365234, + "logps/rejected": -104.04850006103516, + "loss": 1.3866, + "margin_dpo/margin_mean": -0.03501787781715393, + "margin_dpo/margin_std": 0.4262927174568176, + "step": 14 + }, + { + "KL/chosen_KL_mean": -0.028324127197265625, + "KL/mean": -0.051674991846084595, + "KL/rejected_KL_mean": -0.07502365112304688, + "KL/std": 0.23149462044239044, + "epoch": 0.022026431718061675, + "fcm_dpo/beta": 0.009999999776482582, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.04670119285583496, + "fcm_dpo/q_t": 0.499883234500885, + "grad_norm": 6.426931858062744, + "learning_rate": 1.0144927536231885e-07, + "logits/chosen": -0.5326635837554932, + "logits/rejected": -0.5161415338516235, + "logps/chosen": -56.99523162841797, + "logps/ref_chosen": -56.96691131591797, + "logps/ref_rejected": -80.80863952636719, + "logps/rejected": -80.8836669921875, + "loss": 1.3858, + "margin_dpo/margin_mean": 0.04670119285583496, + "margin_dpo/margin_std": 0.32319122552871704, + "step": 15 + }, + { + "KL/chosen_KL_mean": -0.04820060729980469, + "KL/mean": -0.04530364274978638, + "KL/rejected_KL_mean": -0.0424041748046875, + "KL/std": 0.29397979378700256, + "epoch": 0.023494860499265784, + "fcm_dpo/beta": 0.009999999776482582, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": -0.005795121192932129, + "fcm_dpo/q_t": 0.5000145435333252, + "grad_norm": 8.416418075561523, + "learning_rate": 1.0869565217391303e-07, + "logits/chosen": -0.5461217164993286, + "logits/rejected": -0.5072727203369141, + "logps/chosen": -61.78809356689453, + "logps/ref_chosen": -61.739891052246094, + "logps/ref_rejected": -84.36947631835938, + "logps/rejected": -84.41188049316406, + "loss": 1.3864, + "margin_dpo/margin_mean": -0.005795121192932129, + "margin_dpo/margin_std": 0.3890739381313324, + "step": 16 + }, + { + "KL/chosen_KL_mean": 0.046901702880859375, + "KL/mean": -0.010359078645706177, + "KL/rejected_KL_mean": -0.06762313842773438, + "KL/std": 0.27913013100624084, + "epoch": 0.024963289280469897, + "fcm_dpo/beta": 0.009999999776482582, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.11452382802963257, + "fcm_dpo/q_t": 0.4997136890888214, + "grad_norm": 7.9028801918029785, + "learning_rate": 1.1594202898550725e-07, + "logits/chosen": -0.5041570067405701, + "logits/rejected": -0.4673753082752228, + "logps/chosen": -67.66343688964844, + "logps/ref_chosen": -67.71033477783203, + "logps/ref_rejected": -85.37865447998047, + "logps/rejected": -85.44627380371094, + "loss": 1.3852, + "margin_dpo/margin_mean": 0.11452355980873108, + "margin_dpo/margin_std": 0.37491074204444885, + "step": 17 + }, + { + "KL/chosen_KL_mean": -0.0292816162109375, + "KL/mean": -0.03873269259929657, + "KL/rejected_KL_mean": -0.048187255859375, + "KL/std": 0.23902130126953125, + "epoch": 0.02643171806167401, + "fcm_dpo/beta": 0.009999999776482582, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.018904417753219604, + "fcm_dpo/q_t": 0.4999527335166931, + "grad_norm": 8.233268737792969, + "learning_rate": 1.2318840579710146e-07, + "logits/chosen": -0.5057047009468079, + "logits/rejected": -0.45175978541374207, + "logps/chosen": -47.76877212524414, + "logps/ref_chosen": -47.7394905090332, + "logps/ref_rejected": -75.4722900390625, + "logps/rejected": -75.52047729492188, + "loss": 1.3861, + "margin_dpo/margin_mean": 0.01890420913696289, + "margin_dpo/margin_std": 0.31775712966918945, + "step": 18 + }, + { + "KL/chosen_KL_mean": 0.03473663330078125, + "KL/mean": -0.004338964819908142, + "KL/rejected_KL_mean": -0.0434112548828125, + "KL/std": 0.24089065194129944, + "epoch": 0.027900146842878122, + "fcm_dpo/beta": 0.009999999776482582, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.07813850045204163, + "fcm_dpo/q_t": 0.49980464577674866, + "grad_norm": 7.455746173858643, + "learning_rate": 1.3043478260869563e-07, + "logits/chosen": -0.5036299228668213, + "logits/rejected": -0.45466092228889465, + "logps/chosen": -70.17062377929688, + "logps/ref_chosen": -70.20536041259766, + "logps/ref_rejected": -89.7575912475586, + "logps/rejected": -89.8010025024414, + "loss": 1.3855, + "margin_dpo/margin_mean": 0.07813867926597595, + "margin_dpo/margin_std": 0.3746962547302246, + "step": 19 + }, + { + "KL/chosen_KL_mean": -0.049213409423828125, + "KL/mean": -0.0454762727022171, + "KL/rejected_KL_mean": -0.041736602783203125, + "KL/std": 0.29056376218795776, + "epoch": 0.02936857562408223, + "fcm_dpo/beta": 0.009999999776482582, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": -0.007474333047866821, + "fcm_dpo/q_t": 0.5000186562538147, + "grad_norm": 7.4274702072143555, + "learning_rate": 1.3768115942028986e-07, + "logits/chosen": -0.5517487525939941, + "logits/rejected": -0.49535927176475525, + "logps/chosen": -50.852455139160156, + "logps/ref_chosen": -50.80324172973633, + "logps/ref_rejected": -78.82334899902344, + "logps/rejected": -78.86508178710938, + "loss": 1.3864, + "margin_dpo/margin_mean": -0.007474362850189209, + "margin_dpo/margin_std": 0.37508344650268555, + "step": 20 + }, + { + "KL/chosen_KL_mean": -0.0056247711181640625, + "KL/mean": -0.04150792211294174, + "KL/rejected_KL_mean": -0.077392578125, + "KL/std": 0.30757784843444824, + "epoch": 0.030837004405286344, + "fcm_dpo/beta": 0.009999999776482582, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.07176293432712555, + "fcm_dpo/q_t": 0.4998205900192261, + "grad_norm": 7.820558547973633, + "learning_rate": 1.4492753623188405e-07, + "logits/chosen": -0.5255781412124634, + "logits/rejected": -0.5039485096931458, + "logps/chosen": -50.068641662597656, + "logps/ref_chosen": -50.063018798828125, + "logps/ref_rejected": -77.86878967285156, + "logps/rejected": -77.94618225097656, + "loss": 1.3856, + "margin_dpo/margin_mean": 0.07176269590854645, + "margin_dpo/margin_std": 0.43745559453964233, + "step": 21 + }, + { + "KL/chosen_KL_mean": -0.027448654174804688, + "KL/mean": -0.0883231908082962, + "KL/rejected_KL_mean": -0.14919662475585938, + "KL/std": 0.2602458596229553, + "epoch": 0.032305433186490456, + "fcm_dpo/beta": 0.009999999776482582, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.12175038456916809, + "fcm_dpo/q_t": 0.49969562888145447, + "grad_norm": 8.56733512878418, + "learning_rate": 1.5217391304347825e-07, + "logits/chosen": -0.4898416996002197, + "logits/rejected": -0.44627994298934937, + "logps/chosen": -59.0850830078125, + "logps/ref_chosen": -59.05763626098633, + "logps/ref_rejected": -97.50466918945312, + "logps/rejected": -97.65386962890625, + "loss": 1.3851, + "margin_dpo/margin_mean": 0.12175118923187256, + "margin_dpo/margin_std": 0.3743841052055359, + "step": 22 + }, + { + "KL/chosen_KL_mean": 0.06468772888183594, + "KL/mean": -0.022989824414253235, + "KL/rejected_KL_mean": -0.11066055297851562, + "KL/std": 0.3429142236709595, + "epoch": 0.033773861967694566, + "fcm_dpo/beta": 0.009999999776482582, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.17535313963890076, + "fcm_dpo/q_t": 0.499561607837677, + "grad_norm": 8.251264572143555, + "learning_rate": 1.5942028985507245e-07, + "logits/chosen": -0.4786554276943207, + "logits/rejected": -0.4556560516357422, + "logps/chosen": -60.01300811767578, + "logps/ref_chosen": -60.07769775390625, + "logps/ref_rejected": -81.13955688476562, + "logps/rejected": -81.25021362304688, + "loss": 1.3845, + "margin_dpo/margin_mean": 0.17535346746444702, + "margin_dpo/margin_std": 0.47338640689849854, + "step": 23 + }, + { + "KL/chosen_KL_mean": 0.06081390380859375, + "KL/mean": -0.07897857576608658, + "KL/rejected_KL_mean": -0.21877288818359375, + "KL/std": 0.30757251381874084, + "epoch": 0.03524229074889868, + "fcm_dpo/beta": 0.009999999776482582, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.2795853614807129, + "fcm_dpo/q_t": 0.499301016330719, + "grad_norm": 8.768179893493652, + "learning_rate": 1.6666666666666665e-07, + "logits/chosen": -0.51224684715271, + "logits/rejected": -0.4956665635108948, + "logps/chosen": -44.230220794677734, + "logps/ref_chosen": -44.29103469848633, + "logps/ref_rejected": -99.12521362304688, + "logps/rejected": -99.34398651123047, + "loss": 1.3835, + "margin_dpo/margin_mean": 0.2795855402946472, + "margin_dpo/margin_std": 0.3744848668575287, + "step": 24 + }, + { + "KL/chosen_KL_mean": 0.04968070983886719, + "KL/mean": -0.09518682956695557, + "KL/rejected_KL_mean": -0.24005126953125, + "KL/std": 0.369601845741272, + "epoch": 0.03671071953010279, + "fcm_dpo/beta": 0.009999999776482582, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.2897287607192993, + "fcm_dpo/q_t": 0.49927568435668945, + "grad_norm": 7.525589942932129, + "learning_rate": 1.7391304347826085e-07, + "logits/chosen": -0.4925091564655304, + "logits/rejected": -0.4624241888523102, + "logps/chosen": -52.48737335205078, + "logps/ref_chosen": -52.537052154541016, + "logps/ref_rejected": -89.34219360351562, + "logps/rejected": -89.58224487304688, + "loss": 1.3834, + "margin_dpo/margin_mean": 0.28972867131233215, + "margin_dpo/margin_std": 0.42933177947998047, + "step": 25 + }, + { + "KL/chosen_KL_mean": 0.09215354919433594, + "KL/mean": -0.1101590245962143, + "KL/rejected_KL_mean": -0.3124732971191406, + "KL/std": 0.4453110992908478, + "epoch": 0.0381791483113069, + "fcm_dpo/beta": 0.009999999776482582, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.4046301543712616, + "fcm_dpo/q_t": 0.49898844957351685, + "grad_norm": 9.038716316223145, + "learning_rate": 1.8115942028985507e-07, + "logits/chosen": -0.5335673689842224, + "logits/rejected": -0.5019059777259827, + "logps/chosen": -53.83065414428711, + "logps/ref_chosen": -53.92280578613281, + "logps/ref_rejected": -103.35971069335938, + "logps/rejected": -103.67218780517578, + "loss": 1.3823, + "margin_dpo/margin_mean": 0.40463075041770935, + "margin_dpo/margin_std": 0.5417345762252808, + "step": 26 + }, + { + "KL/chosen_KL_mean": 0.1369171142578125, + "KL/mean": -0.1361747682094574, + "KL/rejected_KL_mean": -0.4092674255371094, + "KL/std": 0.48576533794403076, + "epoch": 0.039647577092511016, + "fcm_dpo/beta": 0.009999999776482582, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.5461834073066711, + "fcm_dpo/q_t": 0.49863457679748535, + "grad_norm": 9.623809814453125, + "learning_rate": 1.8840579710144927e-07, + "logits/chosen": -0.5602696537971497, + "logits/rejected": -0.5244206190109253, + "logps/chosen": -42.76161193847656, + "logps/ref_chosen": -42.898529052734375, + "logps/ref_rejected": -98.72419738769531, + "logps/rejected": -99.13346862792969, + "loss": 1.3809, + "margin_dpo/margin_mean": 0.5461829900741577, + "margin_dpo/margin_std": 0.6316946744918823, + "step": 27 + }, + { + "KL/chosen_KL_mean": 0.014486312866210938, + "KL/mean": -0.14598755538463593, + "KL/rejected_KL_mean": -0.3064613342285156, + "KL/std": 0.3845537304878235, + "epoch": 0.041116005873715125, + "fcm_dpo/beta": 0.009999999776482582, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.32094791531562805, + "fcm_dpo/q_t": 0.4991976320743561, + "grad_norm": 7.836782932281494, + "learning_rate": 1.9565217391304347e-07, + "logits/chosen": -0.502698540687561, + "logits/rejected": -0.4471771717071533, + "logps/chosen": -60.542015075683594, + "logps/ref_chosen": -60.55650329589844, + "logps/ref_rejected": -91.40111541748047, + "logps/rejected": -91.70758056640625, + "loss": 1.3831, + "margin_dpo/margin_mean": 0.3209477663040161, + "margin_dpo/margin_std": 0.5258319973945618, + "step": 28 + }, + { + "KL/chosen_KL_mean": 0.07568168640136719, + "KL/mean": -0.15161648392677307, + "KL/rejected_KL_mean": -0.37891387939453125, + "KL/std": 0.46041831374168396, + "epoch": 0.042584434654919234, + "fcm_dpo/beta": 0.009999999776482582, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.4545966386795044, + "fcm_dpo/q_t": 0.4988635182380676, + "grad_norm": 9.418075561523438, + "learning_rate": 2.028985507246377e-07, + "logits/chosen": -0.5607113242149353, + "logits/rejected": -0.5150310397148132, + "logps/chosen": -57.73210144042969, + "logps/ref_chosen": -57.80778503417969, + "logps/ref_rejected": -97.39434814453125, + "logps/rejected": -97.77326965332031, + "loss": 1.3818, + "margin_dpo/margin_mean": 0.4545968770980835, + "margin_dpo/margin_std": 0.5804776549339294, + "step": 29 + }, + { + "KL/chosen_KL_mean": 0.11934471130371094, + "KL/mean": -0.19414639472961426, + "KL/rejected_KL_mean": -0.5076408386230469, + "KL/std": 0.5560356974601746, + "epoch": 0.04405286343612335, + "fcm_dpo/beta": 0.009999999776482582, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.6269863843917847, + "fcm_dpo/q_t": 0.49843254685401917, + "grad_norm": 9.180720329284668, + "learning_rate": 2.1014492753623187e-07, + "logits/chosen": -0.46149182319641113, + "logits/rejected": -0.42938873171806335, + "logps/chosen": -52.45802688598633, + "logps/ref_chosen": -52.577369689941406, + "logps/ref_rejected": -98.48920440673828, + "logps/rejected": -98.99684143066406, + "loss": 1.38, + "margin_dpo/margin_mean": 0.6269862651824951, + "margin_dpo/margin_std": 0.565685510635376, + "step": 30 + }, + { + "KL/chosen_KL_mean": 0.07823753356933594, + "KL/mean": -0.14964136481285095, + "KL/rejected_KL_mean": -0.3775215148925781, + "KL/std": 0.5351479649543762, + "epoch": 0.04552129221732746, + "fcm_dpo/beta": 0.009999999776482582, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.4557562470436096, + "fcm_dpo/q_t": 0.498860627412796, + "grad_norm": 7.051517009735107, + "learning_rate": 2.1739130434782607e-07, + "logits/chosen": -0.4864119291305542, + "logits/rejected": -0.43947017192840576, + "logps/chosen": -63.72868347167969, + "logps/ref_chosen": -63.806922912597656, + "logps/ref_rejected": -72.89400482177734, + "logps/rejected": -73.27153015136719, + "loss": 1.3818, + "margin_dpo/margin_mean": 0.455756276845932, + "margin_dpo/margin_std": 0.6158726215362549, + "step": 31 + }, + { + "KL/chosen_KL_mean": 0.21595001220703125, + "KL/mean": -0.17387576401233673, + "KL/rejected_KL_mean": -0.5637054443359375, + "KL/std": 0.7160457968711853, + "epoch": 0.04698972099853157, + "fcm_dpo/beta": 0.009999999776482582, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.7796535491943359, + "fcm_dpo/q_t": 0.4980509281158447, + "grad_norm": 8.872539520263672, + "learning_rate": 2.2463768115942027e-07, + "logits/chosen": -0.5233839750289917, + "logits/rejected": -0.48274725675582886, + "logps/chosen": -62.52357482910156, + "logps/ref_chosen": -62.739524841308594, + "logps/ref_rejected": -89.3175048828125, + "logps/rejected": -89.88121032714844, + "loss": 1.3785, + "margin_dpo/margin_mean": 0.7796535491943359, + "margin_dpo/margin_std": 0.9119139909744263, + "step": 32 + }, + { + "KL/chosen_KL_mean": 0.10995101928710938, + "KL/mean": -0.19211336970329285, + "KL/rejected_KL_mean": -0.49417877197265625, + "KL/std": 0.5824633836746216, + "epoch": 0.048458149779735685, + "fcm_dpo/beta": 0.009999999776482582, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.6041242480278015, + "fcm_dpo/q_t": 0.49848970770835876, + "grad_norm": 7.534836292266846, + "learning_rate": 2.318840579710145e-07, + "logits/chosen": -0.49200475215911865, + "logits/rejected": -0.465828537940979, + "logps/chosen": -53.151023864746094, + "logps/ref_chosen": -53.26097106933594, + "logps/ref_rejected": -87.8851318359375, + "logps/rejected": -88.37931823730469, + "loss": 1.3803, + "margin_dpo/margin_mean": 0.6041243076324463, + "margin_dpo/margin_std": 0.696311354637146, + "step": 33 + }, + { + "KL/chosen_KL_mean": 0.05680084228515625, + "KL/mean": -0.3211583197116852, + "KL/rejected_KL_mean": -0.6991157531738281, + "KL/std": 0.7343294620513916, + "epoch": 0.049926578560939794, + "fcm_dpo/beta": 0.009999999776482582, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.7559173107147217, + "fcm_dpo/q_t": 0.49811026453971863, + "grad_norm": 8.2849702835083, + "learning_rate": 2.391304347826087e-07, + "logits/chosen": -0.4856771230697632, + "logits/rejected": -0.4683513939380646, + "logps/chosen": -50.760528564453125, + "logps/ref_chosen": -50.81732940673828, + "logps/ref_rejected": -101.92184448242188, + "logps/rejected": -102.62095642089844, + "loss": 1.3788, + "margin_dpo/margin_mean": 0.7559161186218262, + "margin_dpo/margin_std": 0.89031583070755, + "step": 34 + }, + { + "KL/chosen_KL_mean": 0.1258563995361328, + "KL/mean": -0.46609964966773987, + "KL/rejected_KL_mean": -1.0580558776855469, + "KL/std": 0.997234582901001, + "epoch": 0.0513950073421439, + "fcm_dpo/beta": 0.009999999776482582, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 1.1839112043380737, + "fcm_dpo/q_t": 0.49704039096832275, + "grad_norm": 9.21121597290039, + "learning_rate": 2.463768115942029e-07, + "logits/chosen": -0.5175144672393799, + "logits/rejected": -0.48064374923706055, + "logps/chosen": -50.89863586425781, + "logps/ref_chosen": -51.02449035644531, + "logps/ref_rejected": -106.82443237304688, + "logps/rejected": -107.88248443603516, + "loss": 1.3745, + "margin_dpo/margin_mean": 1.1839113235473633, + "margin_dpo/margin_std": 1.1529996395111084, + "step": 35 + }, + { + "KL/chosen_KL_mean": 0.013143539428710938, + "KL/mean": -0.5748996138572693, + "KL/rejected_KL_mean": -1.1629409790039062, + "KL/std": 1.143606424331665, + "epoch": 0.05286343612334802, + "fcm_dpo/beta": 0.009999999776482582, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 1.1760886907577515, + "fcm_dpo/q_t": 0.4970599412918091, + "grad_norm": 8.075494766235352, + "learning_rate": 2.536231884057971e-07, + "logits/chosen": -0.569900393486023, + "logits/rejected": -0.5340551733970642, + "logps/chosen": -51.97834777832031, + "logps/ref_chosen": -51.991493225097656, + "logps/ref_rejected": -86.0406265258789, + "logps/rejected": -87.20356750488281, + "loss": 1.3746, + "margin_dpo/margin_mean": 1.1760879755020142, + "margin_dpo/margin_std": 1.2999153137207031, + "step": 36 + }, + { + "KL/chosen_KL_mean": 0.00957489013671875, + "KL/mean": -0.5088434219360352, + "KL/rejected_KL_mean": -1.0272636413574219, + "KL/std": 1.0523037910461426, + "epoch": 0.05433186490455213, + "fcm_dpo/beta": 0.009999999776482582, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 1.0368335247039795, + "fcm_dpo/q_t": 0.4974081218242645, + "grad_norm": 6.84469747543335, + "learning_rate": 2.6086956521739126e-07, + "logits/chosen": -0.5325401425361633, + "logits/rejected": -0.49065572023391724, + "logps/chosen": -62.79753112792969, + "logps/ref_chosen": -62.807106018066406, + "logps/ref_rejected": -77.89507293701172, + "logps/rejected": -78.92233276367188, + "loss": 1.376, + "margin_dpo/margin_mean": 1.0368335247039795, + "margin_dpo/margin_std": 1.3373100757598877, + "step": 37 + }, + { + "KL/chosen_KL_mean": 0.1680927276611328, + "KL/mean": -0.552198052406311, + "KL/rejected_KL_mean": -1.272491455078125, + "KL/std": 1.348757266998291, + "epoch": 0.055800293685756244, + "fcm_dpo/beta": 0.009999999776482582, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 1.4405823945999146, + "fcm_dpo/q_t": 0.4963989853858948, + "grad_norm": 7.924060821533203, + "learning_rate": 2.681159420289855e-07, + "logits/chosen": -0.5047751665115356, + "logits/rejected": -0.47182124853134155, + "logps/chosen": -48.22242736816406, + "logps/ref_chosen": -48.39051818847656, + "logps/ref_rejected": -97.91244506835938, + "logps/rejected": -99.1849365234375, + "loss": 1.372, + "margin_dpo/margin_mean": 1.440582513809204, + "margin_dpo/margin_std": 1.5998311042785645, + "step": 38 + }, + { + "KL/chosen_KL_mean": 0.09676551818847656, + "KL/mean": -0.7393452525138855, + "KL/rejected_KL_mean": -1.5754547119140625, + "KL/std": 1.302678108215332, + "epoch": 0.05726872246696035, + "fcm_dpo/beta": 0.009999999776482582, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 1.6722157001495361, + "fcm_dpo/q_t": 0.4958198070526123, + "grad_norm": 8.258176803588867, + "learning_rate": 2.753623188405797e-07, + "logits/chosen": -0.5640593767166138, + "logits/rejected": -0.5244793891906738, + "logps/chosen": -50.653709411621094, + "logps/ref_chosen": -50.75047302246094, + "logps/ref_rejected": -78.56951141357422, + "logps/rejected": -80.14496612548828, + "loss": 1.3697, + "margin_dpo/margin_mean": 1.6722155809402466, + "margin_dpo/margin_std": 1.39006769657135, + "step": 39 + }, + { + "KL/chosen_KL_mean": 0.2056598663330078, + "KL/mean": -0.5616458654403687, + "KL/rejected_KL_mean": -1.3289527893066406, + "KL/std": 1.4022493362426758, + "epoch": 0.05873715124816446, + "fcm_dpo/beta": 0.009999999776482582, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 1.5346159934997559, + "fcm_dpo/q_t": 0.4961639642715454, + "grad_norm": 6.980234146118164, + "learning_rate": 2.8260869565217386e-07, + "logits/chosen": -0.5176148414611816, + "logits/rejected": -0.4874315857887268, + "logps/chosen": -57.7794075012207, + "logps/ref_chosen": -57.985069274902344, + "logps/ref_rejected": -74.3000717163086, + "logps/rejected": -75.6290283203125, + "loss": 1.3711, + "margin_dpo/margin_mean": 1.5346163511276245, + "margin_dpo/margin_std": 1.6990015506744385, + "step": 40 + }, + { + "KL/chosen_KL_mean": 0.01158905029296875, + "KL/mean": -0.9059728980064392, + "KL/rejected_KL_mean": -1.8235282897949219, + "KL/std": 1.8433566093444824, + "epoch": 0.06020558002936858, + "fcm_dpo/beta": 0.009999999776482582, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 1.8351185321807861, + "fcm_dpo/q_t": 0.49541300535202026, + "grad_norm": 8.068608283996582, + "learning_rate": 2.898550724637681e-07, + "logits/chosen": -0.5168710350990295, + "logits/rejected": -0.4790883958339691, + "logps/chosen": -62.684226989746094, + "logps/ref_chosen": -62.69581604003906, + "logps/ref_rejected": -97.02352905273438, + "logps/rejected": -98.84706115722656, + "loss": 1.3681, + "margin_dpo/margin_mean": 1.8351190090179443, + "margin_dpo/margin_std": 2.0257954597473145, + "step": 41 + }, + { + "KL/chosen_KL_mean": 0.2347736358642578, + "KL/mean": -1.0914355516433716, + "KL/rejected_KL_mean": -2.4176406860351562, + "KL/std": 2.2188539505004883, + "epoch": 0.06167400881057269, + "fcm_dpo/beta": 0.009999999776482582, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 2.6524176597595215, + "fcm_dpo/q_t": 0.49337083101272583, + "grad_norm": 9.999738693237305, + "learning_rate": 2.971014492753623e-07, + "logits/chosen": -0.5359020233154297, + "logits/rejected": -0.4893391728401184, + "logps/chosen": -58.731651306152344, + "logps/ref_chosen": -58.966426849365234, + "logps/ref_rejected": -109.90837097167969, + "logps/rejected": -112.32601928710938, + "loss": 1.3601, + "margin_dpo/margin_mean": 2.6524174213409424, + "margin_dpo/margin_std": 2.480203628540039, + "step": 42 + }, + { + "KL/chosen_KL_mean": 0.5463447570800781, + "KL/mean": -0.7033693790435791, + "KL/rejected_KL_mean": -1.9530906677246094, + "KL/std": 1.851230263710022, + "epoch": 0.0631424375917768, + "fcm_dpo/beta": 0.009999999776482582, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 2.4994335174560547, + "fcm_dpo/q_t": 0.4937525987625122, + "grad_norm": 8.732246398925781, + "learning_rate": 3.043478260869565e-07, + "logits/chosen": -0.5335399508476257, + "logits/rejected": -0.5083379745483398, + "logps/chosen": -53.609649658203125, + "logps/ref_chosen": -54.15599822998047, + "logps/ref_rejected": -96.48019409179688, + "logps/rejected": -98.43328094482422, + "loss": 1.3616, + "margin_dpo/margin_mean": 2.4994330406188965, + "margin_dpo/margin_std": 2.0141167640686035, + "step": 43 + }, + { + "KL/chosen_KL_mean": 0.25655555725097656, + "KL/mean": -1.1926369667053223, + "KL/rejected_KL_mean": -2.641826629638672, + "KL/std": 2.3078997135162354, + "epoch": 0.06461086637298091, + "fcm_dpo/beta": 0.009999999776482582, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 2.8983845710754395, + "fcm_dpo/q_t": 0.4927557110786438, + "grad_norm": 9.835100173950195, + "learning_rate": 3.115942028985507e-07, + "logits/chosen": -0.48626744747161865, + "logits/rejected": -0.465964674949646, + "logps/chosen": -49.82194519042969, + "logps/ref_chosen": -50.07849884033203, + "logps/ref_rejected": -108.78376007080078, + "logps/rejected": -111.42558288574219, + "loss": 1.3577, + "margin_dpo/margin_mean": 2.8983850479125977, + "margin_dpo/margin_std": 2.2746810913085938, + "step": 44 + }, + { + "KL/chosen_KL_mean": 0.12450027465820312, + "KL/mean": -1.0119799375534058, + "KL/rejected_KL_mean": -2.1484642028808594, + "KL/std": 2.0461864471435547, + "epoch": 0.06607929515418502, + "fcm_dpo/beta": 0.009999999776482582, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 2.2729620933532715, + "fcm_dpo/q_t": 0.49431926012039185, + "grad_norm": 7.852822303771973, + "learning_rate": 3.188405797101449e-07, + "logits/chosen": -0.457671582698822, + "logits/rejected": -0.4445871412754059, + "logps/chosen": -48.290428161621094, + "logps/ref_chosen": -48.4149284362793, + "logps/ref_rejected": -77.93643188476562, + "logps/rejected": -80.08489227294922, + "loss": 1.3639, + "margin_dpo/margin_mean": 2.2729620933532715, + "margin_dpo/margin_std": 2.53743839263916, + "step": 45 + }, + { + "KL/chosen_KL_mean": 0.15254783630371094, + "KL/mean": -1.3116981983184814, + "KL/rejected_KL_mean": -2.7759437561035156, + "KL/std": 2.635380744934082, + "epoch": 0.06754772393538913, + "fcm_dpo/beta": 0.009999999776482582, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 2.92849063873291, + "fcm_dpo/q_t": 0.4926820993423462, + "grad_norm": 9.287505149841309, + "learning_rate": 3.260869565217391e-07, + "logits/chosen": -0.5358284115791321, + "logits/rejected": -0.48519566655158997, + "logps/chosen": -55.84687805175781, + "logps/ref_chosen": -55.999427795410156, + "logps/ref_rejected": -95.652587890625, + "logps/rejected": -98.42852783203125, + "loss": 1.3575, + "margin_dpo/margin_mean": 2.92849063873291, + "margin_dpo/margin_std": 3.251277446746826, + "step": 46 + }, + { + "KL/chosen_KL_mean": 0.4180793762207031, + "KL/mean": -1.0824042558670044, + "KL/rejected_KL_mean": -2.5828895568847656, + "KL/std": 2.6021361351013184, + "epoch": 0.06901615271659324, + "fcm_dpo/beta": 0.009999999776482582, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 3.0009684562683105, + "fcm_dpo/q_t": 0.49249979853630066, + "grad_norm": 8.359269142150879, + "learning_rate": 3.333333333333333e-07, + "logits/chosen": -0.583840012550354, + "logits/rejected": -0.531823992729187, + "logps/chosen": -57.507999420166016, + "logps/ref_chosen": -57.92607879638672, + "logps/ref_rejected": -94.67920684814453, + "logps/rejected": -97.26210021972656, + "loss": 1.3567, + "margin_dpo/margin_mean": 3.000969409942627, + "margin_dpo/margin_std": 2.6266069412231445, + "step": 47 + }, + { + "KL/chosen_KL_mean": 0.00414276123046875, + "KL/mean": -1.5863243341445923, + "KL/rejected_KL_mean": -3.176788330078125, + "KL/std": 2.768789768218994, + "epoch": 0.07048458149779736, + "fcm_dpo/beta": 0.009999999776482582, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 3.1809349060058594, + "fcm_dpo/q_t": 0.4920506179332733, + "grad_norm": 9.244763374328613, + "learning_rate": 3.4057971014492755e-07, + "logits/chosen": -0.590816855430603, + "logits/rejected": -0.532641589641571, + "logps/chosen": -57.183929443359375, + "logps/ref_chosen": -57.188072204589844, + "logps/ref_rejected": -88.0166015625, + "logps/rejected": -91.19338989257812, + "loss": 1.355, + "margin_dpo/margin_mean": 3.1809351444244385, + "margin_dpo/margin_std": 3.039764881134033, + "step": 48 + }, + { + "KL/chosen_KL_mean": 0.3029060363769531, + "KL/mean": -1.659820556640625, + "KL/rejected_KL_mean": -3.622547149658203, + "KL/std": 3.5545148849487305, + "epoch": 0.07195301027900147, + "fcm_dpo/beta": 0.009999999776482582, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 3.9254517555236816, + "fcm_dpo/q_t": 0.49019408226013184, + "grad_norm": 8.854732513427734, + "learning_rate": 3.478260869565217e-07, + "logits/chosen": -0.5587940812110901, + "logits/rejected": -0.5012864470481873, + "logps/chosen": -61.382362365722656, + "logps/ref_chosen": -61.685272216796875, + "logps/ref_rejected": -83.76747131347656, + "logps/rejected": -87.39002227783203, + "loss": 1.3479, + "margin_dpo/margin_mean": 3.9254512786865234, + "margin_dpo/margin_std": 4.098909378051758, + "step": 49 + }, + { + "KL/chosen_KL_mean": -0.2061939239501953, + "KL/mean": -2.284212827682495, + "KL/rejected_KL_mean": -4.3622283935546875, + "KL/std": 3.580059051513672, + "epoch": 0.07342143906020558, + "fcm_dpo/beta": 0.009999999776482582, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 4.156033515930176, + "fcm_dpo/q_t": 0.4896165132522583, + "grad_norm": 8.716004371643066, + "learning_rate": 3.5507246376811595e-07, + "logits/chosen": -0.5688312649726868, + "logits/rejected": -0.5336655378341675, + "logps/chosen": -58.93033218383789, + "logps/ref_chosen": -58.72413635253906, + "logps/ref_rejected": -96.35814666748047, + "logps/rejected": -100.72037506103516, + "loss": 1.3456, + "margin_dpo/margin_mean": 4.156033515930176, + "margin_dpo/margin_std": 4.06223201751709, + "step": 50 + }, + { + "KL/chosen_KL_mean": -0.30522727966308594, + "KL/mean": -2.3866991996765137, + "KL/rejected_KL_mean": -4.468173980712891, + "KL/std": 4.316704750061035, + "epoch": 0.07488986784140969, + "fcm_dpo/beta": 0.009999999776482582, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 4.162949562072754, + "fcm_dpo/q_t": 0.4896053671836853, + "grad_norm": 8.142417907714844, + "learning_rate": 3.6231884057971015e-07, + "logits/chosen": -0.5444722175598145, + "logits/rejected": -0.5125424861907959, + "logps/chosen": -61.67889404296875, + "logps/ref_chosen": -61.3736686706543, + "logps/ref_rejected": -76.00199890136719, + "logps/rejected": -80.47017669677734, + "loss": 1.3458, + "margin_dpo/margin_mean": 4.162949562072754, + "margin_dpo/margin_std": 5.265970706939697, + "step": 51 + }, + { + "KL/chosen_KL_mean": 0.31786346435546875, + "KL/mean": -2.8440990447998047, + "KL/rejected_KL_mean": -6.006065368652344, + "KL/std": 4.961765289306641, + "epoch": 0.0763582966226138, + "fcm_dpo/beta": 0.009999999776482582, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 6.323929309844971, + "fcm_dpo/q_t": 0.48420995473861694, + "grad_norm": 9.986041069030762, + "learning_rate": 3.695652173913043e-07, + "logits/chosen": -0.5617812275886536, + "logits/rejected": -0.5068017840385437, + "logps/chosen": -52.019493103027344, + "logps/ref_chosen": -52.33735656738281, + "logps/ref_rejected": -79.97391510009766, + "logps/rejected": -85.97998046875, + "loss": 1.3248, + "margin_dpo/margin_mean": 6.323929309844971, + "margin_dpo/margin_std": 5.412091255187988, + "step": 52 + }, + { + "KL/chosen_KL_mean": -0.24696731567382812, + "KL/mean": -3.4364819526672363, + "KL/rejected_KL_mean": -6.6259918212890625, + "KL/std": 5.564968109130859, + "epoch": 0.07782672540381791, + "fcm_dpo/beta": 0.009999999776482582, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 6.37903356552124, + "fcm_dpo/q_t": 0.4840887486934662, + "grad_norm": 10.334386825561523, + "learning_rate": 3.7681159420289855e-07, + "logits/chosen": -0.6039080619812012, + "logits/rejected": -0.5819511413574219, + "logps/chosen": -53.56161880493164, + "logps/ref_chosen": -53.31465148925781, + "logps/ref_rejected": -91.78359985351562, + "logps/rejected": -98.40959167480469, + "loss": 1.3248, + "margin_dpo/margin_mean": 6.379033088684082, + "margin_dpo/margin_std": 6.412992477416992, + "step": 53 + }, + { + "KL/chosen_KL_mean": -0.4557170867919922, + "KL/mean": -3.2610464096069336, + "KL/rejected_KL_mean": -6.066375732421875, + "KL/std": 5.338939189910889, + "epoch": 0.07929515418502203, + "fcm_dpo/beta": 0.009999999776482582, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 5.610658645629883, + "fcm_dpo/q_t": 0.4859907031059265, + "grad_norm": 8.843002319335938, + "learning_rate": 3.8405797101449274e-07, + "logits/chosen": -0.6185827255249023, + "logits/rejected": -0.566498339176178, + "logps/chosen": -51.144378662109375, + "logps/ref_chosen": -50.68865966796875, + "logps/ref_rejected": -91.71539306640625, + "logps/rejected": -97.78176879882812, + "loss": 1.3317, + "margin_dpo/margin_mean": 5.610658168792725, + "margin_dpo/margin_std": 5.301271438598633, + "step": 54 + }, + { + "KL/chosen_KL_mean": -1.0718555450439453, + "KL/mean": -4.378843307495117, + "KL/rejected_KL_mean": -7.685829162597656, + "KL/std": 6.400544166564941, + "epoch": 0.08076358296622614, + "fcm_dpo/beta": 0.009999999776482582, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 6.613970756530762, + "fcm_dpo/q_t": 0.4835028052330017, + "grad_norm": 9.306718826293945, + "learning_rate": 3.9130434782608694e-07, + "logits/chosen": -0.6270061731338501, + "logits/rejected": -0.5628513097763062, + "logps/chosen": -63.68708801269531, + "logps/ref_chosen": -62.615234375, + "logps/ref_rejected": -88.99349975585938, + "logps/rejected": -96.67933654785156, + "loss": 1.3228, + "margin_dpo/margin_mean": 6.6139702796936035, + "margin_dpo/margin_std": 7.726709365844727, + "step": 55 + }, + { + "KL/chosen_KL_mean": -0.8420734405517578, + "KL/mean": -4.043349266052246, + "KL/rejected_KL_mean": -7.24462890625, + "KL/std": 6.373098850250244, + "epoch": 0.08223201174743025, + "fcm_dpo/beta": 0.009999999776482582, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 6.402560234069824, + "fcm_dpo/q_t": 0.4840297996997833, + "grad_norm": 8.995559692382812, + "learning_rate": 3.9855072463768114e-07, + "logits/chosen": -0.6062008142471313, + "logits/rejected": -0.563714861869812, + "logps/chosen": -58.774803161621094, + "logps/ref_chosen": -57.9327278137207, + "logps/ref_rejected": -94.1744384765625, + "logps/rejected": -101.4190673828125, + "loss": 1.3248, + "margin_dpo/margin_mean": 6.402560234069824, + "margin_dpo/margin_std": 7.6934638023376465, + "step": 56 + }, + { + "KL/chosen_KL_mean": -0.9112758636474609, + "KL/mean": -4.525204181671143, + "KL/rejected_KL_mean": -8.13912582397461, + "KL/std": 6.3168745040893555, + "epoch": 0.08370044052863436, + "fcm_dpo/beta": 0.009999999776482582, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 7.227848052978516, + "fcm_dpo/q_t": 0.48196300864219666, + "grad_norm": 9.872321128845215, + "learning_rate": 4.057971014492754e-07, + "logits/chosen": -0.5837876200675964, + "logits/rejected": -0.5559124946594238, + "logps/chosen": -71.40656280517578, + "logps/ref_chosen": -70.49528503417969, + "logps/ref_rejected": -95.56546020507812, + "logps/rejected": -103.70458221435547, + "loss": 1.3166, + "margin_dpo/margin_mean": 7.227848052978516, + "margin_dpo/margin_std": 7.028669834136963, + "step": 57 + }, + { + "KL/chosen_KL_mean": -1.2650394439697266, + "KL/mean": -5.307687759399414, + "KL/rejected_KL_mean": -9.350334167480469, + "KL/std": 7.259403228759766, + "epoch": 0.08516886930983847, + "fcm_dpo/beta": 0.009999999776482582, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 8.08529281616211, + "fcm_dpo/q_t": 0.4798462390899658, + "grad_norm": 10.019336700439453, + "learning_rate": 4.1304347826086954e-07, + "logits/chosen": -0.5869804620742798, + "logits/rejected": -0.5100945830345154, + "logps/chosen": -63.397979736328125, + "logps/ref_chosen": -62.13294219970703, + "logps/ref_rejected": -84.61729431152344, + "logps/rejected": -93.9676284790039, + "loss": 1.309, + "margin_dpo/margin_mean": 8.08529281616211, + "margin_dpo/margin_std": 8.671724319458008, + "step": 58 + }, + { + "KL/chosen_KL_mean": -1.8158931732177734, + "KL/mean": -6.279197692871094, + "KL/rejected_KL_mean": -10.742504119873047, + "KL/std": 8.032249450683594, + "epoch": 0.08663729809104258, + "fcm_dpo/beta": 0.009999999776482582, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 8.926612854003906, + "fcm_dpo/q_t": 0.4777594804763794, + "grad_norm": 11.210515975952148, + "learning_rate": 4.2028985507246374e-07, + "logits/chosen": -0.6375648379325867, + "logits/rejected": -0.5986994504928589, + "logps/chosen": -53.74842071533203, + "logps/ref_chosen": -51.932525634765625, + "logps/ref_rejected": -88.88520050048828, + "logps/rejected": -99.62770080566406, + "loss": 1.3012, + "margin_dpo/margin_mean": 8.926612854003906, + "margin_dpo/margin_std": 9.161856651306152, + "step": 59 + }, + { + "KL/chosen_KL_mean": -3.208467483520508, + "KL/mean": -6.759184837341309, + "KL/rejected_KL_mean": -10.309898376464844, + "KL/std": 7.358757972717285, + "epoch": 0.0881057268722467, + "fcm_dpo/beta": 0.009999999776482582, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 7.1014251708984375, + "fcm_dpo/q_t": 0.4822811782360077, + "grad_norm": 9.682544708251953, + "learning_rate": 4.2753623188405794e-07, + "logits/chosen": -0.6127077341079712, + "logits/rejected": -0.5527953505516052, + "logps/chosen": -64.1506576538086, + "logps/ref_chosen": -60.94218826293945, + "logps/ref_rejected": -85.39340209960938, + "logps/rejected": -95.70329284667969, + "loss": 1.3183, + "margin_dpo/margin_mean": 7.101426124572754, + "margin_dpo/margin_std": 8.231400489807129, + "step": 60 + }, + { + "KL/chosen_KL_mean": -1.9628067016601562, + "KL/mean": -6.415502548217773, + "KL/rejected_KL_mean": -10.86819839477539, + "KL/std": 9.966720581054688, + "epoch": 0.08957415565345081, + "fcm_dpo/beta": 0.009999999776482582, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 8.9053955078125, + "fcm_dpo/q_t": 0.47791624069213867, + "grad_norm": 10.344194412231445, + "learning_rate": 4.3478260869565214e-07, + "logits/chosen": -0.624599814414978, + "logits/rejected": -0.5915525555610657, + "logps/chosen": -62.59632873535156, + "logps/ref_chosen": -60.633522033691406, + "logps/ref_rejected": -89.85249328613281, + "logps/rejected": -100.72069549560547, + "loss": 1.3032, + "margin_dpo/margin_mean": 8.9053955078125, + "margin_dpo/margin_std": 12.48222541809082, + "step": 61 + }, + { + "KL/chosen_KL_mean": -2.1025753021240234, + "KL/mean": -5.457090854644775, + "KL/rejected_KL_mean": -8.811607360839844, + "KL/std": 7.392644882202148, + "epoch": 0.09104258443465492, + "fcm_dpo/beta": 0.009999999776482582, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 6.709033012390137, + "fcm_dpo/q_t": 0.4832811653614044, + "grad_norm": 8.395082473754883, + "learning_rate": 4.420289855072464e-07, + "logits/chosen": -0.6115210056304932, + "logits/rejected": -0.5771872401237488, + "logps/chosen": -58.25334930419922, + "logps/ref_chosen": -56.15077209472656, + "logps/ref_rejected": -75.56619262695312, + "logps/rejected": -84.3778076171875, + "loss": 1.3225, + "margin_dpo/margin_mean": 6.709033012390137, + "margin_dpo/margin_std": 9.149477005004883, + "step": 62 + }, + { + "KL/chosen_KL_mean": -3.5704593658447266, + "KL/mean": -8.119380950927734, + "KL/rejected_KL_mean": -12.66830062866211, + "KL/std": 9.557559967041016, + "epoch": 0.09251101321585903, + "fcm_dpo/beta": 0.009999999776482582, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 9.097840309143066, + "fcm_dpo/q_t": 0.4773363471031189, + "grad_norm": 10.433484077453613, + "learning_rate": 4.4927536231884053e-07, + "logits/chosen": -0.6092942953109741, + "logits/rejected": -0.5663588047027588, + "logps/chosen": -76.71784973144531, + "logps/ref_chosen": -73.14739227294922, + "logps/ref_rejected": -97.61006164550781, + "logps/rejected": -110.27836608886719, + "loss": 1.3, + "margin_dpo/margin_mean": 9.09783935546875, + "margin_dpo/margin_std": 10.06234359741211, + "step": 63 + }, + { + "KL/chosen_KL_mean": -1.6353092193603516, + "KL/mean": -7.144941329956055, + "KL/rejected_KL_mean": -12.65457534790039, + "KL/std": 10.44306755065918, + "epoch": 0.09397944199706314, + "fcm_dpo/beta": 0.009999999776482582, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 11.019262313842773, + "fcm_dpo/q_t": 0.4726361632347107, + "grad_norm": 11.34101390838623, + "learning_rate": 4.5652173913043473e-07, + "logits/chosen": -0.5932717323303223, + "logits/rejected": -0.5615238547325134, + "logps/chosen": -55.6339111328125, + "logps/ref_chosen": -53.998600006103516, + "logps/ref_rejected": -93.53019714355469, + "logps/rejected": -106.18477630615234, + "loss": 1.2835, + "margin_dpo/margin_mean": 11.019262313842773, + "margin_dpo/margin_std": 12.875155448913574, + "step": 64 + }, + { + "KL/chosen_KL_mean": -4.444938659667969, + "KL/mean": -10.106355667114258, + "KL/rejected_KL_mean": -15.767776489257812, + "KL/std": 11.982595443725586, + "epoch": 0.09544787077826726, + "fcm_dpo/beta": 0.009999999776482582, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 11.32283878326416, + "fcm_dpo/q_t": 0.47184616327285767, + "grad_norm": 11.773619651794434, + "learning_rate": 4.63768115942029e-07, + "logits/chosen": -0.6665968298912048, + "logits/rejected": -0.6556574106216431, + "logps/chosen": -69.28094482421875, + "logps/ref_chosen": -64.83599853515625, + "logps/ref_rejected": -109.94645690917969, + "logps/rejected": -125.7142333984375, + "loss": 1.2808, + "margin_dpo/margin_mean": 11.322837829589844, + "margin_dpo/margin_std": 13.313655853271484, + "step": 65 + }, + { + "KL/chosen_KL_mean": -4.254911422729492, + "KL/mean": -9.470987319946289, + "KL/rejected_KL_mean": -14.687057495117188, + "KL/std": 10.89914321899414, + "epoch": 0.09691629955947137, + "fcm_dpo/beta": 0.009999999776482582, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 10.432148933410645, + "fcm_dpo/q_t": 0.4741200804710388, + "grad_norm": 10.362818717956543, + "learning_rate": 4.7101449275362313e-07, + "logits/chosen": -0.6421518325805664, + "logits/rejected": -0.6104958057403564, + "logps/chosen": -55.69843673706055, + "logps/ref_chosen": -51.44352722167969, + "logps/ref_rejected": -75.63629913330078, + "logps/rejected": -90.32335662841797, + "loss": 1.2892, + "margin_dpo/margin_mean": 10.432148933410645, + "margin_dpo/margin_std": 13.316844940185547, + "step": 66 + }, + { + "KL/chosen_KL_mean": -4.397365570068359, + "KL/mean": -9.999573707580566, + "KL/rejected_KL_mean": -15.601787567138672, + "KL/std": 12.074445724487305, + "epoch": 0.09838472834067548, + "fcm_dpo/beta": 0.009999999776482582, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 11.204422950744629, + "fcm_dpo/q_t": 0.4722447097301483, + "grad_norm": 10.62942886352539, + "learning_rate": 4.782608695652174e-07, + "logits/chosen": -0.5891748070716858, + "logits/rejected": -0.5434067249298096, + "logps/chosen": -63.7381706237793, + "logps/ref_chosen": -59.34080505371094, + "logps/ref_rejected": -72.78728485107422, + "logps/rejected": -88.38906860351562, + "loss": 1.2828, + "margin_dpo/margin_mean": 11.204421997070312, + "margin_dpo/margin_std": 14.561704635620117, + "step": 67 + }, + { + "KL/chosen_KL_mean": -6.524868011474609, + "KL/mean": -11.753357887268066, + "KL/rejected_KL_mean": -16.981849670410156, + "KL/std": 11.021953582763672, + "epoch": 0.09985315712187959, + "fcm_dpo/beta": 0.009999999776482582, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 10.456976890563965, + "fcm_dpo/q_t": 0.47402510046958923, + "grad_norm": 9.816877365112305, + "learning_rate": 4.855072463768116e-07, + "logits/chosen": -0.6517592668533325, + "logits/rejected": -0.586235761642456, + "logps/chosen": -71.73069763183594, + "logps/ref_chosen": -65.2058334350586, + "logps/ref_rejected": -77.20724487304688, + "logps/rejected": -94.18910217285156, + "loss": 1.2886, + "margin_dpo/margin_mean": 10.456975936889648, + "margin_dpo/margin_std": 12.803793907165527, + "step": 68 + }, + { + "KL/chosen_KL_mean": -8.00680160522461, + "KL/mean": -15.686670303344727, + "KL/rejected_KL_mean": -23.366546630859375, + "KL/std": 15.096254348754883, + "epoch": 0.1013215859030837, + "fcm_dpo/beta": 0.009999999776482582, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 15.35973834991455, + "fcm_dpo/q_t": 0.46209076046943665, + "grad_norm": 13.197538375854492, + "learning_rate": 4.927536231884058e-07, + "logits/chosen": -0.6669565439224243, + "logits/rejected": -0.6464905738830566, + "logps/chosen": -67.82604217529297, + "logps/ref_chosen": -59.81924057006836, + "logps/ref_rejected": -103.38886260986328, + "logps/rejected": -126.75540924072266, + "loss": 1.2463, + "margin_dpo/margin_mean": 15.359739303588867, + "margin_dpo/margin_std": 17.261123657226562, + "step": 69 + }, + { + "KL/chosen_KL_mean": -10.887544631958008, + "KL/mean": -18.979265213012695, + "KL/rejected_KL_mean": -27.07099151611328, + "KL/std": 19.12816619873047, + "epoch": 0.1027900146842878, + "fcm_dpo/beta": 0.009999999776482582, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 16.18343734741211, + "fcm_dpo/q_t": 0.46015501022338867, + "grad_norm": 12.805341720581055, + "learning_rate": 5e-07, + "logits/chosen": -0.629044771194458, + "logits/rejected": -0.5975900888442993, + "logps/chosen": -72.81818389892578, + "logps/ref_chosen": -61.930641174316406, + "logps/ref_rejected": -91.06078338623047, + "logps/rejected": -118.13177490234375, + "loss": 1.2415, + "margin_dpo/margin_mean": 16.183441162109375, + "margin_dpo/margin_std": 20.474119186401367, + "step": 70 + }, + { + "KL/chosen_KL_mean": -11.302818298339844, + "KL/mean": -21.22766876220703, + "KL/rejected_KL_mean": -31.152530670166016, + "KL/std": 20.86431121826172, + "epoch": 0.10425844346549193, + "fcm_dpo/beta": 0.010172600857913494, + "fcm_dpo/delta": 0.08556444197893143, + "fcm_dpo/margin": 19.849708557128906, + "fcm_dpo/q_t": 0.45123565196990967, + "grad_norm": 14.385030746459961, + "learning_rate": 4.999967061337492e-07, + "logits/chosen": -0.7001615762710571, + "logits/rejected": -0.6704069972038269, + "logps/chosen": -73.05315399169922, + "logps/ref_chosen": -61.750335693359375, + "logps/ref_rejected": -97.33662414550781, + "logps/rejected": -128.48915100097656, + "loss": 1.2088, + "margin_dpo/margin_mean": 19.849708557128906, + "margin_dpo/margin_std": 23.028926849365234, + "step": 71 + }, + { + "KL/chosen_KL_mean": -13.121393203735352, + "KL/mean": -22.910423278808594, + "KL/rejected_KL_mean": -32.6994514465332, + "KL/std": 21.885107040405273, + "epoch": 0.10572687224669604, + "fcm_dpo/beta": 0.010172600857913494, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 19.57806396484375, + "fcm_dpo/q_t": 0.451375812292099, + "grad_norm": 13.355144500732422, + "learning_rate": 4.999868246217933e-07, + "logits/chosen": -0.6597040891647339, + "logits/rejected": -0.6241432428359985, + "logps/chosen": -79.1748046875, + "logps/ref_chosen": -66.05341339111328, + "logps/ref_rejected": -95.2869873046875, + "logps/rejected": -127.98643493652344, + "loss": 1.2156, + "margin_dpo/margin_mean": 19.57806396484375, + "margin_dpo/margin_std": 27.155168533325195, + "step": 72 + }, + { + "KL/chosen_KL_mean": -15.171804428100586, + "KL/mean": -25.175312042236328, + "KL/rejected_KL_mean": -35.178829193115234, + "KL/std": 27.357677459716797, + "epoch": 0.10719530102790015, + "fcm_dpo/beta": 0.010345780290663242, + "fcm_dpo/delta": 0.08440417796373367, + "fcm_dpo/margin": 20.00701904296875, + "fcm_dpo/q_t": 0.4510188698768616, + "grad_norm": 14.032218933105469, + "learning_rate": 4.999703557245192e-07, + "logits/chosen": -0.6953517198562622, + "logits/rejected": -0.6581678986549377, + "logps/chosen": -81.4280776977539, + "logps/ref_chosen": -66.25627136230469, + "logps/ref_rejected": -90.45613098144531, + "logps/rejected": -125.63496398925781, + "loss": 1.2222, + "margin_dpo/margin_mean": 20.00701904296875, + "margin_dpo/margin_std": 35.20468521118164, + "step": 73 + }, + { + "KL/chosen_KL_mean": -15.799406051635742, + "KL/mean": -27.360076904296875, + "KL/rejected_KL_mean": -38.92074203491211, + "KL/std": 30.7703857421875, + "epoch": 0.10866372980910426, + "fcm_dpo/beta": 0.010593706741929054, + "fcm_dpo/delta": 0.15896809101104736, + "fcm_dpo/margin": 23.121337890625, + "fcm_dpo/q_t": 0.4444906413555145, + "grad_norm": 15.119664192199707, + "learning_rate": 4.999472998758977e-07, + "logits/chosen": -0.6417176723480225, + "logits/rejected": -0.6399871706962585, + "logps/chosen": -69.22428894042969, + "logps/ref_chosen": -53.42488098144531, + "logps/ref_rejected": -95.94693756103516, + "logps/rejected": -134.86767578125, + "loss": 1.1959, + "margin_dpo/margin_mean": 23.121337890625, + "margin_dpo/margin_std": 39.1754150390625, + "step": 74 + }, + { + "KL/chosen_KL_mean": -15.983125686645508, + "KL/mean": -32.20029067993164, + "KL/rejected_KL_mean": -48.417449951171875, + "KL/std": 31.59003448486328, + "epoch": 0.11013215859030837, + "fcm_dpo/beta": 0.010802132077515125, + "fcm_dpo/delta": 0.050960563123226166, + "fcm_dpo/margin": 32.434326171875, + "fcm_dpo/q_t": 0.41791272163391113, + "grad_norm": 17.095163345336914, + "learning_rate": 4.999176576834721e-07, + "logits/chosen": -0.6758487224578857, + "logits/rejected": -0.6743229627609253, + "logps/chosen": -67.84478759765625, + "logps/ref_chosen": -51.861663818359375, + "logps/ref_rejected": -111.25398254394531, + "logps/rejected": -159.6714324951172, + "loss": 1.1055, + "margin_dpo/margin_mean": 32.434326171875, + "margin_dpo/margin_std": 37.082366943359375, + "step": 75 + }, + { + "KL/chosen_KL_mean": -18.9597225189209, + "KL/mean": -28.698415756225586, + "KL/rejected_KL_mean": -38.43710708618164, + "KL/std": 25.665380477905273, + "epoch": 0.11160058737151249, + "fcm_dpo/beta": 0.010884184390306473, + "fcm_dpo/delta": 0.08708269149065018, + "fcm_dpo/margin": 19.47739028930664, + "fcm_dpo/q_t": 0.4486614465713501, + "grad_norm": 15.436707496643066, + "learning_rate": 4.998814299283415e-07, + "logits/chosen": -0.6917558908462524, + "logits/rejected": -0.6552442312240601, + "logps/chosen": -72.22576141357422, + "logps/ref_chosen": -53.26603698730469, + "logps/ref_rejected": -78.21662902832031, + "logps/rejected": -116.65373229980469, + "loss": 1.2096, + "margin_dpo/margin_mean": 19.47739028930664, + "margin_dpo/margin_std": 28.66197395324707, + "step": 76 + }, + { + "KL/chosen_KL_mean": -17.53479766845703, + "KL/mean": -34.09250259399414, + "KL/rejected_KL_mean": -50.65019989013672, + "KL/std": 34.816802978515625, + "epoch": 0.1130690161527166, + "fcm_dpo/beta": 0.011013032868504524, + "fcm_dpo/delta": 0.036607466638088226, + "fcm_dpo/margin": 33.11540603637695, + "fcm_dpo/q_t": 0.4138905107975006, + "grad_norm": 17.87863540649414, + "learning_rate": 4.998386175651409e-07, + "logits/chosen": -0.6697767376899719, + "logits/rejected": -0.637236475944519, + "logps/chosen": -75.63147735595703, + "logps/ref_chosen": -58.0966796875, + "logps/ref_rejected": -93.77361297607422, + "logps/rejected": -144.42381286621094, + "loss": 1.1015, + "margin_dpo/margin_mean": 33.11540603637695, + "margin_dpo/margin_std": 40.183067321777344, + "step": 77 + }, + { + "KL/chosen_KL_mean": -18.032699584960938, + "KL/mean": -29.887676239013672, + "KL/rejected_KL_mean": -41.74265670776367, + "KL/std": 29.635108947753906, + "epoch": 0.1145374449339207, + "fcm_dpo/beta": 0.011299570091068745, + "fcm_dpo/delta": 0.1355305016040802, + "fcm_dpo/margin": 23.709951400756836, + "fcm_dpo/q_t": 0.43751174211502075, + "grad_norm": 15.848010063171387, + "learning_rate": 4.997892217220159e-07, + "logits/chosen": -0.6312749981880188, + "logits/rejected": -0.6117902994155884, + "logps/chosen": -73.646484375, + "logps/ref_chosen": -55.61378479003906, + "logps/ref_rejected": -84.93436431884766, + "logps/rejected": -126.67701721191406, + "loss": 1.1749, + "margin_dpo/margin_mean": 23.709949493408203, + "margin_dpo/margin_std": 35.13151168823242, + "step": 78 + }, + { + "KL/chosen_KL_mean": -19.16200828552246, + "KL/mean": -31.912139892578125, + "KL/rejected_KL_mean": -44.66227340698242, + "KL/std": 31.9647274017334, + "epoch": 0.11600587371512482, + "fcm_dpo/beta": 0.01154954545199871, + "fcm_dpo/delta": 0.10868433862924576, + "fcm_dpo/margin": 25.500267028808594, + "fcm_dpo/q_t": 0.4317125082015991, + "grad_norm": 15.949761390686035, + "learning_rate": 4.997332437005931e-07, + "logits/chosen": -0.6368188858032227, + "logits/rejected": -0.6110581755638123, + "logps/chosen": -74.61249542236328, + "logps/ref_chosen": -55.45048522949219, + "logps/ref_rejected": -87.64756774902344, + "logps/rejected": -132.30984497070312, + "loss": 1.1611, + "margin_dpo/margin_mean": 25.500267028808594, + "margin_dpo/margin_std": 38.79698181152344, + "step": 79 + }, + { + "KL/chosen_KL_mean": -23.745559692382812, + "KL/mean": -37.06023406982422, + "KL/rejected_KL_mean": -50.374908447265625, + "KL/std": 37.243316650390625, + "epoch": 0.11747430249632893, + "fcm_dpo/beta": 0.011725610122084618, + "fcm_dpo/delta": 0.09062545001506805, + "fcm_dpo/margin": 26.62934112548828, + "fcm_dpo/q_t": 0.43057841062545776, + "grad_norm": 17.33467674255371, + "learning_rate": 4.996706849759452e-07, + "logits/chosen": -0.7032785415649414, + "logits/rejected": -0.6706737279891968, + "logps/chosen": -82.26484680175781, + "logps/ref_chosen": -58.519290924072266, + "logps/ref_rejected": -87.54750061035156, + "logps/rejected": -137.9224090576172, + "loss": 1.163, + "margin_dpo/margin_mean": 26.62934112548828, + "margin_dpo/margin_std": 43.98291778564453, + "step": 80 + }, + { + "KL/chosen_KL_mean": -23.4375, + "KL/mean": -41.379188537597656, + "KL/rejected_KL_mean": -59.32087707519531, + "KL/std": 43.66413497924805, + "epoch": 0.11894273127753303, + "fcm_dpo/beta": 0.011759042739868164, + "fcm_dpo/delta": -0.023072410374879837, + "fcm_dpo/margin": 35.88337326049805, + "fcm_dpo/q_t": 0.4062455892562866, + "grad_norm": 18.646865844726562, + "learning_rate": 4.996015471965529e-07, + "logits/chosen": -0.6688940525054932, + "logits/rejected": -0.6473867893218994, + "logps/chosen": -89.88636779785156, + "logps/ref_chosen": -66.44886779785156, + "logps/ref_rejected": -129.66270446777344, + "logps/rejected": -188.98358154296875, + "loss": 1.0918, + "margin_dpo/margin_mean": 35.88337326049805, + "margin_dpo/margin_std": 52.36148452758789, + "step": 81 + }, + { + "KL/chosen_KL_mean": -26.48457145690918, + "KL/mean": -40.68313217163086, + "KL/rejected_KL_mean": -54.88169479370117, + "KL/std": 39.783546447753906, + "epoch": 0.12041116005873716, + "fcm_dpo/beta": 0.011723190546035767, + "fcm_dpo/delta": -0.040955908596515656, + "fcm_dpo/margin": 28.397132873535156, + "fcm_dpo/q_t": 0.42619970440864563, + "grad_norm": 18.904956817626953, + "learning_rate": 4.995258321842611e-07, + "logits/chosen": -0.6247996091842651, + "logits/rejected": -0.6217905282974243, + "logps/chosen": -78.71695709228516, + "logps/ref_chosen": -52.232383728027344, + "logps/ref_rejected": -90.74325561523438, + "logps/rejected": -145.62493896484375, + "loss": 1.1808, + "margin_dpo/margin_mean": 28.397132873535156, + "margin_dpo/margin_std": 55.747474670410156, + "step": 82 + }, + { + "KL/chosen_KL_mean": -27.066598892211914, + "KL/mean": -45.09041213989258, + "KL/rejected_KL_mean": -63.11423110961914, + "KL/std": 41.50777816772461, + "epoch": 0.12187958883994127, + "fcm_dpo/beta": 0.011612952686846256, + "fcm_dpo/delta": -0.019600681960582733, + "fcm_dpo/margin": 36.04762649536133, + "fcm_dpo/q_t": 0.4054431617259979, + "grad_norm": 19.01769256591797, + "learning_rate": 4.994435419342304e-07, + "logits/chosen": -0.6414648294448853, + "logits/rejected": -0.6217591762542725, + "logps/chosen": -82.89398193359375, + "logps/ref_chosen": -55.82738494873047, + "logps/ref_rejected": -103.71589660644531, + "logps/rejected": -166.83013916015625, + "loss": 1.089, + "margin_dpo/margin_mean": 36.04762268066406, + "margin_dpo/margin_std": 50.104408264160156, + "step": 83 + }, + { + "KL/chosen_KL_mean": -28.04334831237793, + "KL/mean": -41.49994659423828, + "KL/rejected_KL_mean": -54.956539154052734, + "KL/std": 35.679141998291016, + "epoch": 0.12334801762114538, + "fcm_dpo/beta": 0.011577482335269451, + "fcm_dpo/delta": -0.022414665669202805, + "fcm_dpo/margin": 26.913192749023438, + "fcm_dpo/q_t": 0.4257325530052185, + "grad_norm": 18.095876693725586, + "learning_rate": 4.993546786148857e-07, + "logits/chosen": -0.6499658823013306, + "logits/rejected": -0.6158007383346558, + "logps/chosen": -95.21951293945312, + "logps/ref_chosen": -67.1761703491211, + "logps/ref_rejected": -87.29859924316406, + "logps/rejected": -142.25514221191406, + "loss": 1.1425, + "margin_dpo/margin_mean": 26.913192749023438, + "margin_dpo/margin_std": 35.351402282714844, + "step": 84 + }, + { + "KL/chosen_KL_mean": -27.43400764465332, + "KL/mean": -41.591861724853516, + "KL/rejected_KL_mean": -55.74971389770508, + "KL/std": 35.18168258666992, + "epoch": 0.12481644640234948, + "fcm_dpo/beta": 0.011758394539356232, + "fcm_dpo/delta": 0.06842872500419617, + "fcm_dpo/margin": 28.315706253051758, + "fcm_dpo/q_t": 0.42318564653396606, + "grad_norm": 18.253501892089844, + "learning_rate": 4.992592445678582e-07, + "logits/chosen": -0.6255546808242798, + "logits/rejected": -0.5947661995887756, + "logps/chosen": -85.84062194824219, + "logps/ref_chosen": -58.4066162109375, + "logps/ref_rejected": -78.63880157470703, + "logps/rejected": -134.38851928710938, + "loss": 1.1441, + "margin_dpo/margin_mean": 28.31570816040039, + "margin_dpo/margin_std": 43.07608413696289, + "step": 85 + }, + { + "KL/chosen_KL_mean": -36.229835510253906, + "KL/mean": -51.37822723388672, + "KL/rejected_KL_mean": -66.526611328125, + "KL/std": 52.038185119628906, + "epoch": 0.1262848751835536, + "fcm_dpo/beta": 0.011856161057949066, + "fcm_dpo/delta": 0.041851602494716644, + "fcm_dpo/margin": 30.296781539916992, + "fcm_dpo/q_t": 0.42889153957366943, + "grad_norm": 25.211519241333008, + "learning_rate": 4.991572423079235e-07, + "logits/chosen": -0.645729124546051, + "logits/rejected": -0.6413381099700928, + "logps/chosen": -92.36730194091797, + "logps/ref_chosen": -56.13746643066406, + "logps/ref_rejected": -88.12165069580078, + "logps/rejected": -154.64825439453125, + "loss": 1.2181, + "margin_dpo/margin_mean": 30.29677963256836, + "margin_dpo/margin_std": 73.2314453125, + "step": 86 + }, + { + "KL/chosen_KL_mean": -35.271644592285156, + "KL/mean": -54.79254913330078, + "KL/rejected_KL_mean": -74.31346130371094, + "KL/std": 52.29801940917969, + "epoch": 0.1277533039647577, + "fcm_dpo/beta": 0.011678045615553856, + "fcm_dpo/delta": -0.059305619448423386, + "fcm_dpo/margin": 39.041812896728516, + "fcm_dpo/q_t": 0.39871087670326233, + "grad_norm": 21.151241302490234, + "learning_rate": 4.990486745229364e-07, + "logits/chosen": -0.6645753383636475, + "logits/rejected": -0.6533565521240234, + "logps/chosen": -90.9077377319336, + "logps/ref_chosen": -55.63609313964844, + "logps/ref_rejected": -95.46757507324219, + "logps/rejected": -169.78103637695312, + "loss": 1.1169, + "margin_dpo/margin_mean": 39.041812896728516, + "margin_dpo/margin_std": 65.63897705078125, + "step": 87 + }, + { + "KL/chosen_KL_mean": -44.63642883300781, + "KL/mean": -59.331417083740234, + "KL/rejected_KL_mean": -74.02639770507812, + "KL/std": 56.308712005615234, + "epoch": 0.12922173274596183, + "fcm_dpo/beta": 0.011739738285541534, + "fcm_dpo/delta": 0.05674154311418533, + "fcm_dpo/margin": 29.389965057373047, + "fcm_dpo/q_t": 0.42796188592910767, + "grad_norm": 22.641603469848633, + "learning_rate": 4.989335440737586e-07, + "logits/chosen": -0.6030087471008301, + "logits/rejected": -0.6048742532730103, + "logps/chosen": -118.30757904052734, + "logps/ref_chosen": -73.67115020751953, + "logps/ref_rejected": -106.70849609375, + "logps/rejected": -180.73489379882812, + "loss": 1.2051, + "margin_dpo/margin_mean": 29.389965057373047, + "margin_dpo/margin_std": 62.56239318847656, + "step": 88 + }, + { + "KL/chosen_KL_mean": -29.5759334564209, + "KL/mean": -45.80217742919922, + "KL/rejected_KL_mean": -62.02842712402344, + "KL/std": 47.946502685546875, + "epoch": 0.13069016152716592, + "fcm_dpo/beta": 0.011899597942829132, + "fcm_dpo/delta": 0.014035461470484734, + "fcm_dpo/margin": 32.45248794555664, + "fcm_dpo/q_t": 0.4130520224571228, + "grad_norm": 20.236740112304688, + "learning_rate": 4.988118539941847e-07, + "logits/chosen": -0.6709840297698975, + "logits/rejected": -0.6467102766036987, + "logps/chosen": -90.20085144042969, + "logps/ref_chosen": -60.624916076660156, + "logps/ref_rejected": -82.08354949951172, + "logps/rejected": -144.11196899414062, + "loss": 1.1371, + "margin_dpo/margin_mean": 32.452484130859375, + "margin_dpo/margin_std": 56.894569396972656, + "step": 89 + }, + { + "KL/chosen_KL_mean": -33.55852508544922, + "KL/mean": -58.233924865722656, + "KL/rejected_KL_mean": -82.90933227539062, + "KL/std": 63.781768798828125, + "epoch": 0.13215859030837004, + "fcm_dpo/beta": 0.0116573516279459, + "fcm_dpo/delta": -0.18619467318058014, + "fcm_dpo/margin": 49.350799560546875, + "fcm_dpo/q_t": 0.3860289454460144, + "grad_norm": 20.391284942626953, + "learning_rate": 4.986836074908615e-07, + "logits/chosen": -0.5895090103149414, + "logits/rejected": -0.614214301109314, + "logps/chosen": -86.84383392333984, + "logps/ref_chosen": -53.285308837890625, + "logps/ref_rejected": -111.54470825195312, + "logps/rejected": -194.45404052734375, + "loss": 1.0805, + "margin_dpo/margin_mean": 49.350799560546875, + "margin_dpo/margin_std": 80.73963928222656, + "step": 90 + }, + { + "KL/chosen_KL_mean": -36.51939392089844, + "KL/mean": -53.32404327392578, + "KL/rejected_KL_mean": -70.12869262695312, + "KL/std": 47.886573791503906, + "epoch": 0.13362701908957417, + "fcm_dpo/beta": 0.011482559144496918, + "fcm_dpo/delta": 0.014481155201792717, + "fcm_dpo/margin": 33.60929870605469, + "fcm_dpo/q_t": 0.41819268465042114, + "grad_norm": 21.17986488342285, + "learning_rate": 4.985488079432037e-07, + "logits/chosen": -0.6286755800247192, + "logits/rejected": -0.6033735275268555, + "logps/chosen": -98.32235717773438, + "logps/ref_chosen": -61.802955627441406, + "logps/ref_rejected": -87.87395477294922, + "logps/rejected": -158.00265502929688, + "loss": 1.1619, + "margin_dpo/margin_mean": 33.60929870605469, + "margin_dpo/margin_std": 65.38414764404297, + "step": 91 + }, + { + "KL/chosen_KL_mean": -30.792192459106445, + "KL/mean": -48.84156036376953, + "KL/rejected_KL_mean": -66.89093017578125, + "KL/std": 49.2903938293457, + "epoch": 0.13509544787077826, + "fcm_dpo/beta": 0.011489994823932648, + "fcm_dpo/delta": -0.015798617154359818, + "fcm_dpo/margin": 36.09873580932617, + "fcm_dpo/q_t": 0.4099717140197754, + "grad_norm": 19.466535568237305, + "learning_rate": 4.984074589033043e-07, + "logits/chosen": -0.6480433344841003, + "logits/rejected": -0.6293787360191345, + "logps/chosen": -82.4329605102539, + "logps/ref_chosen": -51.640769958496094, + "logps/ref_rejected": -77.88117980957031, + "logps/rejected": -144.77210998535156, + "loss": 1.1196, + "margin_dpo/margin_mean": 36.09873580932617, + "margin_dpo/margin_std": 59.55793762207031, + "step": 92 + }, + { + "KL/chosen_KL_mean": -34.3369140625, + "KL/mean": -51.498043060302734, + "KL/rejected_KL_mean": -68.65916442871094, + "KL/std": 43.49789810180664, + "epoch": 0.13656387665198239, + "fcm_dpo/beta": 0.01147179864346981, + "fcm_dpo/delta": 0.006311129778623581, + "fcm_dpo/margin": 34.32225799560547, + "fcm_dpo/q_t": 0.41234347224235535, + "grad_norm": 21.00588607788086, + "learning_rate": 4.982595640958425e-07, + "logits/chosen": -0.668390154838562, + "logits/rejected": -0.632922887802124, + "logps/chosen": -86.86614990234375, + "logps/ref_chosen": -52.529239654541016, + "logps/ref_rejected": -77.16075134277344, + "logps/rejected": -145.81991577148438, + "loss": 1.1109, + "margin_dpo/margin_mean": 34.32225799560547, + "margin_dpo/margin_std": 52.0517578125, + "step": 93 + }, + { + "KL/chosen_KL_mean": -36.46757507324219, + "KL/mean": -57.51295471191406, + "KL/rejected_KL_mean": -78.55833435058594, + "KL/std": 52.84346008300781, + "epoch": 0.13803230543318648, + "fcm_dpo/beta": 0.011251532472670078, + "fcm_dpo/delta": -0.07815787196159363, + "fcm_dpo/margin": 42.09075164794922, + "fcm_dpo/q_t": 0.3950212597846985, + "grad_norm": 19.056589126586914, + "learning_rate": 4.98105127417984e-07, + "logits/chosen": -0.6087779998779297, + "logits/rejected": -0.6042633056640625, + "logps/chosen": -97.690185546875, + "logps/ref_chosen": -61.22261047363281, + "logps/ref_rejected": -99.59902954101562, + "logps/rejected": -178.15737915039062, + "loss": 1.0606, + "margin_dpo/margin_mean": 42.09075164794922, + "margin_dpo/margin_std": 56.86457061767578, + "step": 94 + }, + { + "KL/chosen_KL_mean": -35.560211181640625, + "KL/mean": -52.13538360595703, + "KL/rejected_KL_mean": -68.71056365966797, + "KL/std": 53.81283187866211, + "epoch": 0.1395007342143906, + "fcm_dpo/beta": 0.011258168146014214, + "fcm_dpo/delta": 0.02752673253417015, + "fcm_dpo/margin": 33.15034484863281, + "fcm_dpo/q_t": 0.41526269912719727, + "grad_norm": 19.388639450073242, + "learning_rate": 4.979441529392784e-07, + "logits/chosen": -0.6361397504806519, + "logits/rejected": -0.6155867576599121, + "logps/chosen": -88.08385467529297, + "logps/ref_chosen": -52.523643493652344, + "logps/ref_rejected": -75.8803482055664, + "logps/rejected": -144.59091186523438, + "loss": 1.1443, + "margin_dpo/margin_mean": 33.15034484863281, + "margin_dpo/margin_std": 56.809608459472656, + "step": 95 + }, + { + "KL/chosen_KL_mean": -35.88848876953125, + "KL/mean": -58.62786865234375, + "KL/rejected_KL_mean": -81.36726379394531, + "KL/std": 55.729679107666016, + "epoch": 0.14096916299559473, + "fcm_dpo/beta": 0.011056499555706978, + "fcm_dpo/delta": -0.11031479388475418, + "fcm_dpo/margin": 45.47876739501953, + "fcm_dpo/q_t": 0.3921157121658325, + "grad_norm": 19.741348266601562, + "learning_rate": 4.977766449015534e-07, + "logits/chosen": -0.629560112953186, + "logits/rejected": -0.6098443269729614, + "logps/chosen": -98.04545593261719, + "logps/ref_chosen": -62.15697479248047, + "logps/ref_rejected": -96.59601593017578, + "logps/rejected": -177.96328735351562, + "loss": 1.0532, + "margin_dpo/margin_mean": 45.4787712097168, + "margin_dpo/margin_std": 63.79114532470703, + "step": 96 + }, + { + "KL/chosen_KL_mean": -36.149559020996094, + "KL/mean": -53.12095642089844, + "KL/rejected_KL_mean": -70.09235382080078, + "KL/std": 48.205299377441406, + "epoch": 0.14243759177679882, + "fcm_dpo/beta": 0.01119668036699295, + "fcm_dpo/delta": 0.019375190138816833, + "fcm_dpo/margin": 33.94279861450195, + "fcm_dpo/q_t": 0.41301482915878296, + "grad_norm": 20.09777069091797, + "learning_rate": 4.976026077188012e-07, + "logits/chosen": -0.5712728500366211, + "logits/rejected": -0.539288341999054, + "logps/chosen": -90.79592895507812, + "logps/ref_chosen": -54.646366119384766, + "logps/ref_rejected": -76.96475219726562, + "logps/rejected": -147.05709838867188, + "loss": 1.116, + "margin_dpo/margin_mean": 33.94279479980469, + "margin_dpo/margin_std": 48.3692626953125, + "step": 97 + }, + { + "KL/chosen_KL_mean": -46.89853286743164, + "KL/mean": -66.17620849609375, + "KL/rejected_KL_mean": -85.4538803100586, + "KL/std": 56.045082092285156, + "epoch": 0.14390602055800295, + "fcm_dpo/beta": 0.01105651818215847, + "fcm_dpo/delta": -0.02756763994693756, + "fcm_dpo/margin": 38.55535125732422, + "fcm_dpo/q_t": 0.4013606309890747, + "grad_norm": 22.645526885986328, + "learning_rate": 4.974220459770639e-07, + "logits/chosen": -0.6146658658981323, + "logits/rejected": -0.6087555885314941, + "logps/chosen": -112.15716552734375, + "logps/ref_chosen": -65.25862884521484, + "logps/ref_rejected": -96.5274887084961, + "logps/rejected": -181.9813690185547, + "loss": 1.1381, + "margin_dpo/margin_mean": 38.55535125732422, + "margin_dpo/margin_std": 68.4720687866211, + "step": 98 + }, + { + "KL/chosen_KL_mean": -34.3526611328125, + "KL/mean": -59.61415481567383, + "KL/rejected_KL_mean": -84.87564086914062, + "KL/std": 57.15583038330078, + "epoch": 0.14537444933920704, + "fcm_dpo/beta": 0.010768149048089981, + "fcm_dpo/delta": -0.1532631367444992, + "fcm_dpo/margin": 50.522987365722656, + "fcm_dpo/q_t": 0.38575196266174316, + "grad_norm": 18.111738204956055, + "learning_rate": 4.972349644343108e-07, + "logits/chosen": -0.5844358205795288, + "logits/rejected": -0.5914009213447571, + "logps/chosen": -79.99114990234375, + "logps/ref_chosen": -45.638484954833984, + "logps/ref_rejected": -86.43793487548828, + "logps/rejected": -171.31356811523438, + "loss": 1.0399, + "margin_dpo/margin_mean": 50.522987365722656, + "margin_dpo/margin_std": 72.46966552734375, + "step": 99 + }, + { + "KL/chosen_KL_mean": -39.64739990234375, + "KL/mean": -51.475440979003906, + "KL/rejected_KL_mean": -63.3034782409668, + "KL/std": 47.796791076660156, + "epoch": 0.14684287812041116, + "fcm_dpo/beta": 0.010749414563179016, + "fcm_dpo/delta": 0.02157057449221611, + "fcm_dpo/margin": 23.65607452392578, + "fcm_dpo/q_t": 0.44148170948028564, + "grad_norm": 22.503297805786133, + "learning_rate": 4.970413680203148e-07, + "logits/chosen": -0.5854922533035278, + "logits/rejected": -0.5520744323730469, + "logps/chosen": -97.24137878417969, + "logps/ref_chosen": -57.59397888183594, + "logps/ref_rejected": -74.06021118164062, + "logps/rejected": -137.3636932373047, + "loss": 1.2548, + "margin_dpo/margin_mean": 23.65607452392578, + "margin_dpo/margin_std": 61.108909606933594, + "step": 100 + }, + { + "KL/chosen_KL_mean": -47.286502838134766, + "KL/mean": -63.186546325683594, + "KL/rejected_KL_mean": -79.08659362792969, + "KL/std": 57.57587432861328, + "epoch": 0.14831130690161526, + "fcm_dpo/beta": 0.010717286728322506, + "fcm_dpo/delta": -0.025740258395671844, + "fcm_dpo/margin": 31.800077438354492, + "fcm_dpo/q_t": 0.4272317886352539, + "grad_norm": 21.163375854492188, + "learning_rate": 4.968412618365215e-07, + "logits/chosen": -0.5871464014053345, + "logits/rejected": -0.5648493766784668, + "logps/chosen": -108.93535614013672, + "logps/ref_chosen": -61.64885330200195, + "logps/ref_rejected": -83.18968200683594, + "logps/rejected": -162.27627563476562, + "loss": 1.1985, + "margin_dpo/margin_mean": 31.800079345703125, + "margin_dpo/margin_std": 68.4903793334961, + "step": 101 + }, + { + "KL/chosen_KL_mean": -49.380165100097656, + "KL/mean": -60.16246795654297, + "KL/rejected_KL_mean": -70.94477844238281, + "KL/std": 52.73678970336914, + "epoch": 0.14977973568281938, + "fcm_dpo/beta": 0.010729561559855938, + "fcm_dpo/delta": 0.011440283618867397, + "fcm_dpo/margin": 21.564611434936523, + "fcm_dpo/q_t": 0.44714266061782837, + "grad_norm": 24.271780014038086, + "learning_rate": 4.966346511559149e-07, + "logits/chosen": -0.6152628660202026, + "logits/rejected": -0.5813932418823242, + "logps/chosen": -113.45904541015625, + "logps/ref_chosen": -64.0788803100586, + "logps/ref_rejected": -68.18707275390625, + "logps/rejected": -139.13185119628906, + "loss": 1.2855, + "margin_dpo/margin_mean": 21.564613342285156, + "margin_dpo/margin_std": 64.52105712890625, + "step": 102 + }, + { + "KL/chosen_KL_mean": -41.929290771484375, + "KL/mean": -65.89997100830078, + "KL/rejected_KL_mean": -89.87065124511719, + "KL/std": 59.839744567871094, + "epoch": 0.1512481644640235, + "fcm_dpo/beta": 0.010541867464780807, + "fcm_dpo/delta": -0.11135346442461014, + "fcm_dpo/margin": 47.94136047363281, + "fcm_dpo/q_t": 0.39261913299560547, + "grad_norm": 21.387969970703125, + "learning_rate": 4.964215414228785e-07, + "logits/chosen": -0.5887047052383423, + "logits/rejected": -0.5597223043441772, + "logps/chosen": -103.22856903076172, + "logps/ref_chosen": -61.299278259277344, + "logps/ref_rejected": -93.57270812988281, + "logps/rejected": -183.443359375, + "loss": 1.0745, + "margin_dpo/margin_mean": 47.94136047363281, + "margin_dpo/margin_std": 75.64704895019531, + "step": 103 + }, + { + "KL/chosen_KL_mean": -41.508384704589844, + "KL/mean": -64.53752899169922, + "KL/rejected_KL_mean": -87.56666564941406, + "KL/std": 62.41368103027344, + "epoch": 0.1527165932452276, + "fcm_dpo/beta": 0.010378319770097733, + "fcm_dpo/delta": -0.08192168176174164, + "fcm_dpo/margin": 46.05828094482422, + "fcm_dpo/q_t": 0.4016958773136139, + "grad_norm": 19.554399490356445, + "learning_rate": 4.96201938253052e-07, + "logits/chosen": -0.6282894015312195, + "logits/rejected": -0.6099402904510498, + "logps/chosen": -95.88116455078125, + "logps/ref_chosen": -54.372772216796875, + "logps/ref_rejected": -89.5647201538086, + "logps/rejected": -177.13137817382812, + "loss": 1.104, + "margin_dpo/margin_mean": 46.05828094482422, + "margin_dpo/margin_std": 78.06277465820312, + "step": 104 + }, + { + "KL/chosen_KL_mean": -38.417266845703125, + "KL/mean": -71.04873657226562, + "KL/rejected_KL_mean": -103.68021392822266, + "KL/std": 57.34044647216797, + "epoch": 0.15418502202643172, + "fcm_dpo/beta": 0.009946699254214764, + "fcm_dpo/delta": -0.26617431640625, + "fcm_dpo/margin": 65.26294708251953, + "fcm_dpo/q_t": 0.3527500033378601, + "grad_norm": 18.77589988708496, + "learning_rate": 4.959758474331832e-07, + "logits/chosen": -0.5998860597610474, + "logits/rejected": -0.5840677618980408, + "logps/chosen": -93.05621337890625, + "logps/ref_chosen": -54.638946533203125, + "logps/ref_rejected": -97.97351837158203, + "logps/rejected": -201.6537322998047, + "loss": 0.9308, + "margin_dpo/margin_mean": 65.26294708251953, + "margin_dpo/margin_std": 64.84896087646484, + "step": 105 + }, + { + "KL/chosen_KL_mean": -43.48663330078125, + "KL/mean": -63.503273010253906, + "KL/rejected_KL_mean": -83.5199203491211, + "KL/std": 52.4485969543457, + "epoch": 0.15565345080763582, + "fcm_dpo/beta": 0.00979258120059967, + "fcm_dpo/delta": 0.008240575902163982, + "fcm_dpo/margin": 40.03329086303711, + "fcm_dpo/q_t": 0.4101172089576721, + "grad_norm": 18.440258026123047, + "learning_rate": 4.957432749209755e-07, + "logits/chosen": -0.5603185296058655, + "logits/rejected": -0.5446274280548096, + "logps/chosen": -98.31952667236328, + "logps/ref_chosen": -54.83289337158203, + "logps/ref_rejected": -85.22461700439453, + "logps/rejected": -168.74453735351562, + "loss": 1.1055, + "margin_dpo/margin_mean": 40.033287048339844, + "margin_dpo/margin_std": 56.37229919433594, + "step": 106 + }, + { + "KL/chosen_KL_mean": -51.00328063964844, + "KL/mean": -73.30986022949219, + "KL/rejected_KL_mean": -95.61644744873047, + "KL/std": 63.23443603515625, + "epoch": 0.15712187958883994, + "fcm_dpo/beta": 0.009722733870148659, + "fcm_dpo/delta": -0.0357857272028923, + "fcm_dpo/margin": 44.61316680908203, + "fcm_dpo/q_t": 0.40253520011901855, + "grad_norm": 19.489307403564453, + "learning_rate": 4.955042268449307e-07, + "logits/chosen": -0.5885684490203857, + "logits/rejected": -0.549545168876648, + "logps/chosen": -120.71109008789062, + "logps/ref_chosen": -69.70780944824219, + "logps/ref_rejected": -94.73950958251953, + "logps/rejected": -190.35595703125, + "loss": 1.0986, + "margin_dpo/margin_mean": 44.61316680908203, + "margin_dpo/margin_std": 67.18595886230469, + "step": 107 + }, + { + "KL/chosen_KL_mean": -43.193382263183594, + "KL/mean": -71.44862365722656, + "KL/rejected_KL_mean": -99.703857421875, + "KL/std": 69.26988220214844, + "epoch": 0.15859030837004406, + "fcm_dpo/beta": 0.009545030072331429, + "fcm_dpo/delta": -0.14720328152179718, + "fcm_dpo/margin": 56.510475158691406, + "fcm_dpo/q_t": 0.3897445499897003, + "grad_norm": 17.88391876220703, + "learning_rate": 4.952587095041881e-07, + "logits/chosen": -0.5926969051361084, + "logits/rejected": -0.5816439390182495, + "logps/chosen": -99.2032699584961, + "logps/ref_chosen": -56.0098876953125, + "logps/ref_rejected": -95.79601287841797, + "logps/rejected": -195.4998779296875, + "loss": 1.0727, + "margin_dpo/margin_mean": 56.510475158691406, + "margin_dpo/margin_std": 89.7379150390625, + "step": 108 + }, + { + "KL/chosen_KL_mean": -40.947486877441406, + "KL/mean": -69.35021209716797, + "KL/rejected_KL_mean": -97.7529296875, + "KL/std": 64.73497772216797, + "epoch": 0.16005873715124816, + "fcm_dpo/beta": 0.009237101301550865, + "fcm_dpo/delta": -0.13259612023830414, + "fcm_dpo/margin": 56.80544662475586, + "fcm_dpo/q_t": 0.3831191956996918, + "grad_norm": 20.214975357055664, + "learning_rate": 4.95006729368358e-07, + "logits/chosen": -0.5185987949371338, + "logits/rejected": -0.4973178505897522, + "logps/chosen": -103.83297729492188, + "logps/ref_chosen": -62.88549041748047, + "logps/ref_rejected": -98.68573760986328, + "logps/rejected": -196.43865966796875, + "loss": 1.0587, + "margin_dpo/margin_mean": 56.805450439453125, + "margin_dpo/margin_std": 82.919921875, + "step": 109 + }, + { + "KL/chosen_KL_mean": -41.9679069519043, + "KL/mean": -67.5267333984375, + "KL/rejected_KL_mean": -93.08556365966797, + "KL/std": 64.46952819824219, + "epoch": 0.16152716593245228, + "fcm_dpo/beta": 0.009010251611471176, + "fcm_dpo/delta": -0.06750426441431046, + "fcm_dpo/margin": 51.11765670776367, + "fcm_dpo/q_t": 0.39977186918258667, + "grad_norm": 16.951026916503906, + "learning_rate": 4.947482930773511e-07, + "logits/chosen": -0.5459502339363098, + "logits/rejected": -0.5163878798484802, + "logps/chosen": -100.72158813476562, + "logps/ref_chosen": -58.753684997558594, + "logps/ref_rejected": -79.75001525878906, + "logps/rejected": -172.8355712890625, + "loss": 1.1026, + "margin_dpo/margin_mean": 51.11766052246094, + "margin_dpo/margin_std": 78.87454223632812, + "step": 110 + }, + { + "KL/chosen_KL_mean": -46.687034606933594, + "KL/mean": -75.8902816772461, + "KL/rejected_KL_mean": -105.0935287475586, + "KL/std": 71.61009979248047, + "epoch": 0.16299559471365638, + "fcm_dpo/beta": 0.008878624066710472, + "fcm_dpo/delta": -0.12616059184074402, + "fcm_dpo/margin": 58.406497955322266, + "fcm_dpo/q_t": 0.3876160979270935, + "grad_norm": 19.642478942871094, + "learning_rate": 4.944834074412042e-07, + "logits/chosen": -0.5734955072402954, + "logits/rejected": -0.5520858764648438, + "logps/chosen": -115.31114196777344, + "logps/ref_chosen": -68.62410736083984, + "logps/ref_rejected": -98.42886352539062, + "logps/rejected": -203.52239990234375, + "loss": 1.0866, + "margin_dpo/margin_mean": 58.40650177001953, + "margin_dpo/margin_std": 92.70616149902344, + "step": 111 + }, + { + "KL/chosen_KL_mean": -41.41010284423828, + "KL/mean": -58.663936614990234, + "KL/rejected_KL_mean": -75.91777038574219, + "KL/std": 46.183799743652344, + "epoch": 0.1644640234948605, + "fcm_dpo/beta": 0.008953899145126343, + "fcm_dpo/delta": 0.09397280961275101, + "fcm_dpo/margin": 34.507667541503906, + "fcm_dpo/q_t": 0.42883390188217163, + "grad_norm": 16.9028263092041, + "learning_rate": 4.942120794399002e-07, + "logits/chosen": -0.5514860153198242, + "logits/rejected": -0.5197386741638184, + "logps/chosen": -91.65974426269531, + "logps/ref_chosen": -50.24964141845703, + "logps/ref_rejected": -64.77442932128906, + "logps/rejected": -140.69219970703125, + "loss": 1.1682, + "margin_dpo/margin_mean": 34.507667541503906, + "margin_dpo/margin_std": 58.28282165527344, + "step": 112 + }, + { + "KL/chosen_KL_mean": -53.2155647277832, + "KL/mean": -70.94862365722656, + "KL/rejected_KL_mean": -88.68169403076172, + "KL/std": 53.065284729003906, + "epoch": 0.16593245227606462, + "fcm_dpo/beta": 0.009143839590251446, + "fcm_dpo/delta": 0.07819047570228577, + "fcm_dpo/margin": 35.46611785888672, + "fcm_dpo/q_t": 0.42639607191085815, + "grad_norm": 22.58910369873047, + "learning_rate": 4.939343162231841e-07, + "logits/chosen": -0.5312271118164062, + "logits/rejected": -0.4909241497516632, + "logps/chosen": -119.92851257324219, + "logps/ref_chosen": -66.71295166015625, + "logps/ref_rejected": -77.96870422363281, + "logps/rejected": -166.650390625, + "loss": 1.1542, + "margin_dpo/margin_mean": 35.466121673583984, + "margin_dpo/margin_std": 58.21109390258789, + "step": 113 + }, + { + "KL/chosen_KL_mean": -46.58165740966797, + "KL/mean": -75.94132232666016, + "KL/rejected_KL_mean": -105.30099487304688, + "KL/std": 72.10044860839844, + "epoch": 0.16740088105726872, + "fcm_dpo/beta": 0.008939421735703945, + "fcm_dpo/delta": -0.13344621658325195, + "fcm_dpo/margin": 58.719337463378906, + "fcm_dpo/q_t": 0.3906566798686981, + "grad_norm": 19.71776580810547, + "learning_rate": 4.936501251103751e-07, + "logits/chosen": -0.5248334407806396, + "logits/rejected": -0.4965624213218689, + "logps/chosen": -104.36673736572266, + "logps/ref_chosen": -57.78507995605469, + "logps/ref_rejected": -87.10966491699219, + "logps/rejected": -192.41064453125, + "loss": 1.0516, + "margin_dpo/margin_mean": 58.719337463378906, + "margin_dpo/margin_std": 87.8689956665039, + "step": 114 + }, + { + "KL/chosen_KL_mean": -59.11194610595703, + "KL/mean": -79.4608154296875, + "KL/rejected_KL_mean": -99.8096923828125, + "KL/std": 69.4343490600586, + "epoch": 0.16886930983847284, + "fcm_dpo/beta": 0.009012718684971333, + "fcm_dpo/delta": 0.034050408750772476, + "fcm_dpo/margin": 40.69775390625, + "fcm_dpo/q_t": 0.424325168132782, + "grad_norm": 25.762571334838867, + "learning_rate": 4.933595135901732e-07, + "logits/chosen": -0.5764279961585999, + "logits/rejected": -0.5661026239395142, + "logps/chosen": -124.69458770751953, + "logps/ref_chosen": -65.5826416015625, + "logps/ref_rejected": -98.56552124023438, + "logps/rejected": -198.37521362304688, + "loss": 1.2073, + "margin_dpo/margin_mean": 40.69775390625, + "margin_dpo/margin_std": 93.00070190429688, + "step": 115 + }, + { + "KL/chosen_KL_mean": -42.108062744140625, + "KL/mean": -64.91938018798828, + "KL/rejected_KL_mean": -87.73070526123047, + "KL/std": 55.80364990234375, + "epoch": 0.17033773861967694, + "fcm_dpo/beta": 0.00902644731104374, + "fcm_dpo/delta": -0.012853723019361496, + "fcm_dpo/margin": 45.62263488769531, + "fcm_dpo/q_t": 0.40703320503234863, + "grad_norm": 18.27412223815918, + "learning_rate": 4.930624893204624e-07, + "logits/chosen": -0.5399957895278931, + "logits/rejected": -0.5367946624755859, + "logps/chosen": -93.50837707519531, + "logps/ref_chosen": -51.40031433105469, + "logps/ref_rejected": -80.5218505859375, + "logps/rejected": -168.2525634765625, + "loss": 1.0915, + "margin_dpo/margin_mean": 45.62263870239258, + "margin_dpo/margin_std": 62.58723831176758, + "step": 116 + }, + { + "KL/chosen_KL_mean": -55.41862487792969, + "KL/mean": -72.41690826416016, + "KL/rejected_KL_mean": -89.41517639160156, + "KL/std": 57.73213195800781, + "epoch": 0.17180616740088106, + "fcm_dpo/beta": 0.009095819666981697, + "fcm_dpo/delta": 0.09368915110826492, + "fcm_dpo/margin": 33.996551513671875, + "fcm_dpo/q_t": 0.430633544921875, + "grad_norm": 24.457244873046875, + "learning_rate": 4.927590601281083e-07, + "logits/chosen": -0.527985692024231, + "logits/rejected": -0.4939156174659729, + "logps/chosen": -124.71703338623047, + "logps/ref_chosen": -69.29840850830078, + "logps/ref_rejected": -66.583984375, + "logps/rejected": -155.99917602539062, + "loss": 1.1898, + "margin_dpo/margin_mean": 33.996551513671875, + "margin_dpo/margin_std": 68.07157897949219, + "step": 117 + }, + { + "KL/chosen_KL_mean": -43.04719161987305, + "KL/mean": -64.5184326171875, + "KL/rejected_KL_mean": -85.98967742919922, + "KL/std": 53.621681213378906, + "epoch": 0.17327459618208516, + "fcm_dpo/beta": 0.009164330549538136, + "fcm_dpo/delta": 0.006666237488389015, + "fcm_dpo/margin": 42.942481994628906, + "fcm_dpo/q_t": 0.4111817479133606, + "grad_norm": 18.125078201293945, + "learning_rate": 4.924492340087524e-07, + "logits/chosen": -0.5757678151130676, + "logits/rejected": -0.558840274810791, + "logps/chosen": -98.68817138671875, + "logps/ref_chosen": -55.6409797668457, + "logps/ref_rejected": -75.66905975341797, + "logps/rejected": -161.6587371826172, + "loss": 1.1042, + "margin_dpo/margin_mean": 42.942481994628906, + "margin_dpo/margin_std": 61.708221435546875, + "step": 118 + }, + { + "KL/chosen_KL_mean": -58.032772064208984, + "KL/mean": -79.35901641845703, + "KL/rejected_KL_mean": -100.68525695800781, + "KL/std": 59.09315490722656, + "epoch": 0.17474302496328928, + "fcm_dpo/beta": 0.009107567369937897, + "fcm_dpo/delta": 0.011166650801897049, + "fcm_dpo/margin": 42.652496337890625, + "fcm_dpo/q_t": 0.4156750738620758, + "grad_norm": 19.924360275268555, + "learning_rate": 4.92133019126601e-07, + "logits/chosen": -0.5483442544937134, + "logits/rejected": -0.5380154848098755, + "logps/chosen": -131.54296875, + "logps/ref_chosen": -73.51019287109375, + "logps/ref_rejected": -102.977294921875, + "logps/rejected": -203.6625518798828, + "loss": 1.1422, + "margin_dpo/margin_mean": 42.652496337890625, + "margin_dpo/margin_std": 74.453125, + "step": 119 + }, + { + "KL/chosen_KL_mean": -57.64821243286133, + "KL/mean": -87.497314453125, + "KL/rejected_KL_mean": -117.34640502929688, + "KL/std": 71.73361206054688, + "epoch": 0.1762114537444934, + "fcm_dpo/beta": 0.008971385657787323, + "fcm_dpo/delta": -0.14341270923614502, + "fcm_dpo/margin": 59.69819641113281, + "fcm_dpo/q_t": 0.381797730922699, + "grad_norm": 20.973718643188477, + "learning_rate": 4.918104238142103e-07, + "logits/chosen": -0.5744304656982422, + "logits/rejected": -0.5464004278182983, + "logps/chosen": -134.42904663085938, + "logps/ref_chosen": -76.78083801269531, + "logps/ref_rejected": -108.02374267578125, + "logps/rejected": -225.37014770507812, + "loss": 1.0263, + "margin_dpo/margin_mean": 59.69819641113281, + "margin_dpo/margin_std": 77.37503051757812, + "step": 120 + }, + { + "KL/chosen_KL_mean": -53.727439880371094, + "KL/mean": -87.06233215332031, + "KL/rejected_KL_mean": -120.39723205566406, + "KL/std": 69.44754028320312, + "epoch": 0.1776798825256975, + "fcm_dpo/beta": 0.008634019643068314, + "fcm_dpo/delta": -0.187477245926857, + "fcm_dpo/margin": 66.66979217529297, + "fcm_dpo/q_t": 0.37362366914749146, + "grad_norm": 19.9356689453125, + "learning_rate": 4.91481456572267e-07, + "logits/chosen": -0.5384774208068848, + "logits/rejected": -0.5364508628845215, + "logps/chosen": -115.517333984375, + "logps/ref_chosen": -61.789894104003906, + "logps/ref_rejected": -109.99456787109375, + "logps/rejected": -230.3917999267578, + "loss": 1.0099, + "margin_dpo/margin_mean": 66.66979217529297, + "margin_dpo/margin_std": 85.21018981933594, + "step": 121 + }, + { + "KL/chosen_KL_mean": -47.43219757080078, + "KL/mean": -87.10603332519531, + "KL/rejected_KL_mean": -126.77987670898438, + "KL/std": 75.40602111816406, + "epoch": 0.17914831130690162, + "fcm_dpo/beta": 0.008252674713730812, + "fcm_dpo/delta": -0.27251002192497253, + "fcm_dpo/margin": 79.34767150878906, + "fcm_dpo/q_t": 0.35448387265205383, + "grad_norm": 20.8872013092041, + "learning_rate": 4.911461260693638e-07, + "logits/chosen": -0.4834294021129608, + "logits/rejected": -0.5015791058540344, + "logps/chosen": -94.33441162109375, + "logps/ref_chosen": -46.9022102355957, + "logps/ref_rejected": -106.71418762207031, + "logps/rejected": -233.4940643310547, + "loss": 0.9294, + "margin_dpo/margin_mean": 79.34767150878906, + "margin_dpo/margin_std": 80.3590316772461, + "step": 122 + }, + { + "KL/chosen_KL_mean": -53.51214599609375, + "KL/mean": -79.38796997070312, + "KL/rejected_KL_mean": -105.2637939453125, + "KL/std": 65.03099060058594, + "epoch": 0.18061674008810572, + "fcm_dpo/beta": 0.008060860447585583, + "fcm_dpo/delta": -0.018722567707300186, + "fcm_dpo/margin": 51.75164031982422, + "fcm_dpo/q_t": 0.4093893766403198, + "grad_norm": 21.224868774414062, + "learning_rate": 4.908044411417711e-07, + "logits/chosen": -0.504252552986145, + "logits/rejected": -0.48894137144088745, + "logps/chosen": -114.85078430175781, + "logps/ref_chosen": -61.33863830566406, + "logps/ref_rejected": -87.775390625, + "logps/rejected": -193.0391845703125, + "loss": 1.1298, + "margin_dpo/margin_mean": 51.75164031982422, + "margin_dpo/margin_std": 88.38829040527344, + "step": 123 + }, + { + "KL/chosen_KL_mean": -62.671470642089844, + "KL/mean": -101.19732666015625, + "KL/rejected_KL_mean": -139.72317504882812, + "KL/std": 90.27295684814453, + "epoch": 0.18208516886930984, + "fcm_dpo/beta": 0.007825289852917194, + "fcm_dpo/delta": -0.21662405133247375, + "fcm_dpo/margin": 77.05169677734375, + "fcm_dpo/q_t": 0.37590959668159485, + "grad_norm": 21.635025024414062, + "learning_rate": 4.904564107932048e-07, + "logits/chosen": -0.4845294654369354, + "logits/rejected": -0.488964319229126, + "logps/chosen": -134.11981201171875, + "logps/ref_chosen": -71.44833374023438, + "logps/ref_rejected": -117.58056640625, + "logps/rejected": -257.3037414550781, + "loss": 1.0383, + "margin_dpo/margin_mean": 77.05169677734375, + "margin_dpo/margin_std": 113.15299987792969, + "step": 124 + }, + { + "KL/chosen_KL_mean": -49.68274688720703, + "KL/mean": -82.18218231201172, + "KL/rejected_KL_mean": -114.68162536621094, + "KL/std": 74.36106872558594, + "epoch": 0.18355359765051396, + "fcm_dpo/beta": 0.007650085724890232, + "fcm_dpo/delta": -0.10223683714866638, + "fcm_dpo/margin": 64.99887084960938, + "fcm_dpo/q_t": 0.3918633460998535, + "grad_norm": 18.291349411010742, + "learning_rate": 4.90102044194588e-07, + "logits/chosen": -0.4274330139160156, + "logits/rejected": -0.4299238622188568, + "logps/chosen": -99.81968688964844, + "logps/ref_chosen": -50.136940002441406, + "logps/ref_rejected": -83.98861694335938, + "logps/rejected": -198.67022705078125, + "loss": 1.0649, + "margin_dpo/margin_mean": 64.99887084960938, + "margin_dpo/margin_std": 93.5775375366211, + "step": 125 + }, + { + "KL/chosen_KL_mean": -53.004913330078125, + "KL/mean": -84.8802719116211, + "KL/rejected_KL_mean": -116.75562286376953, + "KL/std": 69.43212890625, + "epoch": 0.18502202643171806, + "fcm_dpo/beta": 0.007492750883102417, + "fcm_dpo/delta": -0.08181630074977875, + "fcm_dpo/margin": 63.75071716308594, + "fcm_dpo/q_t": 0.3952334523200989, + "grad_norm": 18.59366226196289, + "learning_rate": 4.897413506838102e-07, + "logits/chosen": -0.4854479432106018, + "logits/rejected": -0.4832964539527893, + "logps/chosen": -108.67198181152344, + "logps/ref_chosen": -55.66706848144531, + "logps/ref_rejected": -98.1297607421875, + "logps/rejected": -214.8853759765625, + "loss": 1.0617, + "margin_dpo/margin_mean": 63.75071716308594, + "margin_dpo/margin_std": 88.2365493774414, + "step": 126 + }, + { + "KL/chosen_KL_mean": -51.491268157958984, + "KL/mean": -73.90081787109375, + "KL/rejected_KL_mean": -96.31036376953125, + "KL/std": 60.034873962402344, + "epoch": 0.18649045521292218, + "fcm_dpo/beta": 0.007554207928478718, + "fcm_dpo/delta": 0.06351131945848465, + "fcm_dpo/margin": 44.81909942626953, + "fcm_dpo/q_t": 0.4214317202568054, + "grad_norm": 17.630146026611328, + "learning_rate": 4.89374339765481e-07, + "logits/chosen": -0.4814761281013489, + "logits/rejected": -0.46358734369277954, + "logps/chosen": -108.04594421386719, + "logps/ref_chosen": -56.55467987060547, + "logps/ref_rejected": -76.7957763671875, + "logps/rejected": -173.10614013671875, + "loss": 1.152, + "margin_dpo/margin_mean": 44.81909942626953, + "margin_dpo/margin_std": 74.9388198852539, + "step": 127 + }, + { + "KL/chosen_KL_mean": -52.88978958129883, + "KL/mean": -76.76133728027344, + "KL/rejected_KL_mean": -100.63288879394531, + "KL/std": 68.35536193847656, + "epoch": 0.18795888399412627, + "fcm_dpo/beta": 0.007643786258995533, + "fcm_dpo/delta": 0.03590956702828407, + "fcm_dpo/margin": 47.74309539794922, + "fcm_dpo/q_t": 0.4184267520904541, + "grad_norm": 19.399093627929688, + "learning_rate": 4.890010211106795e-07, + "logits/chosen": -0.5004081726074219, + "logits/rejected": -0.4844392240047455, + "logps/chosen": -111.0107421875, + "logps/ref_chosen": -58.12095642089844, + "logps/ref_rejected": -76.43896484375, + "logps/rejected": -177.0718536376953, + "loss": 1.1543, + "margin_dpo/margin_mean": 47.74309539794922, + "margin_dpo/margin_std": 85.46942138671875, + "step": 128 + }, + { + "KL/chosen_KL_mean": -65.713623046875, + "KL/mean": -90.0938720703125, + "KL/rejected_KL_mean": -114.47410583496094, + "KL/std": 78.53107452392578, + "epoch": 0.1894273127753304, + "fcm_dpo/beta": 0.007660663686692715, + "fcm_dpo/delta": 0.027477692812681198, + "fcm_dpo/margin": 48.760475158691406, + "fcm_dpo/q_t": 0.42177292704582214, + "grad_norm": 19.868791580200195, + "learning_rate": 4.88621404556699e-07, + "logits/chosen": -0.5160699486732483, + "logits/rejected": -0.5076286792755127, + "logps/chosen": -132.6300048828125, + "logps/ref_chosen": -66.91637420654297, + "logps/ref_rejected": -96.6422119140625, + "logps/rejected": -211.11631774902344, + "loss": 1.1857, + "margin_dpo/margin_mean": 48.760475158691406, + "margin_dpo/margin_std": 104.92391967773438, + "step": 129 + }, + { + "KL/chosen_KL_mean": -47.17679977416992, + "KL/mean": -84.11569213867188, + "KL/rejected_KL_mean": -121.05458068847656, + "KL/std": 75.62738037109375, + "epoch": 0.19089574155653452, + "fcm_dpo/beta": 0.0075556435622274876, + "fcm_dpo/delta": -0.16790251433849335, + "fcm_dpo/margin": 73.87777709960938, + "fcm_dpo/q_t": 0.37920111417770386, + "grad_norm": 16.111379623413086, + "learning_rate": 4.882355001067891e-07, + "logits/chosen": -0.4705796241760254, + "logits/rejected": -0.46549493074417114, + "logps/chosen": -91.8436508178711, + "logps/ref_chosen": -44.66685104370117, + "logps/ref_rejected": -82.78165435791016, + "logps/rejected": -203.83624267578125, + "loss": 1.0229, + "margin_dpo/margin_mean": 73.87777709960938, + "margin_dpo/margin_std": 93.94640350341797, + "step": 130 + }, + { + "KL/chosen_KL_mean": -43.336204528808594, + "KL/mean": -80.46978759765625, + "KL/rejected_KL_mean": -117.60336303710938, + "KL/std": 78.42054748535156, + "epoch": 0.19236417033773862, + "fcm_dpo/beta": 0.007246783934533596, + "fcm_dpo/delta": -0.1462840735912323, + "fcm_dpo/margin": 74.26715087890625, + "fcm_dpo/q_t": 0.3767462372779846, + "grad_norm": 20.645126342773438, + "learning_rate": 4.878433179298909e-07, + "logits/chosen": -0.4658737778663635, + "logits/rejected": -0.47121483087539673, + "logps/chosen": -88.26079559326172, + "logps/ref_chosen": -44.924591064453125, + "logps/ref_rejected": -88.44401550292969, + "logps/rejected": -206.04736328125, + "loss": 1.0082, + "margin_dpo/margin_mean": 74.26715850830078, + "margin_dpo/margin_std": 86.19850158691406, + "step": 131 + }, + { + "KL/chosen_KL_mean": -56.63502502441406, + "KL/mean": -87.46903228759766, + "KL/rejected_KL_mean": -118.30303955078125, + "KL/std": 77.27864074707031, + "epoch": 0.19383259911894274, + "fcm_dpo/beta": 0.007140764966607094, + "fcm_dpo/delta": -0.04240689054131508, + "fcm_dpo/margin": 61.66801452636719, + "fcm_dpo/q_t": 0.4060777425765991, + "grad_norm": 18.950071334838867, + "learning_rate": 4.874448683603694e-07, + "logits/chosen": -0.5243598222732544, + "logits/rejected": -0.525653600692749, + "logps/chosen": -115.6361083984375, + "logps/ref_chosen": -59.00108337402344, + "logps/ref_rejected": -87.89215087890625, + "logps/rejected": -206.1951904296875, + "loss": 1.1026, + "margin_dpo/margin_mean": 61.66801452636719, + "margin_dpo/margin_std": 99.96194458007812, + "step": 132 + }, + { + "KL/chosen_KL_mean": -67.54967498779297, + "KL/mean": -93.98008728027344, + "KL/rejected_KL_mean": -120.41049194335938, + "KL/std": 68.62925720214844, + "epoch": 0.19530102790014683, + "fcm_dpo/beta": 0.007148602977395058, + "fcm_dpo/delta": 0.022928927093744278, + "fcm_dpo/margin": 52.86082458496094, + "fcm_dpo/q_t": 0.4168737530708313, + "grad_norm": 20.872831344604492, + "learning_rate": 4.870401618977415e-07, + "logits/chosen": -0.479339599609375, + "logits/rejected": -0.46433907747268677, + "logps/chosen": -134.1541748046875, + "logps/ref_chosen": -66.60449981689453, + "logps/ref_rejected": -96.33355712890625, + "logps/rejected": -216.74404907226562, + "loss": 1.1346, + "margin_dpo/margin_mean": 52.86082458496094, + "margin_dpo/margin_std": 88.52780151367188, + "step": 133 + }, + { + "KL/chosen_KL_mean": -51.839813232421875, + "KL/mean": -80.39920043945312, + "KL/rejected_KL_mean": -108.95858001708984, + "KL/std": 64.22247314453125, + "epoch": 0.19676945668135096, + "fcm_dpo/beta": 0.0071844616904854774, + "fcm_dpo/delta": -0.010975977405905724, + "fcm_dpo/margin": 57.118778228759766, + "fcm_dpo/q_t": 0.40691226720809937, + "grad_norm": 16.936574935913086, + "learning_rate": 4.866292092063986e-07, + "logits/chosen": -0.4439271092414856, + "logits/rejected": -0.429843544960022, + "logps/chosen": -103.90907287597656, + "logps/ref_chosen": -52.06925582885742, + "logps/ref_rejected": -87.6545181274414, + "logps/rejected": -196.61309814453125, + "loss": 1.0822, + "margin_dpo/margin_mean": 57.118778228759766, + "margin_dpo/margin_std": 74.62931060791016, + "step": 134 + }, + { + "KL/chosen_KL_mean": -57.692352294921875, + "KL/mean": -100.01170349121094, + "KL/rejected_KL_mean": -142.3310546875, + "KL/std": 90.0093002319336, + "epoch": 0.19823788546255505, + "fcm_dpo/beta": 0.006944045424461365, + "fcm_dpo/delta": -0.1996196210384369, + "fcm_dpo/margin": 84.63871765136719, + "fcm_dpo/q_t": 0.3745703101158142, + "grad_norm": 18.334857940673828, + "learning_rate": 4.862120211153265e-07, + "logits/chosen": -0.4614737033843994, + "logits/rejected": -0.49732786417007446, + "logps/chosen": -108.04621887207031, + "logps/ref_chosen": -50.353858947753906, + "logps/ref_rejected": -115.97975158691406, + "logps/rejected": -258.310791015625, + "loss": 1.009, + "margin_dpo/margin_mean": 84.63871765136719, + "margin_dpo/margin_std": 110.02676391601562, + "step": 135 + }, + { + "KL/chosen_KL_mean": -69.69944763183594, + "KL/mean": -98.3271484375, + "KL/rejected_KL_mean": -126.95484924316406, + "KL/std": 84.19580078125, + "epoch": 0.19970631424375918, + "fcm_dpo/beta": 0.006846585310995579, + "fcm_dpo/delta": 0.007697347551584244, + "fcm_dpo/margin": 57.25541305541992, + "fcm_dpo/q_t": 0.4213330149650574, + "grad_norm": 19.6416015625, + "learning_rate": 4.857886086178193e-07, + "logits/chosen": -0.4833676218986511, + "logits/rejected": -0.4771433472633362, + "logps/chosen": -134.77195739746094, + "logps/ref_chosen": -65.072509765625, + "logps/ref_rejected": -96.32122802734375, + "logps/rejected": -223.27609252929688, + "loss": 1.1537, + "margin_dpo/margin_mean": 57.255409240722656, + "margin_dpo/margin_std": 109.88507080078125, + "step": 136 + }, + { + "KL/chosen_KL_mean": -64.6696548461914, + "KL/mean": -109.20771789550781, + "KL/rejected_KL_mean": -153.74578857421875, + "KL/std": 107.58679962158203, + "epoch": 0.2011747430249633, + "fcm_dpo/beta": 0.006671931594610214, + "fcm_dpo/delta": -0.20688247680664062, + "fcm_dpo/margin": 89.07612609863281, + "fcm_dpo/q_t": 0.3791872262954712, + "grad_norm": 16.7557373046875, + "learning_rate": 4.853589828711902e-07, + "logits/chosen": -0.419033944606781, + "logits/rejected": -0.4432998299598694, + "logps/chosen": -113.42877197265625, + "logps/ref_chosen": -48.759117126464844, + "logps/ref_rejected": -113.86376953125, + "logps/rejected": -267.60955810546875, + "loss": 1.0331, + "margin_dpo/margin_mean": 89.07611846923828, + "margin_dpo/margin_std": 130.33053588867188, + "step": 137 + }, + { + "KL/chosen_KL_mean": -69.46388244628906, + "KL/mean": -103.4703369140625, + "KL/rejected_KL_mean": -137.47679138183594, + "KL/std": 83.18930053710938, + "epoch": 0.2026431718061674, + "fcm_dpo/beta": 0.006574639119207859, + "fcm_dpo/delta": -0.04935392364859581, + "fcm_dpo/margin": 68.0129165649414, + "fcm_dpo/q_t": 0.39788979291915894, + "grad_norm": 18.415468215942383, + "learning_rate": 4.849231551964771e-07, + "logits/chosen": -0.40760838985443115, + "logits/rejected": -0.3944551348686218, + "logps/chosen": -129.9835205078125, + "logps/ref_chosen": -60.519649505615234, + "logps/ref_rejected": -93.19694519042969, + "logps/rejected": -230.67373657226562, + "loss": 1.0613, + "margin_dpo/margin_mean": 68.0129165649414, + "margin_dpo/margin_std": 85.65242004394531, + "step": 138 + }, + { + "KL/chosen_KL_mean": -58.71821975708008, + "KL/mean": -98.14753723144531, + "KL/rejected_KL_mean": -137.57684326171875, + "KL/std": 77.90769958496094, + "epoch": 0.20411160058737152, + "fcm_dpo/beta": 0.006447950378060341, + "fcm_dpo/delta": -0.11413857340812683, + "fcm_dpo/margin": 78.85862731933594, + "fcm_dpo/q_t": 0.38780367374420166, + "grad_norm": 17.002193450927734, + "learning_rate": 4.844811370781446e-07, + "logits/chosen": -0.40922728180885315, + "logits/rejected": -0.39873528480529785, + "logps/chosen": -105.60960388183594, + "logps/ref_chosen": -46.89138412475586, + "logps/ref_rejected": -79.72798156738281, + "logps/rejected": -217.30484008789062, + "loss": 1.0361, + "margin_dpo/margin_mean": 78.85863494873047, + "margin_dpo/margin_std": 103.01472473144531, + "step": 139 + }, + { + "KL/chosen_KL_mean": -71.46368408203125, + "KL/mean": -106.60093688964844, + "KL/rejected_KL_mean": -141.73822021484375, + "KL/std": 86.1796875, + "epoch": 0.2055800293685756, + "fcm_dpo/beta": 0.006347954273223877, + "fcm_dpo/delta": -0.04827836528420448, + "fcm_dpo/margin": 70.2745361328125, + "fcm_dpo/q_t": 0.4006612300872803, + "grad_norm": 19.857847213745117, + "learning_rate": 4.840329401637809e-07, + "logits/chosen": -0.3932759761810303, + "logits/rejected": -0.37940922379493713, + "logps/chosen": -130.43838500976562, + "logps/ref_chosen": -58.97471618652344, + "logps/ref_rejected": -83.28410339355469, + "logps/rejected": -225.02232360839844, + "loss": 1.0896, + "margin_dpo/margin_mean": 70.2745361328125, + "margin_dpo/margin_std": 105.37348937988281, + "step": 140 + }, + { + "KL/chosen_KL_mean": -81.21903991699219, + "KL/mean": -114.55131530761719, + "KL/rejected_KL_mean": -147.88356018066406, + "KL/std": 97.91165161132812, + "epoch": 0.20704845814977973, + "fcm_dpo/beta": 0.006319768726825714, + "fcm_dpo/delta": -0.022237718105316162, + "fcm_dpo/margin": 66.66454315185547, + "fcm_dpo/q_t": 0.4057791233062744, + "grad_norm": 25.337135314941406, + "learning_rate": 4.83578576263792e-07, + "logits/chosen": -0.4274560213088989, + "logits/rejected": -0.41715872287750244, + "logps/chosen": -156.29470825195312, + "logps/ref_chosen": -75.07566833496094, + "logps/ref_rejected": -98.1922607421875, + "logps/rejected": -246.07583618164062, + "loss": 1.1267, + "margin_dpo/margin_mean": 66.66454315185547, + "margin_dpo/margin_std": 114.31845092773438, + "step": 141 + }, + { + "KL/chosen_KL_mean": -79.89231872558594, + "KL/mean": -118.12299346923828, + "KL/rejected_KL_mean": -156.35366821289062, + "KL/std": 101.84807586669922, + "epoch": 0.20851688693098386, + "fcm_dpo/beta": 0.006258774548768997, + "fcm_dpo/delta": -0.08255193382501602, + "fcm_dpo/margin": 76.46134185791016, + "fcm_dpo/q_t": 0.39626699686050415, + "grad_norm": 26.112525939941406, + "learning_rate": 4.83118057351089e-07, + "logits/chosen": -0.4015616774559021, + "logits/rejected": -0.4008292555809021, + "logps/chosen": -137.92025756835938, + "logps/ref_chosen": -58.027931213378906, + "logps/ref_rejected": -94.58222961425781, + "logps/rejected": -250.93588256835938, + "loss": 1.095, + "margin_dpo/margin_mean": 76.46134185791016, + "margin_dpo/margin_std": 119.62361145019531, + "step": 142 + }, + { + "KL/chosen_KL_mean": -84.15359497070312, + "KL/mean": -108.9317626953125, + "KL/rejected_KL_mean": -133.70993041992188, + "KL/std": 89.8084945678711, + "epoch": 0.20998531571218795, + "fcm_dpo/beta": 0.006263419054448605, + "fcm_dpo/delta": 0.09250222891569138, + "fcm_dpo/margin": 49.55633544921875, + "fcm_dpo/q_t": 0.4340221583843231, + "grad_norm": 23.864248275756836, + "learning_rate": 4.826513955607734e-07, + "logits/chosen": -0.3417607545852661, + "logits/rejected": -0.33318936824798584, + "logps/chosen": -141.7500457763672, + "logps/ref_chosen": -57.59645080566406, + "logps/ref_rejected": -78.99957275390625, + "logps/rejected": -212.70950317382812, + "loss": 1.2025, + "margin_dpo/margin_mean": 49.55633544921875, + "margin_dpo/margin_std": 106.16546630859375, + "step": 143 + }, + { + "KL/chosen_KL_mean": -76.2918701171875, + "KL/mean": -105.60356140136719, + "KL/rejected_KL_mean": -134.91525268554688, + "KL/std": 77.67918395996094, + "epoch": 0.21145374449339208, + "fcm_dpo/beta": 0.006343062035739422, + "fcm_dpo/delta": 0.029245702549815178, + "fcm_dpo/margin": 58.62339782714844, + "fcm_dpo/q_t": 0.41614866256713867, + "grad_norm": 21.587350845336914, + "learning_rate": 4.821786031898176e-07, + "logits/chosen": -0.37479305267333984, + "logits/rejected": -0.3632616400718689, + "logps/chosen": -136.1982421875, + "logps/ref_chosen": -59.90636444091797, + "logps/ref_rejected": -82.00025939941406, + "logps/rejected": -216.91552734375, + "loss": 1.1285, + "margin_dpo/margin_mean": 58.62339782714844, + "margin_dpo/margin_std": 91.50389099121094, + "step": 144 + }, + { + "KL/chosen_KL_mean": -73.16571044921875, + "KL/mean": -104.94743347167969, + "KL/rejected_KL_mean": -136.72915649414062, + "KL/std": 75.64851379394531, + "epoch": 0.21292217327459617, + "fcm_dpo/beta": 0.006349918898195028, + "fcm_dpo/delta": -0.0037822211161255836, + "fcm_dpo/margin": 63.56344985961914, + "fcm_dpo/q_t": 0.4085870385169983, + "grad_norm": 22.88797950744629, + "learning_rate": 4.816996926967401e-07, + "logits/chosen": -0.40823960304260254, + "logits/rejected": -0.3923068642616272, + "logps/chosen": -129.76637268066406, + "logps/ref_chosen": -56.60066604614258, + "logps/ref_rejected": -77.86631774902344, + "logps/rejected": -214.595458984375, + "loss": 1.1044, + "margin_dpo/margin_mean": 63.56344985961914, + "margin_dpo/margin_std": 93.52117919921875, + "step": 145 + }, + { + "KL/chosen_KL_mean": -96.4278793334961, + "KL/mean": -121.21267700195312, + "KL/rejected_KL_mean": -145.9974822998047, + "KL/std": 80.74990844726562, + "epoch": 0.2143906020558003, + "fcm_dpo/beta": 0.006410893052816391, + "fcm_dpo/delta": 0.08496344089508057, + "fcm_dpo/margin": 49.569610595703125, + "fcm_dpo/q_t": 0.427983820438385, + "grad_norm": 26.765342712402344, + "learning_rate": 4.812146767012779e-07, + "logits/chosen": -0.3858996033668518, + "logits/rejected": -0.3604584336280823, + "logps/chosen": -162.42832946777344, + "logps/ref_chosen": -66.00045013427734, + "logps/ref_rejected": -81.70278930664062, + "logps/rejected": -227.7002716064453, + "loss": 1.1884, + "margin_dpo/margin_mean": 49.569610595703125, + "margin_dpo/margin_std": 97.2584228515625, + "step": 146 + }, + { + "KL/chosen_KL_mean": -70.62712097167969, + "KL/mean": -103.94308471679688, + "KL/rejected_KL_mean": -137.25904846191406, + "KL/std": 82.13803100585938, + "epoch": 0.21585903083700442, + "fcm_dpo/beta": 0.006426135078072548, + "fcm_dpo/delta": -0.029495222494006157, + "fcm_dpo/margin": 66.63192749023438, + "fcm_dpo/q_t": 0.40504029393196106, + "grad_norm": 20.992704391479492, + "learning_rate": 4.807235679840536e-07, + "logits/chosen": -0.41126739978790283, + "logits/rejected": -0.39271873235702515, + "logps/chosen": -124.03260803222656, + "logps/ref_chosen": -53.405487060546875, + "logps/ref_rejected": -71.39060974121094, + "logps/rejected": -208.649658203125, + "loss": 1.1021, + "margin_dpo/margin_mean": 66.63192749023438, + "margin_dpo/margin_std": 102.30528259277344, + "step": 147 + }, + { + "KL/chosen_KL_mean": -70.48782348632812, + "KL/mean": -99.92208862304688, + "KL/rejected_KL_mean": -129.35635375976562, + "KL/std": 84.81340026855469, + "epoch": 0.2173274596182085, + "fcm_dpo/beta": 0.0063689956441521645, + "fcm_dpo/delta": -0.08440735191106796, + "fcm_dpo/margin": 58.8685302734375, + "fcm_dpo/q_t": 0.41790372133255005, + "grad_norm": 18.33550262451172, + "learning_rate": 4.802263794862384e-07, + "logits/chosen": -0.44826143980026245, + "logits/rejected": -0.4405372738838196, + "logps/chosen": -135.42489624023438, + "logps/ref_chosen": -64.93708038330078, + "logps/ref_rejected": -103.09384155273438, + "logps/rejected": -232.4501953125, + "loss": 1.1302, + "margin_dpo/margin_mean": 58.868534088134766, + "margin_dpo/margin_std": 89.78065490722656, + "step": 148 + }, + { + "KL/chosen_KL_mean": -66.89385986328125, + "KL/mean": -102.25575256347656, + "KL/rejected_KL_mean": -137.61764526367188, + "KL/std": 72.74675750732422, + "epoch": 0.21879588839941264, + "fcm_dpo/beta": 0.006224669516086578, + "fcm_dpo/delta": -0.043726127594709396, + "fcm_dpo/margin": 70.72378540039062, + "fcm_dpo/q_t": 0.39915308356285095, + "grad_norm": 17.068838119506836, + "learning_rate": 4.797231243092118e-07, + "logits/chosen": -0.4722484350204468, + "logits/rejected": -0.4587002694606781, + "logps/chosen": -125.36762237548828, + "logps/ref_chosen": -58.47376251220703, + "logps/ref_rejected": -99.31474304199219, + "logps/rejected": -236.93238830566406, + "loss": 1.0654, + "margin_dpo/margin_mean": 70.72378540039062, + "margin_dpo/margin_std": 85.64163208007812, + "step": 149 + }, + { + "KL/chosen_KL_mean": -58.0627555847168, + "KL/mean": -93.5211181640625, + "KL/rejected_KL_mean": -128.97947692871094, + "KL/std": 83.53118896484375, + "epoch": 0.22026431718061673, + "fcm_dpo/beta": 0.006181714590638876, + "fcm_dpo/delta": -0.04127602279186249, + "fcm_dpo/margin": 70.91671752929688, + "fcm_dpo/q_t": 0.4037661552429199, + "grad_norm": 17.37567710876465, + "learning_rate": 4.792138157142157e-07, + "logits/chosen": -0.4367871582508087, + "logits/rejected": -0.43975830078125, + "logps/chosen": -103.76856994628906, + "logps/ref_chosen": -45.705810546875, + "logps/ref_rejected": -83.34759521484375, + "logps/rejected": -212.3270721435547, + "loss": 1.0768, + "margin_dpo/margin_mean": 70.91671752929688, + "margin_dpo/margin_std": 98.11479187011719, + "step": 150 + }, + { + "KL/chosen_KL_mean": -73.78369140625, + "KL/mean": -109.70790100097656, + "KL/rejected_KL_mean": -145.63211059570312, + "KL/std": 83.37910461425781, + "epoch": 0.22173274596182085, + "fcm_dpo/beta": 0.006163077428936958, + "fcm_dpo/delta": -0.044833216816186905, + "fcm_dpo/margin": 71.84840393066406, + "fcm_dpo/q_t": 0.3990030288696289, + "grad_norm": 21.97403907775879, + "learning_rate": 4.786984671220053e-07, + "logits/chosen": -0.5252622365951538, + "logits/rejected": -0.49835896492004395, + "logps/chosen": -144.35452270507812, + "logps/ref_chosen": -70.57083129882812, + "logps/ref_rejected": -100.46382141113281, + "logps/rejected": -246.09591674804688, + "loss": 1.065, + "margin_dpo/margin_mean": 71.84840393066406, + "margin_dpo/margin_std": 91.86006164550781, + "step": 151 + }, + { + "KL/chosen_KL_mean": -64.33494567871094, + "KL/mean": -107.04579162597656, + "KL/rejected_KL_mean": -149.75662231445312, + "KL/std": 83.78707122802734, + "epoch": 0.22320117474302498, + "fcm_dpo/beta": 0.0060538845136761665, + "fcm_dpo/delta": -0.12335566431283951, + "fcm_dpo/margin": 85.42166900634766, + "fcm_dpo/q_t": 0.3849901556968689, + "grad_norm": 20.60307502746582, + "learning_rate": 4.78177092112495e-07, + "logits/chosen": -0.47641807794570923, + "logits/rejected": -0.47393327951431274, + "logps/chosen": -124.49932861328125, + "logps/ref_chosen": -60.16438674926758, + "logps/ref_rejected": -106.14045715332031, + "logps/rejected": -255.89707946777344, + "loss": 1.0264, + "margin_dpo/margin_mean": 85.42166900634766, + "margin_dpo/margin_std": 103.75975799560547, + "step": 152 + }, + { + "KL/chosen_KL_mean": -62.89659881591797, + "KL/mean": -99.18505859375, + "KL/rejected_KL_mean": -135.47352600097656, + "KL/std": 90.631591796875, + "epoch": 0.22466960352422907, + "fcm_dpo/beta": 0.005982040427625179, + "fcm_dpo/delta": -0.03570834919810295, + "fcm_dpo/margin": 72.57691955566406, + "fcm_dpo/q_t": 0.40594881772994995, + "grad_norm": 14.930242538452148, + "learning_rate": 4.776497044244016e-07, + "logits/chosen": -0.4422386884689331, + "logits/rejected": -0.43338215351104736, + "logps/chosen": -119.21187591552734, + "logps/ref_chosen": -56.315277099609375, + "logps/ref_rejected": -85.65583801269531, + "logps/rejected": -221.12936401367188, + "loss": 1.0966, + "margin_dpo/margin_mean": 72.57691955566406, + "margin_dpo/margin_std": 111.9495849609375, + "step": 153 + }, + { + "KL/chosen_KL_mean": -76.06448364257812, + "KL/mean": -110.12120056152344, + "KL/rejected_KL_mean": -144.17791748046875, + "KL/std": 89.9405517578125, + "epoch": 0.2261380323054332, + "fcm_dpo/beta": 0.00597739452496171, + "fcm_dpo/delta": -0.007730741053819656, + "fcm_dpo/margin": 68.11343383789062, + "fcm_dpo/q_t": 0.4101860225200653, + "grad_norm": 17.922649383544922, + "learning_rate": 4.771163179548808e-07, + "logits/chosen": -0.49747714400291443, + "logits/rejected": -0.5010119676589966, + "logps/chosen": -138.8070526123047, + "logps/ref_chosen": -62.74256896972656, + "logps/ref_rejected": -104.24420166015625, + "logps/rejected": -248.422119140625, + "loss": 1.1354, + "margin_dpo/margin_mean": 68.11343383789062, + "margin_dpo/margin_std": 118.15486145019531, + "step": 154 + }, + { + "KL/chosen_KL_mean": -68.90711975097656, + "KL/mean": -102.35006713867188, + "KL/rejected_KL_mean": -135.7930145263672, + "KL/std": 81.32989501953125, + "epoch": 0.2276064610866373, + "fcm_dpo/beta": 0.0059681423008441925, + "fcm_dpo/delta": 0.0006999801844358444, + "fcm_dpo/margin": 66.88587951660156, + "fcm_dpo/q_t": 0.4097937345504761, + "grad_norm": 18.32874298095703, + "learning_rate": 4.7657694675916247e-07, + "logits/chosen": -0.48518913984298706, + "logits/rejected": -0.4643056392669678, + "logps/chosen": -129.560302734375, + "logps/ref_chosen": -60.65318298339844, + "logps/ref_rejected": -77.49220275878906, + "logps/rejected": -213.28521728515625, + "loss": 1.1141, + "margin_dpo/margin_mean": 66.88587951660156, + "margin_dpo/margin_std": 103.57026672363281, + "step": 155 + }, + { + "KL/chosen_KL_mean": -92.63871765136719, + "KL/mean": -112.63226318359375, + "KL/rejected_KL_mean": -132.6258087158203, + "KL/std": 85.67424011230469, + "epoch": 0.2290748898678414, + "fcm_dpo/beta": 0.006016138941049576, + "fcm_dpo/delta": 0.05333181843161583, + "fcm_dpo/margin": 39.98707962036133, + "fcm_dpo/q_t": 0.444929301738739, + "grad_norm": 26.82451629638672, + "learning_rate": 4.7603160505017893e-07, + "logits/chosen": -0.4589994549751282, + "logits/rejected": -0.4519059658050537, + "logps/chosen": -162.13059997558594, + "logps/ref_chosen": -69.49188232421875, + "logps/ref_rejected": -77.16929626464844, + "logps/rejected": -209.79510498046875, + "loss": 1.2724, + "margin_dpo/margin_mean": 39.98707580566406, + "margin_dpo/margin_std": 114.85380554199219, + "step": 156 + }, + { + "KL/chosen_KL_mean": -82.70185089111328, + "KL/mean": -126.093505859375, + "KL/rejected_KL_mean": -169.48513793945312, + "KL/std": 92.4810791015625, + "epoch": 0.2305433186490455, + "fcm_dpo/beta": 0.005881883203983307, + "fcm_dpo/delta": -0.11752188205718994, + "fcm_dpo/margin": 86.78329467773438, + "fcm_dpo/q_t": 0.38307642936706543, + "grad_norm": 21.786405563354492, + "learning_rate": 4.7548030719819154e-07, + "logits/chosen": -0.4166560769081116, + "logits/rejected": -0.42006832361221313, + "logps/chosen": -144.07028198242188, + "logps/ref_chosen": -61.368438720703125, + "logps/ref_rejected": -107.64636993408203, + "logps/rejected": -277.13153076171875, + "loss": 1.0327, + "margin_dpo/margin_mean": 86.78329467773438, + "margin_dpo/margin_std": 104.9678955078125, + "step": 157 + }, + { + "KL/chosen_KL_mean": -84.41264343261719, + "KL/mean": -131.40390014648438, + "KL/rejected_KL_mean": -178.39517211914062, + "KL/std": 117.00088500976562, + "epoch": 0.23201174743024963, + "fcm_dpo/beta": 0.0057478612288832664, + "fcm_dpo/delta": -0.14822149276733398, + "fcm_dpo/margin": 93.98252868652344, + "fcm_dpo/q_t": 0.38809406757354736, + "grad_norm": 17.88888931274414, + "learning_rate": 4.7492306773041136e-07, + "logits/chosen": -0.3769122362136841, + "logits/rejected": -0.3925984501838684, + "logps/chosen": -142.02557373046875, + "logps/ref_chosen": -57.612918853759766, + "logps/ref_rejected": -113.6946792602539, + "logps/rejected": -292.08984375, + "loss": 1.0569, + "margin_dpo/margin_mean": 93.98252868652344, + "margin_dpo/margin_std": 142.27569580078125, + "step": 158 + }, + { + "KL/chosen_KL_mean": -95.15133666992188, + "KL/mean": -126.28166198730469, + "KL/rejected_KL_mean": -157.41195678710938, + "KL/std": 101.34651184082031, + "epoch": 0.23348017621145375, + "fcm_dpo/beta": 0.00576662877574563, + "fcm_dpo/delta": 0.04188086465001106, + "fcm_dpo/margin": 62.26060485839844, + "fcm_dpo/q_t": 0.41926220059394836, + "grad_norm": 26.526140213012695, + "learning_rate": 4.743599013306165e-07, + "logits/chosen": -0.4394528865814209, + "logits/rejected": -0.4065374433994293, + "logps/chosen": -176.71168518066406, + "logps/ref_chosen": -81.56034851074219, + "logps/ref_rejected": -88.89871215820312, + "logps/rejected": -246.3106689453125, + "loss": 1.1579, + "margin_dpo/margin_mean": 62.26060485839844, + "margin_dpo/margin_std": 111.88368225097656, + "step": 159 + }, + { + "KL/chosen_KL_mean": -97.3049087524414, + "KL/mean": -140.1513671875, + "KL/rejected_KL_mean": -182.99783325195312, + "KL/std": 107.30947875976562, + "epoch": 0.23494860499265785, + "fcm_dpo/beta": 0.005649491213262081, + "fcm_dpo/delta": -0.08930858224630356, + "fcm_dpo/margin": 85.69293212890625, + "fcm_dpo/q_t": 0.3967619240283966, + "grad_norm": 20.257490158081055, + "learning_rate": 4.737908228387656e-07, + "logits/chosen": -0.38310354948043823, + "logits/rejected": -0.37118303775787354, + "logps/chosen": -163.03579711914062, + "logps/ref_chosen": -65.73088073730469, + "logps/ref_rejected": -97.21781921386719, + "logps/rejected": -280.21563720703125, + "loss": 1.0875, + "margin_dpo/margin_mean": 85.69292449951172, + "margin_dpo/margin_std": 134.3516387939453, + "step": 160 + }, + { + "KL/chosen_KL_mean": -83.62994384765625, + "KL/mean": -121.65289306640625, + "KL/rejected_KL_mean": -159.6758575439453, + "KL/std": 88.20482635498047, + "epoch": 0.23641703377386197, + "fcm_dpo/beta": 0.005624156445264816, + "fcm_dpo/delta": -0.028947748243808746, + "fcm_dpo/margin": 76.04591369628906, + "fcm_dpo/q_t": 0.40480250120162964, + "grad_norm": 17.573366165161133, + "learning_rate": 4.7321584725060594e-07, + "logits/chosen": -0.4291399121284485, + "logits/rejected": -0.4256962835788727, + "logps/chosen": -136.06640625, + "logps/ref_chosen": -52.43647003173828, + "logps/ref_rejected": -83.43095397949219, + "logps/rejected": -243.1068115234375, + "loss": 1.0919, + "margin_dpo/margin_mean": 76.04591369628906, + "margin_dpo/margin_std": 109.95503234863281, + "step": 161 + }, + { + "KL/chosen_KL_mean": -82.52705383300781, + "KL/mean": -119.3626708984375, + "KL/rejected_KL_mean": -156.19830322265625, + "KL/std": 96.73245239257812, + "epoch": 0.23788546255506607, + "fcm_dpo/beta": 0.005556900054216385, + "fcm_dpo/delta": -0.011652916669845581, + "fcm_dpo/margin": 73.67125701904297, + "fcm_dpo/q_t": 0.40978431701660156, + "grad_norm": 20.28093147277832, + "learning_rate": 4.7263498971727905e-07, + "logits/chosen": -0.44169116020202637, + "logits/rejected": -0.42343568801879883, + "logps/chosen": -145.13763427734375, + "logps/ref_chosen": -62.6105842590332, + "logps/ref_rejected": -89.39057922363281, + "logps/rejected": -245.58888244628906, + "loss": 1.1189, + "margin_dpo/margin_mean": 73.67125701904297, + "margin_dpo/margin_std": 116.21994018554688, + "step": 162 + }, + { + "KL/chosen_KL_mean": -91.48970794677734, + "KL/mean": -127.75386810302734, + "KL/rejected_KL_mean": -164.01803588867188, + "KL/std": 96.89871215820312, + "epoch": 0.2393538913362702, + "fcm_dpo/beta": 0.005600422620773315, + "fcm_dpo/delta": -0.006450829096138477, + "fcm_dpo/margin": 72.5283203125, + "fcm_dpo/q_t": 0.4107089638710022, + "grad_norm": 19.475297927856445, + "learning_rate": 4.720482655449212e-07, + "logits/chosen": -0.38897454738616943, + "logits/rejected": -0.3698977828025818, + "logps/chosen": -146.51133728027344, + "logps/ref_chosen": -55.021629333496094, + "logps/ref_rejected": -75.418212890625, + "logps/rejected": -239.43624877929688, + "loss": 1.1174, + "margin_dpo/margin_mean": 72.5283203125, + "margin_dpo/margin_std": 116.68798828125, + "step": 163 + }, + { + "KL/chosen_KL_mean": -84.55402374267578, + "KL/mean": -128.27662658691406, + "KL/rejected_KL_mean": -171.9992218017578, + "KL/std": 93.24118041992188, + "epoch": 0.24082232011747431, + "fcm_dpo/beta": 0.00548876728862524, + "fcm_dpo/delta": -0.08558943122625351, + "fcm_dpo/margin": 87.4451904296875, + "fcm_dpo/q_t": 0.39110738039016724, + "grad_norm": 19.807645797729492, + "learning_rate": 4.714556901942599e-07, + "logits/chosen": -0.36497557163238525, + "logits/rejected": -0.34768325090408325, + "logps/chosen": -140.1947021484375, + "logps/ref_chosen": -55.64066696166992, + "logps/ref_rejected": -79.66463470458984, + "logps/rejected": -251.66384887695312, + "loss": 1.0448, + "margin_dpo/margin_mean": 87.44519805908203, + "margin_dpo/margin_std": 107.89261627197266, + "step": 164 + }, + { + "KL/chosen_KL_mean": -92.47479248046875, + "KL/mean": -121.87619018554688, + "KL/rejected_KL_mean": -151.27755737304688, + "KL/std": 80.65908813476562, + "epoch": 0.2422907488986784, + "fcm_dpo/beta": 0.005550094414502382, + "fcm_dpo/delta": 0.07617515325546265, + "fcm_dpo/margin": 58.802772521972656, + "fcm_dpo/q_t": 0.4268398880958557, + "grad_norm": 22.88750457763672, + "learning_rate": 4.708572792802069e-07, + "logits/chosen": -0.39553022384643555, + "logits/rejected": -0.36636269092559814, + "logps/chosen": -153.78549194335938, + "logps/ref_chosen": -61.310691833496094, + "logps/ref_rejected": -73.67060852050781, + "logps/rejected": -224.9481658935547, + "loss": 1.1709, + "margin_dpo/margin_mean": 58.802772521972656, + "margin_dpo/margin_std": 107.60116577148438, + "step": 165 + }, + { + "KL/chosen_KL_mean": -83.616455078125, + "KL/mean": -137.0757293701172, + "KL/rejected_KL_mean": -190.53501892089844, + "KL/std": 119.21273803710938, + "epoch": 0.24375917767988253, + "fcm_dpo/beta": 0.005407451651990414, + "fcm_dpo/delta": -0.1903567910194397, + "fcm_dpo/margin": 106.9185562133789, + "fcm_dpo/q_t": 0.38125768303871155, + "grad_norm": 17.94371223449707, + "learning_rate": 4.702530485714461e-07, + "logits/chosen": -0.3321695327758789, + "logits/rejected": -0.3424978256225586, + "logps/chosen": -134.6000518798828, + "logps/ref_chosen": -50.98360061645508, + "logps/ref_rejected": -98.09512329101562, + "logps/rejected": -288.630126953125, + "loss": 1.0224, + "margin_dpo/margin_mean": 106.91854858398438, + "margin_dpo/margin_std": 150.0036163330078, + "step": 166 + }, + { + "KL/chosen_KL_mean": -84.20343780517578, + "KL/mean": -139.89492797851562, + "KL/rejected_KL_mean": -195.58641052246094, + "KL/std": 104.5910873413086, + "epoch": 0.24522760646108663, + "fcm_dpo/beta": 0.0052237361669540405, + "fcm_dpo/delta": -0.19318926334381104, + "fcm_dpo/margin": 111.38297271728516, + "fcm_dpo/q_t": 0.36922937631607056, + "grad_norm": 17.214645385742188, + "learning_rate": 4.6964301399001877e-07, + "logits/chosen": -0.3567941188812256, + "logits/rejected": -0.36129891872406006, + "logps/chosen": -134.62753295898438, + "logps/ref_chosen": -50.424095153808594, + "logps/ref_rejected": -96.03042602539062, + "logps/rejected": -291.6168212890625, + "loss": 0.9761, + "margin_dpo/margin_mean": 111.38297271728516, + "margin_dpo/margin_std": 121.13790893554688, + "step": 167 + }, + { + "KL/chosen_KL_mean": -92.93460083007812, + "KL/mean": -133.62405395507812, + "KL/rejected_KL_mean": -174.31350708007812, + "KL/std": 100.2225341796875, + "epoch": 0.24669603524229075, + "fcm_dpo/beta": 0.005147742573171854, + "fcm_dpo/delta": -0.019897453486919403, + "fcm_dpo/margin": 81.37892150878906, + "fcm_dpo/q_t": 0.40599554777145386, + "grad_norm": 20.31831932067871, + "learning_rate": 4.690271916109034e-07, + "logits/chosen": -0.3324066996574402, + "logits/rejected": -0.32228702306747437, + "logps/chosen": -142.39743041992188, + "logps/ref_chosen": -49.462825775146484, + "logps/ref_rejected": -75.30855560302734, + "logps/rejected": -249.6220703125, + "loss": 1.0879, + "margin_dpo/margin_mean": 81.37892150878906, + "margin_dpo/margin_std": 113.19637298583984, + "step": 168 + }, + { + "KL/chosen_KL_mean": -97.10243225097656, + "KL/mean": -133.62738037109375, + "KL/rejected_KL_mean": -170.15234375, + "KL/std": 102.53475952148438, + "epoch": 0.24816446402349487, + "fcm_dpo/beta": 0.005072026047855616, + "fcm_dpo/delta": -0.08044641464948654, + "fcm_dpo/margin": 73.04991149902344, + "fcm_dpo/q_t": 0.42030519247055054, + "grad_norm": 20.161535263061523, + "learning_rate": 4.6840559766159235e-07, + "logits/chosen": -0.3249385356903076, + "logits/rejected": -0.3059506416320801, + "logps/chosen": -156.9058837890625, + "logps/ref_chosen": -59.803443908691406, + "logps/ref_rejected": -83.34574890136719, + "logps/rejected": -253.49807739257812, + "loss": 1.168, + "margin_dpo/margin_mean": 73.04991149902344, + "margin_dpo/margin_std": 141.12490844726562, + "step": 169 + }, + { + "KL/chosen_KL_mean": -85.55194091796875, + "KL/mean": -127.24555206298828, + "KL/rejected_KL_mean": -168.9391632080078, + "KL/std": 97.04989624023438, + "epoch": 0.24963289280469897, + "fcm_dpo/beta": 0.005024762358516455, + "fcm_dpo/delta": -0.02076905593276024, + "fcm_dpo/margin": 83.38722229003906, + "fcm_dpo/q_t": 0.40413689613342285, + "grad_norm": 17.05228042602539, + "learning_rate": 4.6777824852166437e-07, + "logits/chosen": -0.3223455548286438, + "logits/rejected": -0.3122260272502899, + "logps/chosen": -135.02371215820312, + "logps/ref_chosen": -49.471771240234375, + "logps/ref_rejected": -75.91734313964844, + "logps/rejected": -244.85650634765625, + "loss": 1.0942, + "margin_dpo/margin_mean": 83.38722229003906, + "margin_dpo/margin_std": 117.27194213867188, + "step": 170 + }, + { + "KL/chosen_KL_mean": -121.26353454589844, + "KL/mean": -156.89358520507812, + "KL/rejected_KL_mean": -192.5236358642578, + "KL/std": 110.09349060058594, + "epoch": 0.2511013215859031, + "fcm_dpo/beta": 0.00508046243339777, + "fcm_dpo/delta": 0.039394039660692215, + "fcm_dpo/margin": 71.26011657714844, + "fcm_dpo/q_t": 0.42305469512939453, + "grad_norm": 28.202070236206055, + "learning_rate": 4.6714516072235273e-07, + "logits/chosen": -0.31354865431785583, + "logits/rejected": -0.29599112272262573, + "logps/chosen": -205.76284790039062, + "logps/ref_chosen": -84.49931335449219, + "logps/ref_rejected": -109.38209533691406, + "logps/rejected": -301.9057312011719, + "loss": 1.1705, + "margin_dpo/margin_mean": 71.26011657714844, + "margin_dpo/margin_std": 142.1697235107422, + "step": 171 + }, + { + "KL/chosen_KL_mean": -104.97297668457031, + "KL/mean": -141.43255615234375, + "KL/rejected_KL_mean": -177.89212036132812, + "KL/std": 106.18240356445312, + "epoch": 0.2525697503671072, + "fcm_dpo/beta": 0.005108260549604893, + "fcm_dpo/delta": 0.028583845123648643, + "fcm_dpo/margin": 72.91913604736328, + "fcm_dpo/q_t": 0.41656991839408875, + "grad_norm": 19.106325149536133, + "learning_rate": 4.6650635094610966e-07, + "logits/chosen": -0.38692790269851685, + "logits/rejected": -0.36780279874801636, + "logps/chosen": -173.62689208984375, + "logps/ref_chosen": -68.65391540527344, + "logps/ref_rejected": -85.43667602539062, + "logps/rejected": -263.32879638671875, + "loss": 1.1391, + "margin_dpo/margin_mean": 72.91913604736328, + "margin_dpo/margin_std": 123.32398986816406, + "step": 172 + }, + { + "KL/chosen_KL_mean": -95.9285659790039, + "KL/mean": -132.39529418945312, + "KL/rejected_KL_mean": -168.86203002929688, + "KL/std": 101.62166595458984, + "epoch": 0.2540381791483113, + "fcm_dpo/beta": 0.005157306790351868, + "fcm_dpo/delta": 0.0244424007833004, + "fcm_dpo/margin": 72.93345642089844, + "fcm_dpo/q_t": 0.4147951602935791, + "grad_norm": 20.805395126342773, + "learning_rate": 4.6586183602616687e-07, + "logits/chosen": -0.38531604409217834, + "logits/rejected": -0.3533366620540619, + "logps/chosen": -158.9794464111328, + "logps/ref_chosen": -63.050880432128906, + "logps/ref_rejected": -78.68392181396484, + "logps/rejected": -247.5459442138672, + "loss": 1.1146, + "margin_dpo/margin_mean": 72.93345642089844, + "margin_dpo/margin_std": 106.28599548339844, + "step": 173 + }, + { + "KL/chosen_KL_mean": -89.95355224609375, + "KL/mean": -134.23170471191406, + "KL/rejected_KL_mean": -178.50985717773438, + "KL/std": 104.74340057373047, + "epoch": 0.2555066079295154, + "fcm_dpo/beta": 0.005131464451551437, + "fcm_dpo/delta": -0.05737413465976715, + "fcm_dpo/margin": 88.55628967285156, + "fcm_dpo/q_t": 0.3998154103755951, + "grad_norm": 24.817533493041992, + "learning_rate": 4.652116329460919e-07, + "logits/chosen": -0.3375644087791443, + "logits/rejected": -0.35590213537216187, + "logps/chosen": -143.3165283203125, + "logps/ref_chosen": -53.36296844482422, + "logps/ref_rejected": -101.91120910644531, + "logps/rejected": -280.42108154296875, + "loss": 1.0809, + "margin_dpo/margin_mean": 88.55628967285156, + "margin_dpo/margin_std": 126.68205261230469, + "step": 174 + }, + { + "KL/chosen_KL_mean": -84.00241088867188, + "KL/mean": -142.2693328857422, + "KL/rejected_KL_mean": -200.5362548828125, + "KL/std": 111.97515869140625, + "epoch": 0.25697503671071953, + "fcm_dpo/beta": 0.004940693732351065, + "fcm_dpo/delta": -0.18679270148277283, + "fcm_dpo/margin": 116.53382873535156, + "fcm_dpo/q_t": 0.36849379539489746, + "grad_norm": 25.079553604125977, + "learning_rate": 4.645557588393406e-07, + "logits/chosen": -0.3228433430194855, + "logits/rejected": -0.3084716796875, + "logps/chosen": -129.42018127441406, + "logps/ref_chosen": -45.417762756347656, + "logps/ref_rejected": -89.50579833984375, + "logps/rejected": -290.04205322265625, + "loss": 0.9676, + "margin_dpo/margin_mean": 116.53382873535156, + "margin_dpo/margin_std": 118.0002212524414, + "step": 175 + }, + { + "KL/chosen_KL_mean": -88.73968505859375, + "KL/mean": -139.49606323242188, + "KL/rejected_KL_mean": -190.25241088867188, + "KL/std": 112.8240966796875, + "epoch": 0.25844346549192365, + "fcm_dpo/beta": 0.004837565589696169, + "fcm_dpo/delta": -0.09568466246128082, + "fcm_dpo/margin": 101.51274108886719, + "fcm_dpo/q_t": 0.392004132270813, + "grad_norm": 18.32849884033203, + "learning_rate": 4.638942309888058e-07, + "logits/chosen": -0.3147198557853699, + "logits/rejected": -0.332048237323761, + "logps/chosen": -139.19252014160156, + "logps/ref_chosen": -50.452842712402344, + "logps/ref_rejected": -95.5589599609375, + "logps/rejected": -285.8114013671875, + "loss": 1.0415, + "margin_dpo/margin_mean": 101.51274108886719, + "margin_dpo/margin_std": 129.95030212402344, + "step": 176 + }, + { + "KL/chosen_KL_mean": -102.09382629394531, + "KL/mean": -152.91012573242188, + "KL/rejected_KL_mean": -203.72640991210938, + "KL/std": 122.18125915527344, + "epoch": 0.2599118942731278, + "fcm_dpo/beta": 0.004758263938128948, + "fcm_dpo/delta": -0.0877579003572464, + "fcm_dpo/margin": 101.63257598876953, + "fcm_dpo/q_t": 0.3923775553703308, + "grad_norm": 19.02363395690918, + "learning_rate": 4.6322706682636137e-07, + "logits/chosen": -0.3660031855106354, + "logits/rejected": -0.3579842448234558, + "logps/chosen": -163.310302734375, + "logps/ref_chosen": -61.216468811035156, + "logps/ref_rejected": -95.89378356933594, + "logps/rejected": -299.62017822265625, + "loss": 1.0435, + "margin_dpo/margin_mean": 101.63257598876953, + "margin_dpo/margin_std": 129.4840545654297, + "step": 177 + }, + { + "KL/chosen_KL_mean": -114.02023315429688, + "KL/mean": -175.93092346191406, + "KL/rejected_KL_mean": -237.8416290283203, + "KL/std": 142.59483337402344, + "epoch": 0.26138032305433184, + "fcm_dpo/beta": 0.004583236761391163, + "fcm_dpo/delta": -0.17850404977798462, + "fcm_dpo/margin": 123.82139587402344, + "fcm_dpo/q_t": 0.3769300878047943, + "grad_norm": 22.4151554107666, + "learning_rate": 4.6255428393240354e-07, + "logits/chosen": -0.2533833086490631, + "logits/rejected": -0.24754983186721802, + "logps/chosen": -172.28501892089844, + "logps/ref_chosen": -58.26478958129883, + "logps/ref_rejected": -105.3653335571289, + "logps/rejected": -343.20697021484375, + "loss": 1.0058, + "margin_dpo/margin_mean": 123.82139587402344, + "margin_dpo/margin_std": 155.78990173339844, + "step": 178 + }, + { + "KL/chosen_KL_mean": -119.15005493164062, + "KL/mean": -162.2534637451172, + "KL/rejected_KL_mean": -205.35687255859375, + "KL/std": 122.278076171875, + "epoch": 0.26284875183553597, + "fcm_dpo/beta": 0.004542763344943523, + "fcm_dpo/delta": 0.008336875587701797, + "fcm_dpo/margin": 86.20682525634766, + "fcm_dpo/q_t": 0.4141519069671631, + "grad_norm": 29.362442016601562, + "learning_rate": 4.6187590003538724e-07, + "logits/chosen": -0.287581205368042, + "logits/rejected": -0.29421767592430115, + "logps/chosen": -180.2083740234375, + "logps/ref_chosen": -61.05832290649414, + "logps/ref_rejected": -90.52782440185547, + "logps/rejected": -295.88470458984375, + "loss": 1.146, + "margin_dpo/margin_mean": 86.20682525634766, + "margin_dpo/margin_std": 153.45443725585938, + "step": 179 + }, + { + "KL/chosen_KL_mean": -102.34881591796875, + "KL/mean": -159.50070190429688, + "KL/rejected_KL_mean": -216.65260314941406, + "KL/std": 110.17591857910156, + "epoch": 0.2643171806167401, + "fcm_dpo/beta": 0.004480388015508652, + "fcm_dpo/delta": -0.11835239082574844, + "fcm_dpo/margin": 114.30378723144531, + "fcm_dpo/q_t": 0.38382506370544434, + "grad_norm": 20.13519859313965, + "learning_rate": 4.611919330113591e-07, + "logits/chosen": -0.27647683024406433, + "logits/rejected": -0.27314233779907227, + "logps/chosen": -156.6915283203125, + "logps/ref_chosen": -54.34272003173828, + "logps/ref_rejected": -98.21183776855469, + "logps/rejected": -314.86444091796875, + "loss": 1.0244, + "margin_dpo/margin_mean": 114.30378723144531, + "margin_dpo/margin_std": 138.1670379638672, + "step": 180 + }, + { + "KL/chosen_KL_mean": -88.83747100830078, + "KL/mean": -122.73734283447266, + "KL/rejected_KL_mean": -156.63722229003906, + "KL/std": 99.75308990478516, + "epoch": 0.2657856093979442, + "fcm_dpo/beta": 0.004530083388090134, + "fcm_dpo/delta": 0.0955948680639267, + "fcm_dpo/margin": 67.79975128173828, + "fcm_dpo/q_t": 0.4301533102989197, + "grad_norm": 19.06928825378418, + "learning_rate": 4.605024008834863e-07, + "logits/chosen": -0.30789846181869507, + "logits/rejected": -0.2823750972747803, + "logps/chosen": -143.83792114257812, + "logps/ref_chosen": -55.000457763671875, + "logps/ref_rejected": -61.656166076660156, + "logps/rejected": -218.29339599609375, + "loss": 1.1817, + "margin_dpo/margin_mean": 67.79975128173828, + "margin_dpo/margin_std": 127.82617950439453, + "step": 181 + }, + { + "KL/chosen_KL_mean": -82.40182495117188, + "KL/mean": -144.41375732421875, + "KL/rejected_KL_mean": -206.42568969726562, + "KL/std": 120.55620574951172, + "epoch": 0.26725403817914833, + "fcm_dpo/beta": 0.0044230264611542225, + "fcm_dpo/delta": -0.1580391675233841, + "fcm_dpo/margin": 124.02388000488281, + "fcm_dpo/q_t": 0.37559401988983154, + "grad_norm": 17.178207397460938, + "learning_rate": 4.598073218215817e-07, + "logits/chosen": -0.27274084091186523, + "logits/rejected": -0.28051310777664185, + "logps/chosen": -123.50967407226562, + "logps/ref_chosen": -41.107852935791016, + "logps/ref_rejected": -89.5215835571289, + "logps/rejected": -295.947265625, + "loss": 1.0052, + "margin_dpo/margin_mean": 124.02387237548828, + "margin_dpo/margin_std": 144.7704620361328, + "step": 182 + }, + { + "KL/chosen_KL_mean": -124.9429931640625, + "KL/mean": -156.95785522460938, + "KL/rejected_KL_mean": -188.97271728515625, + "KL/std": 100.42144012451172, + "epoch": 0.2687224669603524, + "fcm_dpo/beta": 0.004367251414805651, + "fcm_dpo/delta": -0.04111050069332123, + "fcm_dpo/margin": 64.02972412109375, + "fcm_dpo/q_t": 0.4345667362213135, + "grad_norm": 21.047754287719727, + "learning_rate": 4.5910671414162484e-07, + "logits/chosen": -0.3011692762374878, + "logits/rejected": -0.2902328372001648, + "logps/chosen": -182.46755981445312, + "logps/ref_chosen": -57.52456283569336, + "logps/ref_rejected": -75.97572326660156, + "logps/rejected": -264.9484558105469, + "loss": 1.1896, + "margin_dpo/margin_mean": 64.02973175048828, + "margin_dpo/margin_std": 109.3995132446289, + "step": 183 + }, + { + "KL/chosen_KL_mean": -106.86518859863281, + "KL/mean": -141.30413818359375, + "KL/rejected_KL_mean": -175.74305725097656, + "KL/std": 97.58000183105469, + "epoch": 0.2701908957415565, + "fcm_dpo/beta": 0.00436544232070446, + "fcm_dpo/delta": -0.0041433474980294704, + "fcm_dpo/margin": 68.87787628173828, + "fcm_dpo/q_t": 0.4317898750305176, + "grad_norm": 17.791168212890625, + "learning_rate": 4.5840059630527985e-07, + "logits/chosen": -0.33284837007522583, + "logits/rejected": -0.32246139645576477, + "logps/chosen": -165.41015625, + "logps/ref_chosen": -58.544952392578125, + "logps/ref_rejected": -76.63406372070312, + "logps/rejected": -252.37713623046875, + "loss": 1.1784, + "margin_dpo/margin_mean": 68.87787628173828, + "margin_dpo/margin_std": 122.99388122558594, + "step": 184 + }, + { + "KL/chosen_KL_mean": -114.82501983642578, + "KL/mean": -141.30067443847656, + "KL/rejected_KL_mean": -167.77633666992188, + "KL/std": 109.46660614013672, + "epoch": 0.27165932452276065, + "fcm_dpo/beta": 0.004431641660630703, + "fcm_dpo/delta": 0.0773247703909874, + "fcm_dpo/margin": 52.95130157470703, + "fcm_dpo/q_t": 0.44948315620422363, + "grad_norm": 19.48412322998047, + "learning_rate": 4.5768898691940836e-07, + "logits/chosen": -0.2981659173965454, + "logits/rejected": -0.2731373608112335, + "logps/chosen": -176.85086059570312, + "logps/ref_chosen": -62.025848388671875, + "logps/ref_rejected": -73.7625961303711, + "logps/rejected": -241.5389404296875, + "loss": 1.2441, + "margin_dpo/margin_mean": 52.9513053894043, + "margin_dpo/margin_std": 132.49623107910156, + "step": 185 + }, + { + "KL/chosen_KL_mean": -105.06900787353516, + "KL/mean": -157.23483276367188, + "KL/rejected_KL_mean": -209.400634765625, + "KL/std": 109.59912109375, + "epoch": 0.27312775330396477, + "fcm_dpo/beta": 0.004385577980428934, + "fcm_dpo/delta": -0.06044544652104378, + "fcm_dpo/margin": 104.33164978027344, + "fcm_dpo/q_t": 0.39578211307525635, + "grad_norm": 20.40852928161621, + "learning_rate": 4.5697190473557947e-07, + "logits/chosen": -0.3577519655227661, + "logits/rejected": -0.3329923450946808, + "logps/chosen": -174.4224853515625, + "logps/ref_chosen": -69.35346984863281, + "logps/ref_rejected": -88.07244873046875, + "logps/rejected": -297.47308349609375, + "loss": 1.0496, + "margin_dpo/margin_mean": 104.33164978027344, + "margin_dpo/margin_std": 125.84209442138672, + "step": 186 + }, + { + "KL/chosen_KL_mean": -99.39851379394531, + "KL/mean": -143.77651977539062, + "KL/rejected_KL_mean": -188.154541015625, + "KL/std": 107.75646209716797, + "epoch": 0.2745961820851689, + "fcm_dpo/beta": 0.004401649348437786, + "fcm_dpo/delta": 0.00935973972082138, + "fcm_dpo/margin": 88.75602722167969, + "fcm_dpo/q_t": 0.4099273979663849, + "grad_norm": 21.53246307373047, + "learning_rate": 4.5624936864957555e-07, + "logits/chosen": -0.3320094645023346, + "logits/rejected": -0.32622426748275757, + "logps/chosen": -152.15496826171875, + "logps/ref_chosen": -52.7564582824707, + "logps/ref_rejected": -81.96910095214844, + "logps/rejected": -270.1236267089844, + "loss": 1.0945, + "margin_dpo/margin_mean": 88.75602722167969, + "margin_dpo/margin_std": 114.76980590820312, + "step": 187 + }, + { + "KL/chosen_KL_mean": -93.59693908691406, + "KL/mean": -147.21905517578125, + "KL/rejected_KL_mean": -200.8411865234375, + "KL/std": 118.67021179199219, + "epoch": 0.27606461086637296, + "fcm_dpo/beta": 0.004345991648733616, + "fcm_dpo/delta": -0.0692645013332367, + "fcm_dpo/margin": 107.2442626953125, + "fcm_dpo/q_t": 0.39470282196998596, + "grad_norm": 31.087356567382812, + "learning_rate": 4.5552139770089454e-07, + "logits/chosen": -0.328615665435791, + "logits/rejected": -0.3348105847835541, + "logps/chosen": -143.01242065429688, + "logps/ref_chosen": -49.415489196777344, + "logps/ref_rejected": -89.54043579101562, + "logps/rejected": -290.3816223144531, + "loss": 1.0482, + "margin_dpo/margin_mean": 107.2442626953125, + "margin_dpo/margin_std": 131.91983032226562, + "step": 188 + }, + { + "KL/chosen_KL_mean": -109.96575927734375, + "KL/mean": -154.14344787597656, + "KL/rejected_KL_mean": -198.32113647460938, + "KL/std": 121.00015258789062, + "epoch": 0.2775330396475771, + "fcm_dpo/beta": 0.0043410686776041985, + "fcm_dpo/delta": 0.01706843078136444, + "fcm_dpo/margin": 88.35537719726562, + "fcm_dpo/q_t": 0.41580936312675476, + "grad_norm": 24.516483306884766, + "learning_rate": 4.5478801107224794e-07, + "logits/chosen": -0.31679028272628784, + "logits/rejected": -0.29732340574264526, + "logps/chosen": -162.36471557617188, + "logps/ref_chosen": -52.39896011352539, + "logps/ref_rejected": -72.16735076904297, + "logps/rejected": -270.4884948730469, + "loss": 1.1315, + "margin_dpo/margin_mean": 88.35537719726562, + "margin_dpo/margin_std": 149.4475860595703, + "step": 189 + }, + { + "KL/chosen_KL_mean": -119.08251953125, + "KL/mean": -171.51473999023438, + "KL/rejected_KL_mean": -223.94699096679688, + "KL/std": 130.3875732421875, + "epoch": 0.2790014684287812, + "fcm_dpo/beta": 0.004339671693742275, + "fcm_dpo/delta": -0.058640651404857635, + "fcm_dpo/margin": 104.86446380615234, + "fcm_dpo/q_t": 0.39942899346351624, + "grad_norm": 18.538650512695312, + "learning_rate": 4.5404922808905543e-07, + "logits/chosen": -0.3367846608161926, + "logits/rejected": -0.32651811838150024, + "logps/chosen": -183.76559448242188, + "logps/ref_chosen": -64.68305969238281, + "logps/ref_rejected": -102.55052185058594, + "logps/rejected": -326.49749755859375, + "loss": 1.0841, + "margin_dpo/margin_mean": 104.86446380615234, + "margin_dpo/margin_std": 149.67710876464844, + "step": 190 + }, + { + "KL/chosen_KL_mean": -109.22671508789062, + "KL/mean": -184.44924926757812, + "KL/rejected_KL_mean": -259.6717834472656, + "KL/std": 148.47653198242188, + "epoch": 0.28046989720998533, + "fcm_dpo/beta": 0.004123254679143429, + "fcm_dpo/delta": -0.2364530861377716, + "fcm_dpo/margin": 150.44508361816406, + "fcm_dpo/q_t": 0.36313188076019287, + "grad_norm": 19.302614212036133, + "learning_rate": 4.5330506821893565e-07, + "logits/chosen": -0.31450676918029785, + "logits/rejected": -0.2896798849105835, + "logps/chosen": -177.88558959960938, + "logps/ref_chosen": -68.65887451171875, + "logps/ref_rejected": -110.1396713256836, + "logps/rejected": -369.81146240234375, + "loss": 0.9577, + "margin_dpo/margin_mean": 150.44508361816406, + "margin_dpo/margin_std": 164.7996826171875, + "step": 191 + }, + { + "KL/chosen_KL_mean": -138.94046020507812, + "KL/mean": -188.46331787109375, + "KL/rejected_KL_mean": -237.9861602783203, + "KL/std": 130.4176025390625, + "epoch": 0.28193832599118945, + "fcm_dpo/beta": 0.004099044483155012, + "fcm_dpo/delta": -0.0063680801540613174, + "fcm_dpo/margin": 99.04568481445312, + "fcm_dpo/q_t": 0.4107716679573059, + "grad_norm": 21.972942352294922, + "learning_rate": 4.5255555107119336e-07, + "logits/chosen": -0.28979045152664185, + "logits/rejected": -0.2879485487937927, + "logps/chosen": -208.66738891601562, + "logps/ref_chosen": -69.72691345214844, + "logps/ref_rejected": -103.32135009765625, + "logps/rejected": -341.3074951171875, + "loss": 1.1173, + "margin_dpo/margin_mean": 99.04568481445312, + "margin_dpo/margin_std": 159.619140625, + "step": 192 + }, + { + "KL/chosen_KL_mean": -135.95660400390625, + "KL/mean": -166.56283569335938, + "KL/rejected_KL_mean": -197.16908264160156, + "KL/std": 121.18572998046875, + "epoch": 0.2834067547723935, + "fcm_dpo/beta": 0.004099993035197258, + "fcm_dpo/delta": 0.03158862516283989, + "fcm_dpo/margin": 61.212486267089844, + "fcm_dpo/q_t": 0.4411364793777466, + "grad_norm": 28.599891662597656, + "learning_rate": 4.5180069639630236e-07, + "logits/chosen": -0.3054850101470947, + "logits/rejected": -0.2954953908920288, + "logps/chosen": -196.1470947265625, + "logps/ref_chosen": -60.19049835205078, + "logps/ref_rejected": -76.40755462646484, + "logps/rejected": -273.5766296386719, + "loss": 1.2463, + "margin_dpo/margin_mean": 61.21249008178711, + "margin_dpo/margin_std": 154.33920288085938, + "step": 193 + }, + { + "KL/chosen_KL_mean": -86.49054718017578, + "KL/mean": -132.54513549804688, + "KL/rejected_KL_mean": -178.5997314453125, + "KL/std": 100.00372314453125, + "epoch": 0.28487518355359764, + "fcm_dpo/beta": 0.004112754482775927, + "fcm_dpo/delta": 0.02185986563563347, + "fcm_dpo/margin": 92.10918426513672, + "fcm_dpo/q_t": 0.4111691117286682, + "grad_norm": 17.3194637298584, + "learning_rate": 4.510405240853854e-07, + "logits/chosen": -0.22373536229133606, + "logits/rejected": -0.20780496299266815, + "logps/chosen": -124.33091735839844, + "logps/ref_chosen": -37.84037399291992, + "logps/ref_rejected": -60.684783935546875, + "logps/rejected": -239.28451538085938, + "loss": 1.0929, + "margin_dpo/margin_mean": 92.10918426513672, + "margin_dpo/margin_std": 109.87422943115234, + "step": 194 + }, + { + "KL/chosen_KL_mean": -134.20848083496094, + "KL/mean": -185.5911865234375, + "KL/rejected_KL_mean": -236.97390747070312, + "KL/std": 123.36296081542969, + "epoch": 0.28634361233480177, + "fcm_dpo/beta": 0.004115342628210783, + "fcm_dpo/delta": -0.023933224380016327, + "fcm_dpo/margin": 102.76541900634766, + "fcm_dpo/q_t": 0.4027373194694519, + "grad_norm": 20.05968475341797, + "learning_rate": 4.5027505416968985e-07, + "logits/chosen": -0.2823333740234375, + "logits/rejected": -0.29932117462158203, + "logps/chosen": -189.1000518798828, + "logps/ref_chosen": -54.891571044921875, + "logps/ref_rejected": -96.77095794677734, + "logps/rejected": -333.744873046875, + "loss": 1.0699, + "margin_dpo/margin_mean": 102.76541137695312, + "margin_dpo/margin_std": 126.29493713378906, + "step": 195 + }, + { + "KL/chosen_KL_mean": -106.2418212890625, + "KL/mean": -163.61598205566406, + "KL/rejected_KL_mean": -220.99012756347656, + "KL/std": 124.76324462890625, + "epoch": 0.2878120411160059, + "fcm_dpo/beta": 0.004045085981488228, + "fcm_dpo/delta": -0.06845034658908844, + "fcm_dpo/margin": 114.74829864501953, + "fcm_dpo/q_t": 0.39634737372398376, + "grad_norm": 16.996543884277344, + "learning_rate": 4.495043068200599e-07, + "logits/chosen": -0.3161476254463196, + "logits/rejected": -0.30336615443229675, + "logps/chosen": -159.487060546875, + "logps/ref_chosen": -53.245243072509766, + "logps/ref_rejected": -76.05294799804688, + "logps/rejected": -297.0430603027344, + "loss": 1.0639, + "margin_dpo/margin_mean": 114.74829864501953, + "margin_dpo/margin_std": 149.60369873046875, + "step": 196 + }, + { + "KL/chosen_KL_mean": -109.41055297851562, + "KL/mean": -152.39816284179688, + "KL/rejected_KL_mean": -195.3857879638672, + "KL/std": 105.53937530517578, + "epoch": 0.28928046989721, + "fcm_dpo/beta": 0.004100443329662085, + "fcm_dpo/delta": 0.048719555139541626, + "fcm_dpo/margin": 85.9752197265625, + "fcm_dpo/q_t": 0.4195671081542969, + "grad_norm": 18.929014205932617, + "learning_rate": 4.4872830234640493e-07, + "logits/chosen": -0.2705003023147583, + "logits/rejected": -0.264984130859375, + "logps/chosen": -169.8308868408203, + "logps/ref_chosen": -60.42033386230469, + "logps/ref_rejected": -77.20890808105469, + "logps/rejected": -272.5946960449219, + "loss": 1.1245, + "margin_dpo/margin_mean": 85.9752197265625, + "margin_dpo/margin_std": 122.52574920654297, + "step": 197 + }, + { + "KL/chosen_KL_mean": -121.74513244628906, + "KL/mean": -178.72122192382812, + "KL/rejected_KL_mean": -235.69732666015625, + "KL/std": 134.29644775390625, + "epoch": 0.2907488986784141, + "fcm_dpo/beta": 0.0040567112155258656, + "fcm_dpo/delta": -0.06527149677276611, + "fcm_dpo/margin": 113.95219421386719, + "fcm_dpo/q_t": 0.39734193682670593, + "grad_norm": 20.2078914642334, + "learning_rate": 4.479470611971645e-07, + "logits/chosen": -0.3216399848461151, + "logits/rejected": -0.3219534158706665, + "logps/chosen": -176.78131103515625, + "logps/ref_chosen": -55.03618621826172, + "logps/ref_rejected": -97.24325561523438, + "logps/rejected": -332.9405822753906, + "loss": 1.0587, + "margin_dpo/margin_mean": 113.95220184326172, + "margin_dpo/margin_std": 150.6449737548828, + "step": 198 + }, + { + "KL/chosen_KL_mean": -117.42064666748047, + "KL/mean": -173.71978759765625, + "KL/rejected_KL_mean": -230.0189666748047, + "KL/std": 121.98558044433594, + "epoch": 0.2922173274596182, + "fcm_dpo/beta": 0.003988361917436123, + "fcm_dpo/delta": -0.05231431871652603, + "fcm_dpo/margin": 112.59829711914062, + "fcm_dpo/q_t": 0.3982279896736145, + "grad_norm": 19.83755111694336, + "learning_rate": 4.471606039587695e-07, + "logits/chosen": -0.33298349380493164, + "logits/rejected": -0.3157057762145996, + "logps/chosen": -174.24948120117188, + "logps/ref_chosen": -56.828826904296875, + "logps/ref_rejected": -84.64820861816406, + "logps/rejected": -314.66717529296875, + "loss": 1.0711, + "margin_dpo/margin_mean": 112.59829711914062, + "margin_dpo/margin_std": 149.75167846679688, + "step": 199 + }, + { + "KL/chosen_KL_mean": -118.09941101074219, + "KL/mean": -175.55503845214844, + "KL/rejected_KL_mean": -233.01065063476562, + "KL/std": 135.21307373046875, + "epoch": 0.2936857562408223, + "fcm_dpo/beta": 0.003952971659600735, + "fcm_dpo/delta": -0.05722919851541519, + "fcm_dpo/margin": 114.91127014160156, + "fcm_dpo/q_t": 0.4002299904823303, + "grad_norm": 27.9632625579834, + "learning_rate": 4.4636895135509966e-07, + "logits/chosen": -0.2579571604728699, + "logits/rejected": -0.24183320999145508, + "logps/chosen": -171.16647338867188, + "logps/ref_chosen": -53.06706237792969, + "logps/ref_rejected": -80.60843658447266, + "logps/rejected": -313.61907958984375, + "loss": 1.0904, + "margin_dpo/margin_mean": 114.9112548828125, + "margin_dpo/margin_std": 175.34649658203125, + "step": 200 + }, + { + "KL/chosen_KL_mean": -124.49310302734375, + "KL/mean": -181.124267578125, + "KL/rejected_KL_mean": -237.75543212890625, + "KL/std": 141.97161865234375, + "epoch": 0.29515418502202645, + "fcm_dpo/beta": 0.003938804380595684, + "fcm_dpo/delta": -0.04832981526851654, + "fcm_dpo/margin": 113.26231384277344, + "fcm_dpo/q_t": 0.399710088968277, + "grad_norm": 20.296661376953125, + "learning_rate": 4.455721242469372e-07, + "logits/chosen": -0.35240548849105835, + "logits/rejected": -0.35075196623802185, + "logps/chosen": -199.89532470703125, + "logps/ref_chosen": -75.4022216796875, + "logps/ref_rejected": -114.80821990966797, + "logps/rejected": -352.56365966796875, + "loss": 1.079, + "margin_dpo/margin_mean": 113.26231384277344, + "margin_dpo/margin_std": 158.4004669189453, + "step": 201 + }, + { + "KL/chosen_KL_mean": -125.97883605957031, + "KL/mean": -165.76266479492188, + "KL/rejected_KL_mean": -205.54647827148438, + "KL/std": 121.41524505615234, + "epoch": 0.2966226138032305, + "fcm_dpo/beta": 0.003970341291278601, + "fcm_dpo/delta": 0.08675570785999298, + "fcm_dpo/margin": 79.567626953125, + "fcm_dpo/q_t": 0.4300415515899658, + "grad_norm": 21.754104614257812, + "learning_rate": 4.4477014363141755e-07, + "logits/chosen": -0.2775609493255615, + "logits/rejected": -0.29300734400749207, + "logps/chosen": -176.0801544189453, + "logps/ref_chosen": -50.101318359375, + "logps/ref_rejected": -86.98503112792969, + "logps/rejected": -292.531494140625, + "loss": 1.1859, + "margin_dpo/margin_mean": 79.567626953125, + "margin_dpo/margin_std": 157.5809326171875, + "step": 202 + }, + { + "KL/chosen_KL_mean": -122.95875549316406, + "KL/mean": -171.21636962890625, + "KL/rejected_KL_mean": -219.47396850585938, + "KL/std": 122.28076934814453, + "epoch": 0.29809104258443464, + "fcm_dpo/beta": 0.003991924226284027, + "fcm_dpo/delta": 0.01531082671135664, + "fcm_dpo/margin": 96.51522827148438, + "fcm_dpo/q_t": 0.41198647022247314, + "grad_norm": 18.4498233795166, + "learning_rate": 4.439630306414758e-07, + "logits/chosen": -0.30969899892807007, + "logits/rejected": -0.2990788519382477, + "logps/chosen": -183.56845092773438, + "logps/ref_chosen": -60.60969543457031, + "logps/ref_rejected": -85.89596557617188, + "logps/rejected": -305.36993408203125, + "loss": 1.105, + "margin_dpo/margin_mean": 96.51522827148438, + "margin_dpo/margin_std": 135.06640625, + "step": 203 + }, + { + "KL/chosen_KL_mean": -139.44410705566406, + "KL/mean": -183.3643341064453, + "KL/rejected_KL_mean": -227.28453063964844, + "KL/std": 135.77395629882812, + "epoch": 0.29955947136563876, + "fcm_dpo/beta": 0.00402648001909256, + "fcm_dpo/delta": 0.04800150915980339, + "fcm_dpo/margin": 87.84043884277344, + "fcm_dpo/q_t": 0.42272064089775085, + "grad_norm": 23.171846389770508, + "learning_rate": 4.431508065452897e-07, + "logits/chosen": -0.40281885862350464, + "logits/rejected": -0.3679213225841522, + "logps/chosen": -219.60906982421875, + "logps/ref_chosen": -80.16496276855469, + "logps/ref_rejected": -87.69590759277344, + "logps/rejected": -314.9804382324219, + "loss": 1.1632, + "margin_dpo/margin_mean": 87.84043884277344, + "margin_dpo/margin_std": 163.03756713867188, + "step": 204 + }, + { + "KL/chosen_KL_mean": -131.06268310546875, + "KL/mean": -188.67852783203125, + "KL/rejected_KL_mean": -246.2943572998047, + "KL/std": 136.5657958984375, + "epoch": 0.3010279001468429, + "fcm_dpo/beta": 0.003965743817389011, + "fcm_dpo/delta": -0.061633773148059845, + "fcm_dpo/margin": 115.23167419433594, + "fcm_dpo/q_t": 0.3951718211174011, + "grad_norm": 19.90717124938965, + "learning_rate": 4.4233349274571974e-07, + "logits/chosen": -0.30089646577835083, + "logits/rejected": -0.27246442437171936, + "logps/chosen": -190.44741821289062, + "logps/ref_chosen": -59.384735107421875, + "logps/ref_rejected": -85.12505340576172, + "logps/rejected": -331.4194030761719, + "loss": 1.0691, + "margin_dpo/margin_mean": 115.23167419433594, + "margin_dpo/margin_std": 149.4026641845703, + "step": 205 + }, + { + "KL/chosen_KL_mean": -116.30944061279297, + "KL/mean": -179.20240783691406, + "KL/rejected_KL_mean": -242.0953826904297, + "KL/std": 123.43933868408203, + "epoch": 0.302496328928047, + "fcm_dpo/beta": 0.0039128996431827545, + "fcm_dpo/delta": -0.0976862832903862, + "fcm_dpo/margin": 125.78593444824219, + "fcm_dpo/q_t": 0.38541656732559204, + "grad_norm": 26.1035099029541, + "learning_rate": 4.415111107797445e-07, + "logits/chosen": -0.2364203929901123, + "logits/rejected": -0.2409205287694931, + "logps/chosen": -163.27394104003906, + "logps/ref_chosen": -46.964500427246094, + "logps/ref_rejected": -98.9534912109375, + "logps/rejected": -341.04888916015625, + "loss": 1.0192, + "margin_dpo/margin_mean": 125.78593444824219, + "margin_dpo/margin_std": 134.7174072265625, + "step": 206 + }, + { + "KL/chosen_KL_mean": -106.32139587402344, + "KL/mean": -176.8770751953125, + "KL/rejected_KL_mean": -247.43272399902344, + "KL/std": 138.58750915527344, + "epoch": 0.3039647577092511, + "fcm_dpo/beta": 0.0038270847871899605, + "fcm_dpo/delta": -0.14793969690799713, + "fcm_dpo/margin": 141.11134338378906, + "fcm_dpo/q_t": 0.37947410345077515, + "grad_norm": 31.017250061035156, + "learning_rate": 4.4068368231789365e-07, + "logits/chosen": -0.3106921911239624, + "logits/rejected": -0.2881418466567993, + "logps/chosen": -162.37765502929688, + "logps/ref_chosen": -56.05625915527344, + "logps/ref_rejected": -84.44779968261719, + "logps/rejected": -331.8805236816406, + "loss": 0.9999, + "margin_dpo/margin_mean": 141.111328125, + "margin_dpo/margin_std": 160.7344970703125, + "step": 207 + }, + { + "KL/chosen_KL_mean": -168.3114776611328, + "KL/mean": -226.00588989257812, + "KL/rejected_KL_mean": -283.7003173828125, + "KL/std": 139.61465454101562, + "epoch": 0.3054331864904552, + "fcm_dpo/beta": 0.0037661269307136536, + "fcm_dpo/delta": -0.03632538765668869, + "fcm_dpo/margin": 115.3888168334961, + "fcm_dpo/q_t": 0.4015238881111145, + "grad_norm": 28.11204719543457, + "learning_rate": 4.398512291636768e-07, + "logits/chosen": -0.34051740169525146, + "logits/rejected": -0.32537776231765747, + "logps/chosen": -235.37908935546875, + "logps/ref_chosen": -67.06761169433594, + "logps/ref_rejected": -94.28689575195312, + "logps/rejected": -377.9872131347656, + "loss": 1.0934, + "margin_dpo/margin_mean": 115.3888168334961, + "margin_dpo/margin_std": 170.67803955078125, + "step": 208 + }, + { + "KL/chosen_KL_mean": -137.9810028076172, + "KL/mean": -188.3424835205078, + "KL/rejected_KL_mean": -238.7039794921875, + "KL/std": 125.35417175292969, + "epoch": 0.3069016152716593, + "fcm_dpo/beta": 0.003780151717364788, + "fcm_dpo/delta": 0.020004911348223686, + "fcm_dpo/margin": 100.72296142578125, + "fcm_dpo/q_t": 0.41364553570747375, + "grad_norm": 24.225013732910156, + "learning_rate": 4.3901377325300857e-07, + "logits/chosen": -0.23914602398872375, + "logits/rejected": -0.22716867923736572, + "logps/chosen": -194.16268920898438, + "logps/ref_chosen": -56.18169403076172, + "logps/ref_rejected": -80.94152069091797, + "logps/rejected": -319.6455078125, + "loss": 1.1282, + "margin_dpo/margin_mean": 100.72296142578125, + "margin_dpo/margin_std": 158.71670532226562, + "step": 209 + }, + { + "KL/chosen_KL_mean": -121.80764770507812, + "KL/mean": -179.4827880859375, + "KL/rejected_KL_mean": -237.15792846679688, + "KL/std": 125.80329895019531, + "epoch": 0.30837004405286345, + "fcm_dpo/beta": 0.003775266231968999, + "fcm_dpo/delta": -0.03721902519464493, + "fcm_dpo/margin": 115.35026550292969, + "fcm_dpo/q_t": 0.40104708075523376, + "grad_norm": 22.758333206176758, + "learning_rate": 4.381713366536311e-07, + "logits/chosen": -0.27275052666664124, + "logits/rejected": -0.2652055025100708, + "logps/chosen": -168.17947387695312, + "logps/ref_chosen": -46.371822357177734, + "logps/ref_rejected": -76.68162536621094, + "logps/rejected": -313.83953857421875, + "loss": 1.0734, + "margin_dpo/margin_mean": 115.35028076171875, + "margin_dpo/margin_std": 150.81455993652344, + "step": 210 + }, + { + "KL/chosen_KL_mean": -178.0186767578125, + "KL/mean": -228.12051391601562, + "KL/rejected_KL_mean": -278.22235107421875, + "KL/std": 146.26031494140625, + "epoch": 0.30983847283406757, + "fcm_dpo/beta": 0.00377118238247931, + "fcm_dpo/delta": 0.02294088713824749, + "fcm_dpo/margin": 100.20367431640625, + "fcm_dpo/q_t": 0.41908180713653564, + "grad_norm": 27.190279006958008, + "learning_rate": 4.373239415645323e-07, + "logits/chosen": -0.3228422999382019, + "logits/rejected": -0.2874525487422943, + "logps/chosen": -256.9510192871094, + "logps/ref_chosen": -78.93235778808594, + "logps/ref_rejected": -86.82098388671875, + "logps/rejected": -365.0433349609375, + "loss": 1.1417, + "margin_dpo/margin_mean": 100.20367431640625, + "margin_dpo/margin_std": 171.17764282226562, + "step": 211 + }, + { + "KL/chosen_KL_mean": -148.94281005859375, + "KL/mean": -219.05404663085938, + "KL/rejected_KL_mean": -289.165283203125, + "KL/std": 156.24212646484375, + "epoch": 0.31130690161527164, + "fcm_dpo/beta": 0.0036756170447915792, + "fcm_dpo/delta": -0.12374652922153473, + "fcm_dpo/margin": 140.2224884033203, + "fcm_dpo/q_t": 0.3847534656524658, + "grad_norm": 24.610458374023438, + "learning_rate": 4.3647161031536086e-07, + "logits/chosen": -0.29684221744537354, + "logits/rejected": -0.29030919075012207, + "logps/chosen": -207.1398162841797, + "logps/ref_chosen": -58.19701385498047, + "logps/ref_rejected": -103.05785369873047, + "logps/rejected": -392.2231140136719, + "loss": 1.0371, + "margin_dpo/margin_mean": 140.22247314453125, + "margin_dpo/margin_std": 175.03460693359375, + "step": 212 + }, + { + "KL/chosen_KL_mean": -138.5974884033203, + "KL/mean": -207.8812713623047, + "KL/rejected_KL_mean": -277.1650390625, + "KL/std": 138.962158203125, + "epoch": 0.31277533039647576, + "fcm_dpo/beta": 0.003611032385379076, + "fcm_dpo/delta": -0.1061759814620018, + "fcm_dpo/margin": 138.56756591796875, + "fcm_dpo/q_t": 0.3859563171863556, + "grad_norm": 24.44261360168457, + "learning_rate": 4.3561436536583774e-07, + "logits/chosen": -0.3207147717475891, + "logits/rejected": -0.29644298553466797, + "logps/chosen": -206.11019897460938, + "logps/ref_chosen": -67.51271057128906, + "logps/ref_rejected": -93.91471862792969, + "logps/rejected": -371.07977294921875, + "loss": 1.0262, + "margin_dpo/margin_mean": 138.56756591796875, + "margin_dpo/margin_std": 162.03538513183594, + "step": 213 + }, + { + "KL/chosen_KL_mean": -118.24832916259766, + "KL/mean": -180.09129333496094, + "KL/rejected_KL_mean": -241.93423461914062, + "KL/std": 136.1957550048828, + "epoch": 0.3142437591776799, + "fcm_dpo/beta": 0.003579269163310528, + "fcm_dpo/delta": -0.04467523843050003, + "fcm_dpo/margin": 123.68592071533203, + "fcm_dpo/q_t": 0.4001598358154297, + "grad_norm": 21.233543395996094, + "learning_rate": 4.3475222930516473e-07, + "logits/chosen": -0.27026987075805664, + "logits/rejected": -0.2760501205921173, + "logps/chosen": -159.85321044921875, + "logps/ref_chosen": -41.604888916015625, + "logps/ref_rejected": -77.51741027832031, + "logps/rejected": -319.45166015625, + "loss": 1.0675, + "margin_dpo/margin_mean": 123.68592834472656, + "margin_dpo/margin_std": 161.08143615722656, + "step": 214 + }, + { + "KL/chosen_KL_mean": -144.5403289794922, + "KL/mean": -206.9398193359375, + "KL/rejected_KL_mean": -269.33929443359375, + "KL/std": 141.94100952148438, + "epoch": 0.315712187958884, + "fcm_dpo/beta": 0.003539241384714842, + "fcm_dpo/delta": -0.043820615857839584, + "fcm_dpo/margin": 124.79898071289062, + "fcm_dpo/q_t": 0.3984990119934082, + "grad_norm": 22.27779769897461, + "learning_rate": 4.3388522485142885e-07, + "logits/chosen": -0.30568164587020874, + "logits/rejected": -0.3003992736339569, + "logps/chosen": -197.81959533691406, + "logps/ref_chosen": -53.279266357421875, + "logps/ref_rejected": -89.96464538574219, + "logps/rejected": -359.303955078125, + "loss": 1.0582, + "margin_dpo/margin_mean": 124.79898071289062, + "margin_dpo/margin_std": 149.58108520507812, + "step": 215 + }, + { + "KL/chosen_KL_mean": -145.63059997558594, + "KL/mean": -207.81692504882812, + "KL/rejected_KL_mean": -270.00323486328125, + "KL/std": 146.82232666015625, + "epoch": 0.31718061674008813, + "fcm_dpo/beta": 0.003528446890413761, + "fcm_dpo/delta": -0.04068659618496895, + "fcm_dpo/margin": 124.37261199951172, + "fcm_dpo/q_t": 0.40117913484573364, + "grad_norm": 22.973649978637695, + "learning_rate": 4.330133748510036e-07, + "logits/chosen": -0.3099059760570526, + "logits/rejected": -0.29590481519699097, + "logps/chosen": -194.51840209960938, + "logps/ref_chosen": -48.887794494628906, + "logps/ref_rejected": -77.19892883300781, + "logps/rejected": -347.2021484375, + "loss": 1.0879, + "margin_dpo/margin_mean": 124.37261199951172, + "margin_dpo/margin_std": 179.5612030029297, + "step": 216 + }, + { + "KL/chosen_KL_mean": -145.95330810546875, + "KL/mean": -222.22024536132812, + "KL/rejected_KL_mean": -298.4871826171875, + "KL/std": 154.306396484375, + "epoch": 0.3186490455212922, + "fcm_dpo/beta": 0.0034370056819170713, + "fcm_dpo/delta": -0.13140688836574554, + "fcm_dpo/margin": 152.53384399414062, + "fcm_dpo/q_t": 0.3825801610946655, + "grad_norm": 19.35890007019043, + "learning_rate": 4.3213670227794757e-07, + "logits/chosen": -0.2936730980873108, + "logits/rejected": -0.2913660407066345, + "logps/chosen": -195.79861450195312, + "logps/ref_chosen": -49.845306396484375, + "logps/ref_rejected": -100.07832336425781, + "logps/rejected": -398.56549072265625, + "loss": 1.0126, + "margin_dpo/margin_mean": 152.53384399414062, + "margin_dpo/margin_std": 178.2061767578125, + "step": 217 + }, + { + "KL/chosen_KL_mean": -150.1802215576172, + "KL/mean": -208.71240234375, + "KL/rejected_KL_mean": -267.24462890625, + "KL/std": 148.29302978515625, + "epoch": 0.3201174743024963, + "fcm_dpo/beta": 0.003414642531424761, + "fcm_dpo/delta": 0.00023527629673480988, + "fcm_dpo/margin": 117.06442260742188, + "fcm_dpo/q_t": 0.4108254313468933, + "grad_norm": 20.17465591430664, + "learning_rate": 4.3125523023339815e-07, + "logits/chosen": -0.304365873336792, + "logits/rejected": -0.3002937436103821, + "logps/chosen": -208.75689697265625, + "logps/ref_chosen": -58.576683044433594, + "logps/ref_rejected": -87.84639739990234, + "logps/rejected": -355.09100341796875, + "loss": 1.1119, + "margin_dpo/margin_mean": 117.06441497802734, + "margin_dpo/margin_std": 178.00685119628906, + "step": 218 + }, + { + "KL/chosen_KL_mean": -161.31483459472656, + "KL/mean": -210.52394104003906, + "KL/rejected_KL_mean": -259.7330627441406, + "KL/std": 153.1935577392578, + "epoch": 0.32158590308370044, + "fcm_dpo/beta": 0.0034687574952840805, + "fcm_dpo/delta": 0.059782225638628006, + "fcm_dpo/margin": 98.41824340820312, + "fcm_dpo/q_t": 0.42399221658706665, + "grad_norm": 28.482004165649414, + "learning_rate": 4.303689819449636e-07, + "logits/chosen": -0.3322892487049103, + "logits/rejected": -0.3280831575393677, + "logps/chosen": -222.39869689941406, + "logps/ref_chosen": -61.083858489990234, + "logps/ref_rejected": -85.83042907714844, + "logps/rejected": -345.5634765625, + "loss": 1.1775, + "margin_dpo/margin_mean": 98.4182357788086, + "margin_dpo/margin_std": 191.77099609375, + "step": 219 + }, + { + "KL/chosen_KL_mean": -178.02310180664062, + "KL/mean": -218.76882934570312, + "KL/rejected_KL_mean": -259.5145568847656, + "KL/std": 128.1570587158203, + "epoch": 0.32305433186490456, + "fcm_dpo/beta": 0.0035150342155247927, + "fcm_dpo/delta": 0.11705435812473297, + "fcm_dpo/margin": 81.49143981933594, + "fcm_dpo/q_t": 0.43284928798675537, + "grad_norm": 23.411617279052734, + "learning_rate": 4.2947798076611047e-07, + "logits/chosen": -0.34668925404548645, + "logits/rejected": -0.32663899660110474, + "logps/chosen": -248.0543975830078, + "logps/ref_chosen": -70.03128051757812, + "logps/ref_rejected": -87.68551635742188, + "logps/rejected": -347.2000732421875, + "loss": 1.1759, + "margin_dpo/margin_mean": 81.49143981933594, + "margin_dpo/margin_std": 136.2982177734375, + "step": 220 + }, + { + "KL/chosen_KL_mean": -148.138427734375, + "KL/mean": -238.5886993408203, + "KL/rejected_KL_mean": -329.03900146484375, + "KL/std": 168.93533325195312, + "epoch": 0.3245227606461087, + "fcm_dpo/beta": 0.0034304747823625803, + "fcm_dpo/delta": -0.23470783233642578, + "fcm_dpo/margin": 180.9005584716797, + "fcm_dpo/q_t": 0.3586632013320923, + "grad_norm": 30.417999267578125, + "learning_rate": 4.285822501755485e-07, + "logits/chosen": -0.3302137851715088, + "logits/rejected": -0.3399258852005005, + "logps/chosen": -200.29312133789062, + "logps/ref_chosen": -52.15470886230469, + "logps/ref_rejected": -106.46768188476562, + "logps/rejected": -435.50665283203125, + "loss": 0.9405, + "margin_dpo/margin_mean": 180.9005584716797, + "margin_dpo/margin_std": 174.17965698242188, + "step": 221 + }, + { + "KL/chosen_KL_mean": -150.40621948242188, + "KL/mean": -219.40045166015625, + "KL/rejected_KL_mean": -288.39471435546875, + "KL/std": 150.12142944335938, + "epoch": 0.32599118942731276, + "fcm_dpo/beta": 0.0033583808690309525, + "fcm_dpo/delta": -0.06650819629430771, + "fcm_dpo/margin": 137.9884796142578, + "fcm_dpo/q_t": 0.39468562602996826, + "grad_norm": 26.266162872314453, + "learning_rate": 4.276818137766118e-07, + "logits/chosen": -0.3900914788246155, + "logits/rejected": -0.39828717708587646, + "logps/chosen": -211.3773193359375, + "logps/ref_chosen": -60.971099853515625, + "logps/ref_rejected": -100.00115203857422, + "logps/rejected": -388.3958740234375, + "loss": 1.0513, + "margin_dpo/margin_mean": 137.9884796142578, + "margin_dpo/margin_std": 170.7541046142578, + "step": 222 + }, + { + "KL/chosen_KL_mean": -167.1275634765625, + "KL/mean": -225.02340698242188, + "KL/rejected_KL_mean": -282.91925048828125, + "KL/std": 149.61036682128906, + "epoch": 0.3274596182085169, + "fcm_dpo/beta": 0.0033374596387147903, + "fcm_dpo/delta": 0.014067416079342365, + "fcm_dpo/margin": 115.79170227050781, + "fcm_dpo/q_t": 0.413519948720932, + "grad_norm": 27.220115661621094, + "learning_rate": 4.2677669529663686e-07, + "logits/chosen": -0.28457504510879517, + "logits/rejected": -0.28229665756225586, + "logps/chosen": -219.76812744140625, + "logps/ref_chosen": -52.64057540893555, + "logps/ref_rejected": -82.82502746582031, + "logps/rejected": -365.7442626953125, + "loss": 1.1331, + "margin_dpo/margin_mean": 115.79170227050781, + "margin_dpo/margin_std": 194.0624542236328, + "step": 223 + }, + { + "KL/chosen_KL_mean": -151.59030151367188, + "KL/mean": -218.800048828125, + "KL/rejected_KL_mean": -286.0097961425781, + "KL/std": 170.38592529296875, + "epoch": 0.328928046989721, + "fcm_dpo/beta": 0.003296963172033429, + "fcm_dpo/delta": -0.046691399067640305, + "fcm_dpo/margin": 134.4194793701172, + "fcm_dpo/q_t": 0.4030272364616394, + "grad_norm": 22.146337509155273, + "learning_rate": 4.2586691858633747e-07, + "logits/chosen": -0.3159272074699402, + "logits/rejected": -0.29905927181243896, + "logps/chosen": -200.1857147216797, + "logps/ref_chosen": -48.59541320800781, + "logps/ref_rejected": -77.11648559570312, + "logps/rejected": -363.12628173828125, + "loss": 1.0896, + "margin_dpo/margin_mean": 134.4194793701172, + "margin_dpo/margin_std": 198.03443908691406, + "step": 224 + }, + { + "KL/chosen_KL_mean": -178.26751708984375, + "KL/mean": -257.4982604980469, + "KL/rejected_KL_mean": -336.72900390625, + "KL/std": 167.69284057617188, + "epoch": 0.3303964757709251, + "fcm_dpo/beta": 0.00325207132846117, + "fcm_dpo/delta": -0.12169913947582245, + "fcm_dpo/margin": 158.46151733398438, + "fcm_dpo/q_t": 0.3860987424850464, + "grad_norm": 20.99376678466797, + "learning_rate": 4.249525076191759e-07, + "logits/chosen": -0.31401747465133667, + "logits/rejected": -0.3065524101257324, + "logps/chosen": -236.26797485351562, + "logps/ref_chosen": -58.000465393066406, + "logps/ref_rejected": -99.90291595458984, + "logps/rejected": -436.6319274902344, + "loss": 1.0399, + "margin_dpo/margin_mean": 158.4615020751953, + "margin_dpo/margin_std": 211.03097534179688, + "step": 225 + }, + { + "KL/chosen_KL_mean": -154.25083923339844, + "KL/mean": -218.34536743164062, + "KL/rejected_KL_mean": -282.43994140625, + "KL/std": 166.46963500976562, + "epoch": 0.33186490455212925, + "fcm_dpo/beta": 0.003208290785551071, + "fcm_dpo/delta": -0.01261284202337265, + "fcm_dpo/margin": 128.18907165527344, + "fcm_dpo/q_t": 0.41000163555145264, + "grad_norm": 24.45271873474121, + "learning_rate": 4.2403348649073167e-07, + "logits/chosen": -0.3766024708747864, + "logits/rejected": -0.3424544930458069, + "logps/chosen": -213.14964294433594, + "logps/ref_chosen": -58.898799896240234, + "logps/ref_rejected": -78.68775939941406, + "logps/rejected": -361.127685546875, + "loss": 1.1111, + "margin_dpo/margin_mean": 128.18907165527344, + "margin_dpo/margin_std": 198.9245147705078, + "step": 226 + }, + { + "KL/chosen_KL_mean": -181.13844299316406, + "KL/mean": -261.814453125, + "KL/rejected_KL_mean": -342.49041748046875, + "KL/std": 189.39837646484375, + "epoch": 0.3333333333333333, + "fcm_dpo/beta": 0.003157012164592743, + "fcm_dpo/delta": -0.11616270244121552, + "fcm_dpo/margin": 161.3520050048828, + "fcm_dpo/q_t": 0.38600099086761475, + "grad_norm": 20.904441833496094, + "learning_rate": 4.2310987941806615e-07, + "logits/chosen": -0.34949034452438354, + "logits/rejected": -0.3386501669883728, + "logps/chosen": -240.2106170654297, + "logps/ref_chosen": -59.072181701660156, + "logps/ref_rejected": -99.41236877441406, + "logps/rejected": -441.9028015136719, + "loss": 1.0312, + "margin_dpo/margin_mean": 161.35202026367188, + "margin_dpo/margin_std": 202.92633056640625, + "step": 227 + }, + { + "KL/chosen_KL_mean": -182.08627319335938, + "KL/mean": -236.57989501953125, + "KL/rejected_KL_mean": -291.0735168457031, + "KL/std": 154.74215698242188, + "epoch": 0.33480176211453744, + "fcm_dpo/beta": 0.0031847129575908184, + "fcm_dpo/delta": 0.05459333956241608, + "fcm_dpo/margin": 108.98725128173828, + "fcm_dpo/q_t": 0.42142853140830994, + "grad_norm": 23.708755493164062, + "learning_rate": 4.2218171073908463e-07, + "logits/chosen": -0.33513695001602173, + "logits/rejected": -0.31932687759399414, + "logps/chosen": -247.97756958007812, + "logps/ref_chosen": -65.89128875732422, + "logps/ref_rejected": -91.04875183105469, + "logps/rejected": -382.12225341796875, + "loss": 1.1621, + "margin_dpo/margin_mean": 108.98725128173828, + "margin_dpo/margin_std": 198.34036254882812, + "step": 228 + }, + { + "KL/chosen_KL_mean": -189.10377502441406, + "KL/mean": -245.99990844726562, + "KL/rejected_KL_mean": -302.89605712890625, + "KL/std": 178.9686279296875, + "epoch": 0.33627019089574156, + "fcm_dpo/beta": 0.003205793909728527, + "fcm_dpo/delta": 0.0365116223692894, + "fcm_dpo/margin": 113.79228210449219, + "fcm_dpo/q_t": 0.41705501079559326, + "grad_norm": 32.29511642456055, + "learning_rate": 4.212490049118951e-07, + "logits/chosen": -0.4073641300201416, + "logits/rejected": -0.37577980756759644, + "logps/chosen": -259.8101501464844, + "logps/ref_chosen": -70.70637512207031, + "logps/ref_rejected": -84.52741241455078, + "logps/rejected": -387.4234619140625, + "loss": 1.1379, + "margin_dpo/margin_mean": 113.79228210449219, + "margin_dpo/margin_std": 187.65060424804688, + "step": 229 + }, + { + "KL/chosen_KL_mean": -134.7830810546875, + "KL/mean": -224.5379638671875, + "KL/rejected_KL_mean": -314.2928466796875, + "KL/std": 161.85427856445312, + "epoch": 0.3377386196769457, + "fcm_dpo/beta": 0.0031274245120584965, + "fcm_dpo/delta": -0.171125590801239, + "fcm_dpo/margin": 179.509765625, + "fcm_dpo/q_t": 0.37051764130592346, + "grad_norm": 29.884607315063477, + "learning_rate": 4.203117865141635e-07, + "logits/chosen": -0.33008766174316406, + "logits/rejected": -0.3357307016849518, + "logps/chosen": -174.06509399414062, + "logps/ref_chosen": -39.282005310058594, + "logps/ref_rejected": -85.62191009521484, + "logps/rejected": -399.9147644042969, + "loss": 0.9731, + "margin_dpo/margin_mean": 179.509765625, + "margin_dpo/margin_std": 179.34201049804688, + "step": 230 + }, + { + "KL/chosen_KL_mean": -152.9336395263672, + "KL/mean": -212.84384155273438, + "KL/rejected_KL_mean": -272.7540283203125, + "KL/std": 138.79013061523438, + "epoch": 0.3392070484581498, + "fcm_dpo/beta": 0.0031172512099146843, + "fcm_dpo/delta": 0.027481382712721825, + "fcm_dpo/margin": 119.82038116455078, + "fcm_dpo/q_t": 0.4157181978225708, + "grad_norm": 23.125385284423828, + "learning_rate": 4.1937008024246625e-07, + "logits/chosen": -0.38110148906707764, + "logits/rejected": -0.35203638672828674, + "logps/chosen": -216.2100830078125, + "logps/ref_chosen": -63.27644348144531, + "logps/ref_rejected": -74.1239013671875, + "logps/rejected": -346.8779296875, + "loss": 1.1104, + "margin_dpo/margin_mean": 119.82037353515625, + "margin_dpo/margin_std": 167.3756866455078, + "step": 231 + }, + { + "KL/chosen_KL_mean": -184.20339965820312, + "KL/mean": -231.1331024169922, + "KL/rejected_KL_mean": -278.06280517578125, + "KL/std": 157.26010131835938, + "epoch": 0.3406754772393539, + "fcm_dpo/beta": 0.003163769142702222, + "fcm_dpo/delta": 0.10632483661174774, + "fcm_dpo/margin": 93.8593978881836, + "fcm_dpo/q_t": 0.4341059923171997, + "grad_norm": 21.933931350708008, + "learning_rate": 4.1842391091163933e-07, + "logits/chosen": -0.397582471370697, + "logits/rejected": -0.37435561418533325, + "logps/chosen": -254.9521484375, + "logps/ref_chosen": -70.74876403808594, + "logps/ref_rejected": -83.97706604003906, + "logps/rejected": -362.03985595703125, + "loss": 1.1763, + "margin_dpo/margin_mean": 93.8593978881836, + "margin_dpo/margin_std": 168.48648071289062, + "step": 232 + }, + { + "KL/chosen_KL_mean": -160.23126220703125, + "KL/mean": -240.1412353515625, + "KL/rejected_KL_mean": -320.05120849609375, + "KL/std": 174.00173950195312, + "epoch": 0.342143906020558, + "fcm_dpo/beta": 0.0031418418511748314, + "fcm_dpo/delta": -0.1073966920375824, + "fcm_dpo/margin": 159.8199462890625, + "fcm_dpo/q_t": 0.39174312353134155, + "grad_norm": 22.029088973999023, + "learning_rate": 4.174733034541245e-07, + "logits/chosen": -0.3995114266872406, + "logits/rejected": -0.40224993228912354, + "logps/chosen": -215.11419677734375, + "logps/ref_chosen": -54.8829345703125, + "logps/ref_rejected": -107.4800796508789, + "logps/rejected": -427.5312805175781, + "loss": 1.0642, + "margin_dpo/margin_mean": 159.8199462890625, + "margin_dpo/margin_std": 231.9136199951172, + "step": 233 + }, + { + "KL/chosen_KL_mean": -149.30445861816406, + "KL/mean": -230.3643798828125, + "KL/rejected_KL_mean": -311.4242858886719, + "KL/std": 154.63232421875, + "epoch": 0.3436123348017621, + "fcm_dpo/beta": 0.0030462380964308977, + "fcm_dpo/delta": -0.10102301090955734, + "fcm_dpo/margin": 162.1198272705078, + "fcm_dpo/q_t": 0.38654690980911255, + "grad_norm": 25.708904266357422, + "learning_rate": 4.165182829193126e-07, + "logits/chosen": -0.3387085795402527, + "logits/rejected": -0.36640793085098267, + "logps/chosen": -193.39898681640625, + "logps/ref_chosen": -44.094520568847656, + "logps/ref_rejected": -100.00663757324219, + "logps/rejected": -411.430908203125, + "loss": 1.0221, + "margin_dpo/margin_mean": 162.1198272705078, + "margin_dpo/margin_std": 174.42822265625, + "step": 234 + }, + { + "KL/chosen_KL_mean": -196.90953063964844, + "KL/mean": -248.29786682128906, + "KL/rejected_KL_mean": -299.68621826171875, + "KL/std": 150.12139892578125, + "epoch": 0.34508076358296624, + "fcm_dpo/beta": 0.0031026601791381836, + "fcm_dpo/delta": 0.08341732621192932, + "fcm_dpo/margin": 102.77667236328125, + "fcm_dpo/q_t": 0.42741718888282776, + "grad_norm": 29.176280975341797, + "learning_rate": 4.1555887447288255e-07, + "logits/chosen": -0.4230844974517822, + "logits/rejected": -0.4051060080528259, + "logps/chosen": -259.1474609375, + "logps/ref_chosen": -62.237911224365234, + "logps/ref_rejected": -90.39506530761719, + "logps/rejected": -390.0812683105469, + "loss": 1.1737, + "margin_dpo/margin_mean": 102.77667236328125, + "margin_dpo/margin_std": 188.78956604003906, + "step": 235 + }, + { + "KL/chosen_KL_mean": -145.384765625, + "KL/mean": -228.32327270507812, + "KL/rejected_KL_mean": -311.2618103027344, + "KL/std": 162.75277709960938, + "epoch": 0.3465491923641703, + "fcm_dpo/beta": 0.0030574114061892033, + "fcm_dpo/delta": -0.11282503604888916, + "fcm_dpo/margin": 165.8770294189453, + "fcm_dpo/q_t": 0.3818013072013855, + "grad_norm": 40.81807327270508, + "learning_rate": 4.1459510339613946e-07, + "logits/chosen": -0.37076273560523987, + "logits/rejected": -0.37188154458999634, + "logps/chosen": -194.72613525390625, + "logps/ref_chosen": -49.34136199951172, + "logps/ref_rejected": -103.51162719726562, + "logps/rejected": -414.7734375, + "loss": 0.9972, + "margin_dpo/margin_mean": 165.87701416015625, + "margin_dpo/margin_std": 158.9945526123047, + "step": 236 + }, + { + "KL/chosen_KL_mean": -204.69064331054688, + "KL/mean": -265.85699462890625, + "KL/rejected_KL_mean": -327.0233459472656, + "KL/std": 163.26248168945312, + "epoch": 0.34801762114537443, + "fcm_dpo/beta": 0.0030537089332938194, + "fcm_dpo/delta": 0.02740669995546341, + "fcm_dpo/margin": 122.33269500732422, + "fcm_dpo/q_t": 0.41523268818855286, + "grad_norm": 31.793489456176758, + "learning_rate": 4.136269950853473e-07, + "logits/chosen": -0.3925231993198395, + "logits/rejected": -0.38862764835357666, + "logps/chosen": -258.8587646484375, + "logps/ref_chosen": -54.168121337890625, + "logps/ref_rejected": -94.78036499023438, + "logps/rejected": -421.8037109375, + "loss": 1.1268, + "margin_dpo/margin_mean": 122.33268737792969, + "margin_dpo/margin_std": 192.3961639404297, + "step": 237 + }, + { + "KL/chosen_KL_mean": -185.77113342285156, + "KL/mean": -250.8236083984375, + "KL/rejected_KL_mean": -315.8760986328125, + "KL/std": 168.3731689453125, + "epoch": 0.34948604992657856, + "fcm_dpo/beta": 0.0030407910235226154, + "fcm_dpo/delta": 0.004025213420391083, + "fcm_dpo/margin": 130.10498046875, + "fcm_dpo/q_t": 0.4116755723953247, + "grad_norm": 23.8784236907959, + "learning_rate": 4.126545750510605e-07, + "logits/chosen": -0.3695657551288605, + "logits/rejected": -0.3817945122718811, + "logps/chosen": -239.7442626953125, + "logps/ref_chosen": -53.973121643066406, + "logps/ref_rejected": -89.41795349121094, + "logps/rejected": -405.2940673828125, + "loss": 1.1026, + "margin_dpo/margin_mean": 130.10496520996094, + "margin_dpo/margin_std": 184.20098876953125, + "step": 238 + }, + { + "KL/chosen_KL_mean": -210.7151641845703, + "KL/mean": -281.1242980957031, + "KL/rejected_KL_mean": -351.5334167480469, + "KL/std": 166.0899200439453, + "epoch": 0.3509544787077827, + "fcm_dpo/beta": 0.003023794386535883, + "fcm_dpo/delta": -0.028100494295358658, + "fcm_dpo/margin": 140.81825256347656, + "fcm_dpo/q_t": 0.40190714597702026, + "grad_norm": 22.221601486206055, + "learning_rate": 4.116778689174514e-07, + "logits/chosen": -0.368292897939682, + "logits/rejected": -0.36200201511383057, + "logps/chosen": -268.81298828125, + "logps/ref_chosen": -58.09782409667969, + "logps/ref_rejected": -93.59294128417969, + "logps/rejected": -445.1263732910156, + "loss": 1.0808, + "margin_dpo/margin_mean": 140.81825256347656, + "margin_dpo/margin_std": 182.08518981933594, + "step": 239 + }, + { + "KL/chosen_KL_mean": -216.9973602294922, + "KL/mean": -274.34576416015625, + "KL/rejected_KL_mean": -331.69415283203125, + "KL/std": 175.84886169433594, + "epoch": 0.3524229074889868, + "fcm_dpo/beta": 0.0030523231253027916, + "fcm_dpo/delta": 0.05162970349192619, + "fcm_dpo/margin": 114.69682312011719, + "fcm_dpo/q_t": 0.42092815041542053, + "grad_norm": 35.80266571044922, + "learning_rate": 4.106969024216348e-07, + "logits/chosen": -0.38743269443511963, + "logits/rejected": -0.3659111261367798, + "logps/chosen": -277.6118469238281, + "logps/ref_chosen": -60.6144905090332, + "logps/ref_rejected": -74.1185302734375, + "logps/rejected": -405.81268310546875, + "loss": 1.1633, + "margin_dpo/margin_mean": 114.69682312011719, + "margin_dpo/margin_std": 208.11935424804688, + "step": 240 + }, + { + "KL/chosen_KL_mean": -181.38380432128906, + "KL/mean": -275.91607666015625, + "KL/rejected_KL_mean": -370.4483337402344, + "KL/std": 190.72769165039062, + "epoch": 0.35389133627019087, + "fcm_dpo/beta": 0.002998801413923502, + "fcm_dpo/delta": -0.17671522498130798, + "fcm_dpo/margin": 189.06454467773438, + "fcm_dpo/q_t": 0.37420031428337097, + "grad_norm": 22.811471939086914, + "learning_rate": 4.097117014129903e-07, + "logits/chosen": -0.37639686465263367, + "logits/rejected": -0.3528909683227539, + "logps/chosen": -247.47486877441406, + "logps/ref_chosen": -66.091064453125, + "logps/ref_rejected": -88.06088256835938, + "logps/rejected": -458.50921630859375, + "loss": 0.9919, + "margin_dpo/margin_mean": 189.06454467773438, + "margin_dpo/margin_std": 214.1299591064453, + "step": 241 + }, + { + "KL/chosen_KL_mean": -193.56138610839844, + "KL/mean": -256.479736328125, + "KL/rejected_KL_mean": -319.3980407714844, + "KL/std": 160.45431518554688, + "epoch": 0.355359765051395, + "fcm_dpo/beta": 0.0029919487424194813, + "fcm_dpo/delta": 0.023674048483371735, + "fcm_dpo/margin": 125.83666229248047, + "fcm_dpo/q_t": 0.41582486033439636, + "grad_norm": 26.62445068359375, + "learning_rate": 4.087222918524807e-07, + "logits/chosen": -0.3719189763069153, + "logits/rejected": -0.35311925411224365, + "logps/chosen": -261.4253234863281, + "logps/ref_chosen": -67.86392974853516, + "logps/ref_rejected": -83.36033630371094, + "logps/rejected": -402.75836181640625, + "loss": 1.1239, + "margin_dpo/margin_mean": 125.83665466308594, + "margin_dpo/margin_std": 194.09152221679688, + "step": 242 + }, + { + "KL/chosen_KL_mean": -182.10948181152344, + "KL/mean": -261.4007568359375, + "KL/rejected_KL_mean": -340.6920166015625, + "KL/std": 171.34756469726562, + "epoch": 0.3568281938325991, + "fcm_dpo/beta": 0.0029446138069033623, + "fcm_dpo/delta": -0.07035504281520844, + "fcm_dpo/margin": 158.58251953125, + "fcm_dpo/q_t": 0.39357781410217285, + "grad_norm": 31.950265884399414, + "learning_rate": 4.07728699811968e-07, + "logits/chosen": -0.374033123254776, + "logits/rejected": -0.34913793206214905, + "logps/chosen": -245.1937255859375, + "logps/ref_chosen": -63.0842399597168, + "logps/ref_rejected": -76.33563232421875, + "logps/rejected": -417.02764892578125, + "loss": 1.0455, + "margin_dpo/margin_mean": 158.58251953125, + "margin_dpo/margin_std": 192.53515625, + "step": 243 + }, + { + "KL/chosen_KL_mean": -152.93548583984375, + "KL/mean": -233.27462768554688, + "KL/rejected_KL_mean": -313.61376953125, + "KL/std": 160.87197875976562, + "epoch": 0.35829662261380324, + "fcm_dpo/beta": 0.0029027406126260757, + "fcm_dpo/delta": -0.06980758905410767, + "fcm_dpo/margin": 160.67831420898438, + "fcm_dpo/q_t": 0.392170250415802, + "grad_norm": 34.01569747924805, + "learning_rate": 4.067309514735267e-07, + "logits/chosen": -0.4261099100112915, + "logits/rejected": -0.42256179451942444, + "logps/chosen": -214.076171875, + "logps/ref_chosen": -61.140689849853516, + "logps/ref_rejected": -94.89193725585938, + "logps/rejected": -408.5057067871094, + "loss": 1.0296, + "margin_dpo/margin_mean": 160.67831420898438, + "margin_dpo/margin_std": 168.09088134765625, + "step": 244 + }, + { + "KL/chosen_KL_mean": -182.1680450439453, + "KL/mean": -244.11676025390625, + "KL/rejected_KL_mean": -306.0654602050781, + "KL/std": 164.1088104248047, + "epoch": 0.35976505139500736, + "fcm_dpo/beta": 0.0028605135157704353, + "fcm_dpo/delta": -0.06291086226701736, + "fcm_dpo/margin": 123.89741516113281, + "fcm_dpo/q_t": 0.4174633324146271, + "grad_norm": 24.750608444213867, + "learning_rate": 4.057290731287531e-07, + "logits/chosen": -0.4355185627937317, + "logits/rejected": -0.4130878448486328, + "logps/chosen": -249.43032836914062, + "logps/ref_chosen": -67.26228332519531, + "logps/ref_rejected": -87.64010620117188, + "logps/rejected": -393.70556640625, + "loss": 1.1342, + "margin_dpo/margin_mean": 123.89741516113281, + "margin_dpo/margin_std": 185.7630615234375, + "step": 245 + }, + { + "KL/chosen_KL_mean": -177.64511108398438, + "KL/mean": -240.8006134033203, + "KL/rejected_KL_mean": -303.9561462402344, + "KL/std": 180.6883544921875, + "epoch": 0.36123348017621143, + "fcm_dpo/beta": 0.0028710057958960533, + "fcm_dpo/delta": 0.03875650092959404, + "fcm_dpo/margin": 126.31101989746094, + "fcm_dpo/q_t": 0.4186369776725769, + "grad_norm": 21.476974487304688, + "learning_rate": 4.047230911780736e-07, + "logits/chosen": -0.45945310592651367, + "logits/rejected": -0.42501145601272583, + "logps/chosen": -244.34207153320312, + "logps/ref_chosen": -66.69696807861328, + "logps/ref_rejected": -84.34634399414062, + "logps/rejected": -388.302490234375, + "loss": 1.1283, + "margin_dpo/margin_mean": 126.31101989746094, + "margin_dpo/margin_std": 195.61752319335938, + "step": 246 + }, + { + "KL/chosen_KL_mean": -199.306884765625, + "KL/mean": -292.37335205078125, + "KL/rejected_KL_mean": -385.4398193359375, + "KL/std": 187.60211181640625, + "epoch": 0.36270190895741555, + "fcm_dpo/beta": 0.002821533940732479, + "fcm_dpo/delta": -0.13242369890213013, + "fcm_dpo/margin": 186.1329345703125, + "fcm_dpo/q_t": 0.3798220753669739, + "grad_norm": 30.762731552124023, + "learning_rate": 4.0371303213004814e-07, + "logits/chosen": -0.3767867386341095, + "logits/rejected": -0.37550073862075806, + "logps/chosen": -255.9122314453125, + "logps/ref_chosen": -56.6053466796875, + "logps/ref_rejected": -106.29326629638672, + "logps/rejected": -491.73309326171875, + "loss": 1.002, + "margin_dpo/margin_mean": 186.1329345703125, + "margin_dpo/margin_std": 199.94444274902344, + "step": 247 + }, + { + "KL/chosen_KL_mean": -168.62863159179688, + "KL/mean": -249.12957763671875, + "KL/rejected_KL_mean": -329.6305236816406, + "KL/std": 152.61151123046875, + "epoch": 0.3641703377386197, + "fcm_dpo/beta": 0.002776243956759572, + "fcm_dpo/delta": -0.04965835064649582, + "fcm_dpo/margin": 161.00189208984375, + "fcm_dpo/q_t": 0.3941725492477417, + "grad_norm": 29.54060935974121, + "learning_rate": 4.0269892260067197e-07, + "logits/chosen": -0.39201515913009644, + "logits/rejected": -0.411907821893692, + "logps/chosen": -212.67184448242188, + "logps/ref_chosen": -44.043216705322266, + "logps/ref_rejected": -91.85687255859375, + "logps/rejected": -421.4873962402344, + "loss": 1.0304, + "margin_dpo/margin_mean": 161.00189208984375, + "margin_dpo/margin_std": 149.979248046875, + "step": 248 + }, + { + "KL/chosen_KL_mean": -237.52285766601562, + "KL/mean": -280.86602783203125, + "KL/rejected_KL_mean": -324.20916748046875, + "KL/std": 169.23114013671875, + "epoch": 0.3656387665198238, + "fcm_dpo/beta": 0.002819925779476762, + "fcm_dpo/delta": 0.07187280803918839, + "fcm_dpo/margin": 86.68631744384766, + "fcm_dpo/q_t": 0.44473278522491455, + "grad_norm": 31.686059951782227, + "learning_rate": 4.0168078931267426e-07, + "logits/chosen": -0.4031098484992981, + "logits/rejected": -0.3796127140522003, + "logps/chosen": -299.9652099609375, + "logps/ref_chosen": -62.442352294921875, + "logps/ref_rejected": -80.46806335449219, + "logps/rejected": -404.6772155761719, + "loss": 1.2357, + "margin_dpo/margin_mean": 86.68630981445312, + "margin_dpo/margin_std": 204.52139282226562, + "step": 249 + }, + { + "KL/chosen_KL_mean": -216.50027465820312, + "KL/mean": -296.10235595703125, + "KL/rejected_KL_mean": -375.7043762207031, + "KL/std": 172.25009155273438, + "epoch": 0.3671071953010279, + "fcm_dpo/beta": 0.002813429571688175, + "fcm_dpo/delta": -0.050442732870578766, + "fcm_dpo/margin": 159.2041015625, + "fcm_dpo/q_t": 0.396010160446167, + "grad_norm": 39.18155288696289, + "learning_rate": 4.006586590948141e-07, + "logits/chosen": -0.4070359468460083, + "logits/rejected": -0.35379326343536377, + "logps/chosen": -282.136962890625, + "logps/ref_chosen": -65.63668823242188, + "logps/ref_rejected": -73.87184143066406, + "logps/rejected": -449.57623291015625, + "loss": 1.048, + "margin_dpo/margin_mean": 159.2041015625, + "margin_dpo/margin_std": 176.7471923828125, + "step": 250 + }, + { + "KL/chosen_KL_mean": -232.60723876953125, + "KL/mean": -285.5682678222656, + "KL/rejected_KL_mean": -338.529296875, + "KL/std": 180.64505004882812, + "epoch": 0.368575624082232, + "fcm_dpo/beta": 0.002828112803399563, + "fcm_dpo/delta": 0.10367438197135925, + "fcm_dpo/margin": 105.92207336425781, + "fcm_dpo/q_t": 0.4322122633457184, + "grad_norm": 36.757362365722656, + "learning_rate": 3.9963255888117325e-07, + "logits/chosen": -0.4116126298904419, + "logits/rejected": -0.37989452481269836, + "logps/chosen": -289.7899475097656, + "logps/ref_chosen": -57.182716369628906, + "logps/ref_rejected": -77.66343688964844, + "logps/rejected": -416.1927490234375, + "loss": 1.1861, + "margin_dpo/margin_mean": 105.92207336425781, + "margin_dpo/margin_std": 196.50746154785156, + "step": 251 + }, + { + "KL/chosen_KL_mean": -235.21749877929688, + "KL/mean": -305.7660217285156, + "KL/rejected_KL_mean": -376.3145446777344, + "KL/std": 156.46197509765625, + "epoch": 0.3700440528634361, + "fcm_dpo/beta": 0.0028432621620595455, + "fcm_dpo/delta": -0.0013277605175971985, + "fcm_dpo/margin": 141.09707641601562, + "fcm_dpo/q_t": 0.4044671356678009, + "grad_norm": 32.75727081298828, + "learning_rate": 3.9860251571044666e-07, + "logits/chosen": -0.4887322187423706, + "logits/rejected": -0.4495335817337036, + "logps/chosen": -306.90313720703125, + "logps/ref_chosen": -71.68563842773438, + "logps/ref_rejected": -84.75799560546875, + "logps/rejected": -461.0725402832031, + "loss": 1.0723, + "margin_dpo/margin_mean": 141.0970916748047, + "margin_dpo/margin_std": 154.25804138183594, + "step": 252 + }, + { + "KL/chosen_KL_mean": -212.88052368164062, + "KL/mean": -288.3155517578125, + "KL/rejected_KL_mean": -363.75054931640625, + "KL/std": 172.37203979492188, + "epoch": 0.37151248164464024, + "fcm_dpo/beta": 0.002854670397937298, + "fcm_dpo/delta": -0.03276565670967102, + "fcm_dpo/margin": 150.87002563476562, + "fcm_dpo/q_t": 0.40049469470977783, + "grad_norm": 30.010822296142578, + "learning_rate": 3.9756855672522986e-07, + "logits/chosen": -0.4635479152202606, + "logits/rejected": -0.4555034637451172, + "logps/chosen": -282.01446533203125, + "logps/ref_chosen": -69.1339340209961, + "logps/ref_rejected": -98.70252990722656, + "logps/rejected": -462.4530944824219, + "loss": 1.0725, + "margin_dpo/margin_mean": 150.87002563476562, + "margin_dpo/margin_std": 187.1278533935547, + "step": 253 + }, + { + "KL/chosen_KL_mean": -218.92982482910156, + "KL/mean": -283.40130615234375, + "KL/rejected_KL_mean": -347.8727111816406, + "KL/std": 189.0523223876953, + "epoch": 0.37298091042584436, + "fcm_dpo/beta": 0.0028438782319426537, + "fcm_dpo/delta": 0.03458229452371597, + "fcm_dpo/margin": 128.94290161132812, + "fcm_dpo/q_t": 0.42061156034469604, + "grad_norm": 27.448017120361328, + "learning_rate": 3.965307091713037e-07, + "logits/chosen": -0.42898088693618774, + "logits/rejected": -0.412333220243454, + "logps/chosen": -273.0848388671875, + "logps/ref_chosen": -54.154998779296875, + "logps/ref_rejected": -90.30764770507812, + "logps/rejected": -438.18035888671875, + "loss": 1.1518, + "margin_dpo/margin_mean": 128.94290161132812, + "margin_dpo/margin_std": 234.24417114257812, + "step": 254 + }, + { + "KL/chosen_KL_mean": -221.4287109375, + "KL/mean": -289.2665710449219, + "KL/rejected_KL_mean": -357.1044006347656, + "KL/std": 166.446533203125, + "epoch": 0.3744493392070485, + "fcm_dpo/beta": 0.0028422600589692593, + "fcm_dpo/delta": 0.014506950974464417, + "fcm_dpo/margin": 135.67572021484375, + "fcm_dpo/q_t": 0.4121626913547516, + "grad_norm": 25.36671257019043, + "learning_rate": 3.954890003969163e-07, + "logits/chosen": -0.39531874656677246, + "logits/rejected": -0.3849487900733948, + "logps/chosen": -278.57037353515625, + "logps/ref_chosen": -57.14167022705078, + "logps/ref_rejected": -90.2085952758789, + "logps/rejected": -447.31298828125, + "loss": 1.1249, + "margin_dpo/margin_mean": 135.67572021484375, + "margin_dpo/margin_std": 212.42404174804688, + "step": 255 + }, + { + "KL/chosen_KL_mean": -182.2279052734375, + "KL/mean": -258.11773681640625, + "KL/rejected_KL_mean": -334.0075988769531, + "KL/std": 175.14785766601562, + "epoch": 0.37591776798825255, + "fcm_dpo/beta": 0.0028450002428144217, + "fcm_dpo/delta": -0.03324428200721741, + "fcm_dpo/margin": 151.77967834472656, + "fcm_dpo/q_t": 0.40111541748046875, + "grad_norm": 22.90846061706543, + "learning_rate": 3.944434578520628e-07, + "logits/chosen": -0.37317514419555664, + "logits/rejected": -0.38296449184417725, + "logps/chosen": -237.39138793945312, + "logps/ref_chosen": -55.163490295410156, + "logps/ref_rejected": -92.56291961669922, + "logps/rejected": -426.57049560546875, + "loss": 1.0753, + "margin_dpo/margin_mean": 151.77967834472656, + "margin_dpo/margin_std": 200.09576416015625, + "step": 256 + }, + { + "KL/chosen_KL_mean": -171.60227966308594, + "KL/mean": -247.82455444335938, + "KL/rejected_KL_mean": -324.04681396484375, + "KL/std": 171.75497436523438, + "epoch": 0.37738619676945667, + "fcm_dpo/beta": 0.0028111585415899754, + "fcm_dpo/delta": -0.03062574565410614, + "fcm_dpo/margin": 152.44454956054688, + "fcm_dpo/q_t": 0.40255630016326904, + "grad_norm": 23.42417335510254, + "learning_rate": 3.933941090877615e-07, + "logits/chosen": -0.37235432863235474, + "logits/rejected": -0.3574731945991516, + "logps/chosen": -221.02597045898438, + "logps/ref_chosen": -49.42369842529297, + "logps/ref_rejected": -79.53791809082031, + "logps/rejected": -403.5847473144531, + "loss": 1.079, + "margin_dpo/margin_mean": 152.4445343017578, + "margin_dpo/margin_std": 199.14427185058594, + "step": 257 + }, + { + "KL/chosen_KL_mean": -208.58660888671875, + "KL/mean": -283.39227294921875, + "KL/rejected_KL_mean": -358.19793701171875, + "KL/std": 177.68453979492188, + "epoch": 0.3788546255506608, + "fcm_dpo/beta": 0.0028177620843052864, + "fcm_dpo/delta": -0.022539909929037094, + "fcm_dpo/margin": 149.61135864257812, + "fcm_dpo/q_t": 0.40266942977905273, + "grad_norm": 21.441373825073242, + "learning_rate": 3.923409817553284e-07, + "logits/chosen": -0.38592028617858887, + "logits/rejected": -0.38714897632598877, + "logps/chosen": -267.9707336425781, + "logps/ref_chosen": -59.384124755859375, + "logps/ref_rejected": -95.99010467529297, + "logps/rejected": -454.18804931640625, + "loss": 1.0923, + "margin_dpo/margin_mean": 149.61135864257812, + "margin_dpo/margin_std": 213.09808349609375, + "step": 258 + }, + { + "KL/chosen_KL_mean": -190.80014038085938, + "KL/mean": -251.88589477539062, + "KL/rejected_KL_mean": -312.9716491699219, + "KL/std": 164.8191375732422, + "epoch": 0.3803230543318649, + "fcm_dpo/beta": 0.0028243116103112698, + "fcm_dpo/delta": 0.056878622621297836, + "fcm_dpo/margin": 122.1714859008789, + "fcm_dpo/q_t": 0.4213777184486389, + "grad_norm": 20.104839324951172, + "learning_rate": 3.9128410360564793e-07, + "logits/chosen": -0.43218350410461426, + "logits/rejected": -0.43397071957588196, + "logps/chosen": -243.62847900390625, + "logps/ref_chosen": -52.828346252441406, + "logps/ref_rejected": -89.191650390625, + "logps/rejected": -402.1632995605469, + "loss": 1.1409, + "margin_dpo/margin_mean": 122.17149353027344, + "margin_dpo/margin_std": 192.09332275390625, + "step": 259 + }, + { + "KL/chosen_KL_mean": -179.60235595703125, + "KL/mean": -260.7071838378906, + "KL/rejected_KL_mean": -341.81207275390625, + "KL/std": 161.13522338867188, + "epoch": 0.38179148311306904, + "fcm_dpo/beta": 0.0028252771589905024, + "fcm_dpo/delta": -0.06110315024852753, + "fcm_dpo/margin": 162.20968627929688, + "fcm_dpo/q_t": 0.39523473381996155, + "grad_norm": 28.360965728759766, + "learning_rate": 3.9022350248844246e-07, + "logits/chosen": -0.3964114189147949, + "logits/rejected": -0.41389453411102295, + "logps/chosen": -227.02001953125, + "logps/ref_chosen": -47.41767501831055, + "logps/ref_rejected": -95.08978271484375, + "logps/rejected": -436.90185546875, + "loss": 1.0368, + "margin_dpo/margin_mean": 162.20970153808594, + "margin_dpo/margin_std": 178.4059295654297, + "step": 260 + }, + { + "KL/chosen_KL_mean": -177.91055297851562, + "KL/mean": -253.4554443359375, + "KL/rejected_KL_mean": -329.0003356933594, + "KL/std": 176.36572265625, + "epoch": 0.3832599118942731, + "fcm_dpo/beta": 0.0027923104353249073, + "fcm_dpo/delta": -0.022975105792284012, + "fcm_dpo/margin": 151.08978271484375, + "fcm_dpo/q_t": 0.404338002204895, + "grad_norm": 19.33860969543457, + "learning_rate": 3.891592063515376e-07, + "logits/chosen": -0.344787061214447, + "logits/rejected": -0.3448353409767151, + "logps/chosen": -230.94192504882812, + "logps/ref_chosen": -53.03137969970703, + "logps/ref_rejected": -88.51494598388672, + "logps/rejected": -417.5152893066406, + "loss": 1.0807, + "margin_dpo/margin_mean": 151.08978271484375, + "margin_dpo/margin_std": 202.36895751953125, + "step": 261 + }, + { + "KL/chosen_KL_mean": -209.18212890625, + "KL/mean": -271.7855224609375, + "KL/rejected_KL_mean": -334.3888854980469, + "KL/std": 161.00527954101562, + "epoch": 0.38472834067547723, + "fcm_dpo/beta": 0.002820716006681323, + "fcm_dpo/delta": 0.04834378883242607, + "fcm_dpo/margin": 125.2067642211914, + "fcm_dpo/q_t": 0.41728508472442627, + "grad_norm": 22.37173843383789, + "learning_rate": 3.880912432401264e-07, + "logits/chosen": -0.37253305315971375, + "logits/rejected": -0.349983811378479, + "logps/chosen": -268.8022766113281, + "logps/ref_chosen": -59.620140075683594, + "logps/ref_rejected": -86.41853332519531, + "logps/rejected": -420.80743408203125, + "loss": 1.1113, + "margin_dpo/margin_mean": 125.2067642211914, + "margin_dpo/margin_std": 159.09869384765625, + "step": 262 + }, + { + "KL/chosen_KL_mean": -196.1129150390625, + "KL/mean": -283.8154296875, + "KL/rejected_KL_mean": -371.51788330078125, + "KL/std": 189.73565673828125, + "epoch": 0.38619676945668135, + "fcm_dpo/beta": 0.0027740350924432278, + "fcm_dpo/delta": -0.09155195951461792, + "fcm_dpo/margin": 175.40493774414062, + "fcm_dpo/q_t": 0.3883536458015442, + "grad_norm": 20.149484634399414, + "learning_rate": 3.870196412960302e-07, + "logits/chosen": -0.38992154598236084, + "logits/rejected": -0.36442500352859497, + "logps/chosen": -255.53387451171875, + "logps/ref_chosen": -59.42094421386719, + "logps/ref_rejected": -96.85720825195312, + "logps/rejected": -468.3750915527344, + "loss": 1.0312, + "margin_dpo/margin_mean": 175.40493774414062, + "margin_dpo/margin_std": 203.16934204101562, + "step": 263 + }, + { + "KL/chosen_KL_mean": -215.1387939453125, + "KL/mean": -291.82952880859375, + "KL/rejected_KL_mean": -368.520263671875, + "KL/std": 174.2871856689453, + "epoch": 0.3876651982378855, + "fcm_dpo/beta": 0.0027424870058894157, + "fcm_dpo/delta": -0.022643636912107468, + "fcm_dpo/margin": 153.38145446777344, + "fcm_dpo/q_t": 0.4036809206008911, + "grad_norm": 21.269851684570312, + "learning_rate": 3.8594442875695665e-07, + "logits/chosen": -0.4510270953178406, + "logits/rejected": -0.44331079721450806, + "logps/chosen": -277.86090087890625, + "logps/ref_chosen": -62.722084045410156, + "logps/ref_rejected": -93.85620880126953, + "logps/rejected": -462.37646484375, + "loss": 1.0828, + "margin_dpo/margin_mean": 153.38145446777344, + "margin_dpo/margin_std": 199.32289123535156, + "step": 264 + }, + { + "KL/chosen_KL_mean": -248.67318725585938, + "KL/mean": -317.910888671875, + "KL/rejected_KL_mean": -387.1485900878906, + "KL/std": 200.890380859375, + "epoch": 0.3891336270190896, + "fcm_dpo/beta": 0.0027675144374370575, + "fcm_dpo/delta": 0.01740371063351631, + "fcm_dpo/margin": 138.47540283203125, + "fcm_dpo/q_t": 0.4150318503379822, + "grad_norm": 26.573469161987305, + "learning_rate": 3.848656339557562e-07, + "logits/chosen": -0.3928653597831726, + "logits/rejected": -0.37691766023635864, + "logps/chosen": -310.6446533203125, + "logps/ref_chosen": -61.971466064453125, + "logps/ref_rejected": -88.02059936523438, + "logps/rejected": -475.169189453125, + "loss": 1.1456, + "margin_dpo/margin_mean": 138.47540283203125, + "margin_dpo/margin_std": 247.60830688476562, + "step": 265 + }, + { + "KL/chosen_KL_mean": -252.51040649414062, + "KL/mean": -309.49322509765625, + "KL/rejected_KL_mean": -366.4759826660156, + "KL/std": 169.7954864501953, + "epoch": 0.39060205580029367, + "fcm_dpo/beta": 0.0028024273924529552, + "fcm_dpo/delta": 0.08327002823352814, + "fcm_dpo/margin": 113.96560668945312, + "fcm_dpo/q_t": 0.4282206892967224, + "grad_norm": 47.3087272644043, + "learning_rate": 3.8378328531967507e-07, + "logits/chosen": -0.4444202184677124, + "logits/rejected": -0.40275368094444275, + "logps/chosen": -319.6100769042969, + "logps/ref_chosen": -67.09967041015625, + "logps/ref_rejected": -67.97122192382812, + "logps/rejected": -434.44720458984375, + "loss": 1.1671, + "margin_dpo/margin_mean": 113.96561431884766, + "margin_dpo/margin_std": 202.73809814453125, + "step": 266 + }, + { + "KL/chosen_KL_mean": -230.05838012695312, + "KL/mean": -302.7050476074219, + "KL/rejected_KL_mean": -375.3517150878906, + "KL/std": 186.0106964111328, + "epoch": 0.3920704845814978, + "fcm_dpo/beta": 0.002801567316055298, + "fcm_dpo/delta": -0.007594583556056023, + "fcm_dpo/margin": 145.2933349609375, + "fcm_dpo/q_t": 0.4081183075904846, + "grad_norm": 42.9015007019043, + "learning_rate": 3.8269741136960646e-07, + "logits/chosen": -0.45946192741394043, + "logits/rejected": -0.4249149560928345, + "logps/chosen": -299.02911376953125, + "logps/ref_chosen": -68.97075653076172, + "logps/ref_rejected": -90.16844940185547, + "logps/rejected": -465.5201416015625, + "loss": 1.1036, + "margin_dpo/margin_mean": 145.2933349609375, + "margin_dpo/margin_std": 216.03964233398438, + "step": 267 + }, + { + "KL/chosen_KL_mean": -240.4680938720703, + "KL/mean": -314.457763671875, + "KL/rejected_KL_mean": -388.44744873046875, + "KL/std": 178.60980224609375, + "epoch": 0.3935389133627019, + "fcm_dpo/beta": 0.002796788001433015, + "fcm_dpo/delta": -0.014621859416365623, + "fcm_dpo/margin": 147.9793243408203, + "fcm_dpo/q_t": 0.40683937072753906, + "grad_norm": 40.570281982421875, + "learning_rate": 3.8160804071933894e-07, + "logits/chosen": -0.4454725980758667, + "logits/rejected": -0.4506447911262512, + "logps/chosen": -296.368408203125, + "logps/ref_chosen": -55.90031051635742, + "logps/ref_rejected": -101.64763641357422, + "logps/rejected": -490.0950927734375, + "loss": 1.0956, + "margin_dpo/margin_mean": 147.9793243408203, + "margin_dpo/margin_std": 214.23509216308594, + "step": 268 + }, + { + "KL/chosen_KL_mean": -267.2579345703125, + "KL/mean": -353.1320495605469, + "KL/rejected_KL_mean": -439.00616455078125, + "KL/std": 187.59320068359375, + "epoch": 0.39500734214390604, + "fcm_dpo/beta": 0.0027706455439329147, + "fcm_dpo/delta": -0.07959494739770889, + "fcm_dpo/margin": 171.74826049804688, + "fcm_dpo/q_t": 0.39228904247283936, + "grad_norm": 46.9615364074707, + "learning_rate": 3.8051520207480204e-07, + "logits/chosen": -0.4752381443977356, + "logits/rejected": -0.4565969407558441, + "logps/chosen": -337.2974853515625, + "logps/ref_chosen": -70.03955841064453, + "logps/ref_rejected": -107.34937286376953, + "logps/rejected": -546.3555297851562, + "loss": 1.0666, + "margin_dpo/margin_mean": 171.74826049804688, + "margin_dpo/margin_std": 238.62860107421875, + "step": 269 + }, + { + "KL/chosen_KL_mean": -247.1910400390625, + "KL/mean": -316.6631774902344, + "KL/rejected_KL_mean": -386.13531494140625, + "KL/std": 175.74114990234375, + "epoch": 0.3964757709251101, + "fcm_dpo/beta": 0.0027725521940737963, + "fcm_dpo/delta": 0.015092555433511734, + "fcm_dpo/margin": 138.9442901611328, + "fcm_dpo/q_t": 0.41288816928863525, + "grad_norm": 37.03840255737305, + "learning_rate": 3.794189242333106e-07, + "logits/chosen": -0.49404847621917725, + "logits/rejected": -0.48589587211608887, + "logps/chosen": -316.7245178222656, + "logps/ref_chosen": -69.53347778320312, + "logps/ref_rejected": -109.92864990234375, + "logps/rejected": -496.06396484375, + "loss": 1.1284, + "margin_dpo/margin_mean": 138.94430541992188, + "margin_dpo/margin_std": 224.98004150390625, + "step": 270 + }, + { + "KL/chosen_KL_mean": -232.8036346435547, + "KL/mean": -320.9297790527344, + "KL/rejected_KL_mean": -409.055908203125, + "KL/std": 184.5279541015625, + "epoch": 0.39794419970631423, + "fcm_dpo/beta": 0.002722542965784669, + "fcm_dpo/delta": -0.08434660732746124, + "fcm_dpo/margin": 176.25228881835938, + "fcm_dpo/q_t": 0.3910220265388489, + "grad_norm": 28.625146865844727, + "learning_rate": 3.7831923608280514e-07, + "logits/chosen": -0.412489652633667, + "logits/rejected": -0.39746084809303284, + "logps/chosen": -289.5682067871094, + "logps/ref_chosen": -56.76456832885742, + "logps/ref_rejected": -92.51383972167969, + "logps/rejected": -501.56976318359375, + "loss": 1.036, + "margin_dpo/margin_mean": 176.25230407714844, + "margin_dpo/margin_std": 207.7333526611328, + "step": 271 + }, + { + "KL/chosen_KL_mean": -228.11370849609375, + "KL/mean": -338.98406982421875, + "KL/rejected_KL_mean": -449.85443115234375, + "KL/std": 208.9947052001953, + "epoch": 0.39941262848751835, + "fcm_dpo/beta": 0.002650283742696047, + "fcm_dpo/delta": -0.1989383101463318, + "fcm_dpo/margin": 221.74072265625, + "fcm_dpo/q_t": 0.3656819462776184, + "grad_norm": 28.886754989624023, + "learning_rate": 3.772161666010912e-07, + "logits/chosen": -0.3776736557483673, + "logits/rejected": -0.39134037494659424, + "logps/chosen": -277.6108703613281, + "logps/ref_chosen": -49.497154235839844, + "logps/ref_rejected": -105.54279327392578, + "logps/rejected": -555.397216796875, + "loss": 0.9588, + "margin_dpo/margin_mean": 221.74072265625, + "margin_dpo/margin_std": 217.42919921875, + "step": 272 + }, + { + "KL/chosen_KL_mean": -259.71624755859375, + "KL/mean": -362.0477294921875, + "KL/rejected_KL_mean": -464.37921142578125, + "KL/std": 212.40911865234375, + "epoch": 0.4008810572687225, + "fcm_dpo/beta": 0.0025578399654477835, + "fcm_dpo/delta": -0.130665123462677, + "fcm_dpo/margin": 204.66293334960938, + "fcm_dpo/q_t": 0.38167691230773926, + "grad_norm": 22.654264450073242, + "learning_rate": 3.761097448550755e-07, + "logits/chosen": -0.3508095145225525, + "logits/rejected": -0.33492064476013184, + "logps/chosen": -322.691650390625, + "logps/ref_chosen": -62.97539520263672, + "logps/ref_rejected": -92.49858093261719, + "logps/rejected": -556.8778076171875, + "loss": 1.0197, + "margin_dpo/margin_mean": 204.6629638671875, + "margin_dpo/margin_std": 246.62765502929688, + "step": 273 + }, + { + "KL/chosen_KL_mean": -288.61114501953125, + "KL/mean": -363.21929931640625, + "KL/rejected_KL_mean": -437.827392578125, + "KL/std": 188.80880737304688, + "epoch": 0.4023494860499266, + "fcm_dpo/beta": 0.002562238136306405, + "fcm_dpo/delta": 0.018024658784270287, + "fcm_dpo/margin": 149.21621704101562, + "fcm_dpo/q_t": 0.4136476218700409, + "grad_norm": 26.790353775024414, + "learning_rate": 3.75e-07, + "logits/chosen": -0.31436455249786377, + "logits/rejected": -0.299676775932312, + "logps/chosen": -344.27886962890625, + "logps/ref_chosen": -55.66770935058594, + "logps/ref_rejected": -77.33308410644531, + "logps/rejected": -515.1604614257812, + "loss": 1.1192, + "margin_dpo/margin_mean": 149.21621704101562, + "margin_dpo/margin_std": 225.90634155273438, + "step": 274 + }, + { + "KL/chosen_KL_mean": -218.06088256835938, + "KL/mean": -301.337890625, + "KL/rejected_KL_mean": -384.61492919921875, + "KL/std": 183.7950439453125, + "epoch": 0.40381791483113066, + "fcm_dpo/beta": 0.0025526927784085274, + "fcm_dpo/delta": -0.026383230462670326, + "fcm_dpo/margin": 166.55404663085938, + "fcm_dpo/q_t": 0.40220552682876587, + "grad_norm": 23.10381507873535, + "learning_rate": 3.738869612786737e-07, + "logits/chosen": -0.3793821334838867, + "logits/rejected": -0.38762110471725464, + "logps/chosen": -266.65557861328125, + "logps/ref_chosen": -48.594703674316406, + "logps/ref_rejected": -93.30369567871094, + "logps/rejected": -477.91864013671875, + "loss": 1.0756, + "margin_dpo/margin_mean": 166.5540313720703, + "margin_dpo/margin_std": 212.90811157226562, + "step": 275 + }, + { + "KL/chosen_KL_mean": -227.9523468017578, + "KL/mean": -303.5640869140625, + "KL/rejected_KL_mean": -379.1758117675781, + "KL/std": 192.03842163085938, + "epoch": 0.4052863436123348, + "fcm_dpo/beta": 0.00254382798448205, + "fcm_dpo/delta": 0.015904389321804047, + "fcm_dpo/margin": 151.22344970703125, + "fcm_dpo/q_t": 0.4147305190563202, + "grad_norm": 25.779754638671875, + "learning_rate": 3.7277065802070204e-07, + "logits/chosen": -0.369443416595459, + "logits/rejected": -0.34958142042160034, + "logps/chosen": -284.5297546386719, + "logps/ref_chosen": -56.57740783691406, + "logps/ref_rejected": -70.36566925048828, + "logps/rejected": -449.5414733886719, + "loss": 1.1294, + "margin_dpo/margin_mean": 151.22344970703125, + "margin_dpo/margin_std": 247.93980407714844, + "step": 276 + }, + { + "KL/chosen_KL_mean": -234.90208435058594, + "KL/mean": -317.98382568359375, + "KL/rejected_KL_mean": -401.0655517578125, + "KL/std": 185.35687255859375, + "epoch": 0.4067547723935389, + "fcm_dpo/beta": 0.002545831026509404, + "fcm_dpo/delta": -0.024078505113720894, + "fcm_dpo/margin": 166.16348266601562, + "fcm_dpo/q_t": 0.4027344584465027, + "grad_norm": 28.24149513244629, + "learning_rate": 3.71651119641714e-07, + "logits/chosen": -0.3797181248664856, + "logits/rejected": -0.36499595642089844, + "logps/chosen": -291.17364501953125, + "logps/ref_chosen": -56.27156066894531, + "logps/ref_rejected": -92.88127136230469, + "logps/rejected": -493.94683837890625, + "loss": 1.0789, + "margin_dpo/margin_mean": 166.16348266601562, + "margin_dpo/margin_std": 216.53277587890625, + "step": 277 + }, + { + "KL/chosen_KL_mean": -200.89450073242188, + "KL/mean": -295.7635803222656, + "KL/rejected_KL_mean": -390.6326904296875, + "KL/std": 192.54470825195312, + "epoch": 0.40822320117474303, + "fcm_dpo/beta": 0.0024995177518576384, + "fcm_dpo/delta": -0.07845177501440048, + "fcm_dpo/margin": 189.73814392089844, + "fcm_dpo/q_t": 0.3918403387069702, + "grad_norm": 28.204530715942383, + "learning_rate": 3.705283756425872e-07, + "logits/chosen": -0.3621584475040436, + "logits/rejected": -0.36707815527915955, + "logps/chosen": -253.83644104003906, + "logps/ref_chosen": -52.94194030761719, + "logps/ref_rejected": -91.25357818603516, + "logps/rejected": -481.8862609863281, + "loss": 1.0374, + "margin_dpo/margin_mean": 189.73814392089844, + "margin_dpo/margin_std": 220.7503662109375, + "step": 278 + }, + { + "KL/chosen_KL_mean": -234.5529022216797, + "KL/mean": -331.0247802734375, + "KL/rejected_KL_mean": -427.4967041015625, + "KL/std": 201.0699462890625, + "epoch": 0.40969162995594716, + "fcm_dpo/beta": 0.002455736044794321, + "fcm_dpo/delta": -0.07850091904401779, + "fcm_dpo/margin": 192.94378662109375, + "fcm_dpo/q_t": 0.3932103216648102, + "grad_norm": 28.575786590576172, + "learning_rate": 3.6940245560867e-07, + "logits/chosen": -0.30353468656539917, + "logits/rejected": -0.3015468418598175, + "logps/chosen": -283.1942138671875, + "logps/ref_chosen": -48.641319274902344, + "logps/ref_rejected": -87.8514404296875, + "logps/rejected": -515.34814453125, + "loss": 1.0546, + "margin_dpo/margin_mean": 192.94378662109375, + "margin_dpo/margin_std": 249.06761169433594, + "step": 279 + }, + { + "KL/chosen_KL_mean": -236.78497314453125, + "KL/mean": -333.14849853515625, + "KL/rejected_KL_mean": -429.51202392578125, + "KL/std": 184.6090087890625, + "epoch": 0.4111600587371512, + "fcm_dpo/beta": 0.002430729568004608, + "fcm_dpo/delta": -0.07191157341003418, + "fcm_dpo/margin": 192.72705078125, + "fcm_dpo/q_t": 0.39016252756118774, + "grad_norm": 25.460493087768555, + "learning_rate": 3.6827338920900253e-07, + "logits/chosen": -0.33522510528564453, + "logits/rejected": -0.34023696184158325, + "logps/chosen": -295.58209228515625, + "logps/ref_chosen": -58.797122955322266, + "logps/ref_rejected": -98.61885070800781, + "logps/rejected": -528.130859375, + "loss": 1.0293, + "margin_dpo/margin_mean": 192.72706604003906, + "margin_dpo/margin_std": 203.84701538085938, + "step": 280 + }, + { + "KL/chosen_KL_mean": -226.8874969482422, + "KL/mean": -314.1214599609375, + "KL/rejected_KL_mean": -401.35540771484375, + "KL/std": 185.2099151611328, + "epoch": 0.41262848751835535, + "fcm_dpo/beta": 0.002424264792352915, + "fcm_dpo/delta": -0.024164361879229546, + "fcm_dpo/margin": 174.46792602539062, + "fcm_dpo/q_t": 0.3996432423591614, + "grad_norm": 25.438335418701172, + "learning_rate": 3.6714120619553435e-07, + "logits/chosen": -0.3693084716796875, + "logits/rejected": -0.34565502405166626, + "logps/chosen": -282.3760070800781, + "logps/ref_chosen": -55.488521575927734, + "logps/ref_rejected": -80.88258361816406, + "logps/rejected": -482.23797607421875, + "loss": 1.0649, + "margin_dpo/margin_mean": 174.46792602539062, + "margin_dpo/margin_std": 200.58334350585938, + "step": 281 + }, + { + "KL/chosen_KL_mean": -254.75025939941406, + "KL/mean": -321.8013610839844, + "KL/rejected_KL_mean": -388.85247802734375, + "KL/std": 179.27874755859375, + "epoch": 0.41409691629955947, + "fcm_dpo/beta": 0.0024102902971208096, + "fcm_dpo/delta": -0.00805886834859848, + "fcm_dpo/margin": 134.1021728515625, + "fcm_dpo/q_t": 0.42627450823783875, + "grad_norm": 21.817731857299805, + "learning_rate": 3.660059364023408e-07, + "logits/chosen": -0.4306085705757141, + "logits/rejected": -0.4117346405982971, + "logps/chosen": -327.8204040527344, + "logps/ref_chosen": -73.07014465332031, + "logps/ref_rejected": -95.35098266601562, + "logps/rejected": -484.20343017578125, + "loss": 1.1493, + "margin_dpo/margin_mean": 134.1021728515625, + "margin_dpo/margin_std": 210.12014770507812, + "step": 282 + }, + { + "KL/chosen_KL_mean": -255.66551208496094, + "KL/mean": -355.66534423828125, + "KL/rejected_KL_mean": -455.6651611328125, + "KL/std": 223.33087158203125, + "epoch": 0.4155653450807636, + "fcm_dpo/beta": 0.002389241009950638, + "fcm_dpo/delta": -0.08176899701356888, + "fcm_dpo/margin": 199.99966430664062, + "fcm_dpo/q_t": 0.3880097270011902, + "grad_norm": 38.63431167602539, + "learning_rate": 3.6486760974483685e-07, + "logits/chosen": -0.38895383477211, + "logits/rejected": -0.3880043029785156, + "logps/chosen": -317.56396484375, + "logps/ref_chosen": -61.89844512939453, + "logps/ref_rejected": -96.98655700683594, + "logps/rejected": -552.6517333984375, + "loss": 1.025, + "margin_dpo/margin_mean": 199.99964904785156, + "margin_dpo/margin_std": 207.48794555664062, + "step": 283 + }, + { + "KL/chosen_KL_mean": -252.18701171875, + "KL/mean": -347.4981689453125, + "KL/rejected_KL_mean": -442.809326171875, + "KL/std": 203.94338989257812, + "epoch": 0.4170337738619677, + "fcm_dpo/beta": 0.00234918761998415, + "fcm_dpo/delta": -0.050124749541282654, + "fcm_dpo/margin": 190.62234497070312, + "fcm_dpo/q_t": 0.3954547643661499, + "grad_norm": 31.127235412597656, + "learning_rate": 3.6372625621898863e-07, + "logits/chosen": -0.4325849413871765, + "logits/rejected": -0.4140619933605194, + "logps/chosen": -310.62255859375, + "logps/ref_chosen": -58.4355354309082, + "logps/ref_rejected": -93.46926879882812, + "logps/rejected": -536.278564453125, + "loss": 1.0387, + "margin_dpo/margin_mean": 190.62232971191406, + "margin_dpo/margin_std": 199.58392333984375, + "step": 284 + }, + { + "KL/chosen_KL_mean": -275.643310546875, + "KL/mean": -361.4195556640625, + "KL/rejected_KL_mean": -447.19573974609375, + "KL/std": 183.0438232421875, + "epoch": 0.4185022026431718, + "fcm_dpo/beta": 0.0023565019946545362, + "fcm_dpo/delta": -0.005000069737434387, + "fcm_dpo/margin": 171.5524444580078, + "fcm_dpo/q_t": 0.40418434143066406, + "grad_norm": 24.421079635620117, + "learning_rate": 3.625819059005228e-07, + "logits/chosen": -0.40086328983306885, + "logits/rejected": -0.38890522718429565, + "logps/chosen": -341.8755187988281, + "logps/ref_chosen": -66.23219299316406, + "logps/ref_rejected": -99.1268310546875, + "logps/rejected": -546.3225708007812, + "loss": 1.0734, + "margin_dpo/margin_mean": 171.5524444580078, + "margin_dpo/margin_std": 191.03448486328125, + "step": 285 + }, + { + "KL/chosen_KL_mean": -279.2388000488281, + "KL/mean": -373.7999572753906, + "KL/rejected_KL_mean": -468.3611145019531, + "KL/std": 205.68328857421875, + "epoch": 0.4199706314243759, + "fcm_dpo/beta": 0.002331117633730173, + "fcm_dpo/delta": -0.042728979140520096, + "fcm_dpo/margin": 189.12229919433594, + "fcm_dpo/q_t": 0.397432416677475, + "grad_norm": 21.488588333129883, + "learning_rate": 3.614345889441346e-07, + "logits/chosen": -0.39442330598831177, + "logits/rejected": -0.37771064043045044, + "logps/chosen": -352.1898193359375, + "logps/ref_chosen": -72.95100402832031, + "logps/ref_rejected": -88.58845520019531, + "logps/rejected": -556.9495849609375, + "loss": 1.0568, + "margin_dpo/margin_mean": 189.12228393554688, + "margin_dpo/margin_std": 223.15341186523438, + "step": 286 + }, + { + "KL/chosen_KL_mean": -267.8426513671875, + "KL/mean": -340.4630126953125, + "KL/rejected_KL_mean": -413.0833740234375, + "KL/std": 175.79714965820312, + "epoch": 0.42143906020558003, + "fcm_dpo/beta": 0.0023477966897189617, + "fcm_dpo/delta": 0.060965895652770996, + "fcm_dpo/margin": 145.24075317382812, + "fcm_dpo/q_t": 0.42086952924728394, + "grad_norm": 31.77181625366211, + "learning_rate": 3.6028433558269275e-07, + "logits/chosen": -0.3862367272377014, + "logits/rejected": -0.3612860441207886, + "logps/chosen": -329.3837890625, + "logps/ref_chosen": -61.54115295410156, + "logps/ref_rejected": -77.69607543945312, + "logps/rejected": -490.7794494628906, + "loss": 1.1282, + "margin_dpo/margin_mean": 145.24075317382812, + "margin_dpo/margin_std": 204.16952514648438, + "step": 287 + }, + { + "KL/chosen_KL_mean": -258.47845458984375, + "KL/mean": -353.82647705078125, + "KL/rejected_KL_mean": -449.17449951171875, + "KL/std": 181.2964630126953, + "epoch": 0.42290748898678415, + "fcm_dpo/beta": 0.0023200467694550753, + "fcm_dpo/delta": -0.04565563425421715, + "fcm_dpo/margin": 190.696044921875, + "fcm_dpo/q_t": 0.39606454968452454, + "grad_norm": 21.244749069213867, + "learning_rate": 3.5913117612644327e-07, + "logits/chosen": -0.4210980534553528, + "logits/rejected": -0.4073001742362976, + "logps/chosen": -315.1396484375, + "logps/ref_chosen": -56.661224365234375, + "logps/ref_rejected": -87.33570098876953, + "logps/rejected": -536.5101928710938, + "loss": 1.0438, + "margin_dpo/margin_mean": 190.696044921875, + "margin_dpo/margin_std": 194.03762817382812, + "step": 288 + }, + { + "KL/chosen_KL_mean": -238.14132690429688, + "KL/mean": -345.27081298828125, + "KL/rejected_KL_mean": -452.4002380371094, + "KL/std": 204.56678771972656, + "epoch": 0.4243759177679883, + "fcm_dpo/beta": 0.0023045637644827366, + "fcm_dpo/delta": -0.09854762256145477, + "fcm_dpo/margin": 214.25892639160156, + "fcm_dpo/q_t": 0.38599973917007446, + "grad_norm": 25.482728958129883, + "learning_rate": 3.5797514096221024e-07, + "logits/chosen": -0.34231024980545044, + "logits/rejected": -0.3436782956123352, + "logps/chosen": -283.3717346191406, + "logps/ref_chosen": -45.23039245605469, + "logps/ref_rejected": -87.64266967773438, + "logps/rejected": -540.0429077148438, + "loss": 1.0151, + "margin_dpo/margin_mean": 214.25892639160156, + "margin_dpo/margin_std": 223.70123291015625, + "step": 289 + }, + { + "KL/chosen_KL_mean": -261.7184143066406, + "KL/mean": -369.31634521484375, + "KL/rejected_KL_mean": -476.91424560546875, + "KL/std": 220.097900390625, + "epoch": 0.42584434654919234, + "fcm_dpo/beta": 0.002251718658953905, + "fcm_dpo/delta": -0.08903662860393524, + "fcm_dpo/margin": 215.1958465576172, + "fcm_dpo/q_t": 0.39020198583602905, + "grad_norm": 21.68331527709961, + "learning_rate": 3.568162605525952e-07, + "logits/chosen": -0.3297966718673706, + "logits/rejected": -0.35255828499794006, + "logps/chosen": -317.1899108886719, + "logps/ref_chosen": -55.47149658203125, + "logps/ref_rejected": -116.70857238769531, + "logps/rejected": -593.622802734375, + "loss": 1.0375, + "margin_dpo/margin_mean": 215.1958465576172, + "margin_dpo/margin_std": 261.9568786621094, + "step": 290 + }, + { + "KL/chosen_KL_mean": -228.8701171875, + "KL/mean": -324.2679138183594, + "KL/rejected_KL_mean": -419.6656799316406, + "KL/std": 185.85340881347656, + "epoch": 0.42731277533039647, + "fcm_dpo/beta": 0.002243693685159087, + "fcm_dpo/delta": -0.02954481914639473, + "fcm_dpo/margin": 190.7955780029297, + "fcm_dpo/q_t": 0.400044322013855, + "grad_norm": 27.926626205444336, + "learning_rate": 3.5565456543517485e-07, + "logits/chosen": -0.3716287612915039, + "logits/rejected": -0.3628491163253784, + "logps/chosen": -292.1304931640625, + "logps/ref_chosen": -63.26036834716797, + "logps/ref_rejected": -89.29708862304688, + "logps/rejected": -508.9627685546875, + "loss": 1.0652, + "margin_dpo/margin_mean": 190.79556274414062, + "margin_dpo/margin_std": 225.35430908203125, + "step": 291 + }, + { + "KL/chosen_KL_mean": -256.63824462890625, + "KL/mean": -359.156494140625, + "KL/rejected_KL_mean": -461.67474365234375, + "KL/std": 220.2518768310547, + "epoch": 0.4287812041116006, + "fcm_dpo/beta": 0.002220253925770521, + "fcm_dpo/delta": -0.057921458035707474, + "fcm_dpo/margin": 205.0364990234375, + "fcm_dpo/q_t": 0.39462825655937195, + "grad_norm": 22.149768829345703, + "learning_rate": 3.5449008622169583e-07, + "logits/chosen": -0.381770521402359, + "logits/rejected": -0.3677070736885071, + "logps/chosen": -310.5567626953125, + "logps/ref_chosen": -53.91852951049805, + "logps/ref_rejected": -89.96138000488281, + "logps/rejected": -551.6361083984375, + "loss": 1.0614, + "margin_dpo/margin_mean": 205.0364990234375, + "margin_dpo/margin_std": 262.11480712890625, + "step": 292 + }, + { + "KL/chosen_KL_mean": -260.9930419921875, + "KL/mean": -341.6339416503906, + "KL/rejected_KL_mean": -422.27484130859375, + "KL/std": 220.97372436523438, + "epoch": 0.4302496328928047, + "fcm_dpo/beta": 0.002223607152700424, + "fcm_dpo/delta": 0.04278174415230751, + "fcm_dpo/margin": 161.2817840576172, + "fcm_dpo/q_t": 0.4189508557319641, + "grad_norm": 25.350297927856445, + "learning_rate": 3.5332285359726846e-07, + "logits/chosen": -0.37213313579559326, + "logits/rejected": -0.3637707829475403, + "logps/chosen": -321.36907958984375, + "logps/ref_chosen": -60.376033782958984, + "logps/ref_rejected": -77.85244750976562, + "logps/rejected": -500.12725830078125, + "loss": 1.1311, + "margin_dpo/margin_mean": 161.2817840576172, + "margin_dpo/margin_std": 250.83364868164062, + "step": 293 + }, + { + "KL/chosen_KL_mean": -240.05978393554688, + "KL/mean": -329.448974609375, + "KL/rejected_KL_mean": -418.83819580078125, + "KL/std": 193.92105102539062, + "epoch": 0.43171806167400884, + "fcm_dpo/beta": 0.002221715170890093, + "fcm_dpo/delta": 0.002862263470888138, + "fcm_dpo/margin": 178.77838134765625, + "fcm_dpo/q_t": 0.4095104932785034, + "grad_norm": 21.2161808013916, + "learning_rate": 3.5215289831955786e-07, + "logits/chosen": -0.4091721773147583, + "logits/rejected": -0.4162572920322418, + "logps/chosen": -288.1473083496094, + "logps/ref_chosen": -48.0875358581543, + "logps/ref_rejected": -81.89698791503906, + "logps/rejected": -500.73516845703125, + "loss": 1.0956, + "margin_dpo/margin_mean": 178.77838134765625, + "margin_dpo/margin_std": 243.22738647460938, + "step": 294 + }, + { + "KL/chosen_KL_mean": -289.834228515625, + "KL/mean": -390.12548828125, + "KL/rejected_KL_mean": -490.416748046875, + "KL/std": 226.3201904296875, + "epoch": 0.4331864904552129, + "fcm_dpo/beta": 0.0022136676125228405, + "fcm_dpo/delta": -0.04604334011673927, + "fcm_dpo/margin": 200.58251953125, + "fcm_dpo/q_t": 0.39920759201049805, + "grad_norm": 28.84412384033203, + "learning_rate": 3.509802512179737e-07, + "logits/chosen": -0.4074261784553528, + "logits/rejected": -0.409212589263916, + "logps/chosen": -339.7589111328125, + "logps/ref_chosen": -49.92467498779297, + "logps/ref_rejected": -87.45632934570312, + "logps/rejected": -577.873046875, + "loss": 1.0697, + "margin_dpo/margin_mean": 200.58251953125, + "margin_dpo/margin_std": 263.1807861328125, + "step": 295 + }, + { + "KL/chosen_KL_mean": -358.89825439453125, + "KL/mean": -427.7105712890625, + "KL/rejected_KL_mean": -496.5228576660156, + "KL/std": 208.72183227539062, + "epoch": 0.434654919236417, + "fcm_dpo/beta": 0.002201622352004051, + "fcm_dpo/delta": -0.008969346061348915, + "fcm_dpo/margin": 137.62461853027344, + "fcm_dpo/q_t": 0.4300941824913025, + "grad_norm": 32.84729766845703, + "learning_rate": 3.498049431928577e-07, + "logits/chosen": -0.4741077721118927, + "logits/rejected": -0.45637625455856323, + "logps/chosen": -424.3894958496094, + "logps/ref_chosen": -65.49124145507812, + "logps/ref_rejected": -93.08908081054688, + "logps/rejected": -589.6119384765625, + "loss": 1.1886, + "margin_dpo/margin_mean": 137.62461853027344, + "margin_dpo/margin_std": 264.7969665527344, + "step": 296 + }, + { + "KL/chosen_KL_mean": -307.435546875, + "KL/mean": -389.3873291015625, + "KL/rejected_KL_mean": -471.3391418457031, + "KL/std": 198.08912658691406, + "epoch": 0.43612334801762115, + "fcm_dpo/beta": 0.002210353035479784, + "fcm_dpo/delta": 0.03913431614637375, + "fcm_dpo/margin": 163.90359497070312, + "fcm_dpo/q_t": 0.4156332015991211, + "grad_norm": 34.864501953125, + "learning_rate": 3.486270052146694e-07, + "logits/chosen": -0.4226665794849396, + "logits/rejected": -0.42391157150268555, + "logps/chosen": -363.9125061035156, + "logps/ref_chosen": -56.476951599121094, + "logps/ref_rejected": -95.1385498046875, + "logps/rejected": -566.4776611328125, + "loss": 1.1084, + "margin_dpo/margin_mean": 163.90359497070312, + "margin_dpo/margin_std": 211.23016357421875, + "step": 297 + }, + { + "KL/chosen_KL_mean": -320.65106201171875, + "KL/mean": -428.41290283203125, + "KL/rejected_KL_mean": -536.1747436523438, + "KL/std": 268.907470703125, + "epoch": 0.43759177679882527, + "fcm_dpo/beta": 0.002200640505179763, + "fcm_dpo/delta": -0.07795488834381104, + "fcm_dpo/margin": 215.52365112304688, + "fcm_dpo/q_t": 0.3964436948299408, + "grad_norm": 34.56257247924805, + "learning_rate": 3.474464683231698e-07, + "logits/chosen": -0.4743010401725769, + "logits/rejected": -0.49710047245025635, + "logps/chosen": -387.97625732421875, + "logps/ref_chosen": -67.32516479492188, + "logps/ref_rejected": -116.66217041015625, + "logps/rejected": -652.8369140625, + "loss": 1.0729, + "margin_dpo/margin_mean": 215.52365112304688, + "margin_dpo/margin_std": 318.0645751953125, + "step": 298 + }, + { + "KL/chosen_KL_mean": -266.99639892578125, + "KL/mean": -358.8790283203125, + "KL/rejected_KL_mean": -450.76171875, + "KL/std": 197.55548095703125, + "epoch": 0.4390602055800294, + "fcm_dpo/beta": 0.00219709612429142, + "fcm_dpo/delta": -0.004710428416728973, + "fcm_dpo/margin": 183.76527404785156, + "fcm_dpo/q_t": 0.40845823287963867, + "grad_norm": 38.42890930175781, + "learning_rate": 3.462633636266041e-07, + "logits/chosen": -0.42935478687286377, + "logits/rejected": -0.4295846223831177, + "logps/chosen": -315.95849609375, + "logps/ref_chosen": -48.96209716796875, + "logps/ref_rejected": -84.32823944091797, + "logps/rejected": -535.0899658203125, + "loss": 1.0958, + "margin_dpo/margin_mean": 183.76528930664062, + "margin_dpo/margin_std": 249.51800537109375, + "step": 299 + }, + { + "KL/chosen_KL_mean": -330.11163330078125, + "KL/mean": -438.03948974609375, + "KL/rejected_KL_mean": -545.9674072265625, + "KL/std": 232.30531311035156, + "epoch": 0.44052863436123346, + "fcm_dpo/beta": 0.0021653189323842525, + "fcm_dpo/delta": -0.07065486907958984, + "fcm_dpo/margin": 215.855712890625, + "fcm_dpo/q_t": 0.3930676281452179, + "grad_norm": 31.024282455444336, + "learning_rate": 3.4507772230088147e-07, + "logits/chosen": -0.4451923668384552, + "logits/rejected": -0.4505726099014282, + "logps/chosen": -389.18536376953125, + "logps/ref_chosen": -59.07371139526367, + "logps/ref_rejected": -95.9664535522461, + "logps/rejected": -641.933837890625, + "loss": 1.0619, + "margin_dpo/margin_mean": 215.855712890625, + "margin_dpo/margin_std": 283.98614501953125, + "step": 300 + }, + { + "KL/chosen_KL_mean": -276.162353515625, + "KL/mean": -384.3056640625, + "KL/rejected_KL_mean": -492.4489440917969, + "KL/std": 221.1756591796875, + "epoch": 0.4419970631424376, + "fcm_dpo/beta": 0.002126394771039486, + "fcm_dpo/delta": -0.0630912333726883, + "fcm_dpo/margin": 216.28659057617188, + "fcm_dpo/q_t": 0.3957647681236267, + "grad_norm": 25.04326629638672, + "learning_rate": 3.4388957558875316e-07, + "logits/chosen": -0.45365726947784424, + "logits/rejected": -0.45016711950302124, + "logps/chosen": -333.4117431640625, + "logps/ref_chosen": -57.249366760253906, + "logps/ref_rejected": -92.35354614257812, + "logps/rejected": -584.802490234375, + "loss": 1.048, + "margin_dpo/margin_mean": 216.28659057617188, + "margin_dpo/margin_std": 261.11767578125, + "step": 301 + }, + { + "KL/chosen_KL_mean": -260.8786315917969, + "KL/mean": -353.64752197265625, + "KL/rejected_KL_mean": -446.4163818359375, + "KL/std": 209.54000854492188, + "epoch": 0.4434654919236417, + "fcm_dpo/beta": 0.002119125798344612, + "fcm_dpo/delta": 0.006838139146566391, + "fcm_dpo/margin": 185.53775024414062, + "fcm_dpo/q_t": 0.41012802720069885, + "grad_norm": 35.94118118286133, + "learning_rate": 3.426989547989902e-07, + "logits/chosen": -0.4112318456172943, + "logits/rejected": -0.41728314757347107, + "logps/chosen": -312.0766296386719, + "logps/ref_chosen": -51.197994232177734, + "logps/ref_rejected": -97.22636413574219, + "logps/rejected": -543.6427612304688, + "loss": 1.095, + "margin_dpo/margin_mean": 185.53775024414062, + "margin_dpo/margin_std": 245.56341552734375, + "step": 302 + }, + { + "KL/chosen_KL_mean": -266.37298583984375, + "KL/mean": -351.4562683105469, + "KL/rejected_KL_mean": -436.53955078125, + "KL/std": 212.11721801757812, + "epoch": 0.44493392070484583, + "fcm_dpo/beta": 0.0021344092674553394, + "fcm_dpo/delta": 0.03814225643873215, + "fcm_dpo/margin": 170.1665496826172, + "fcm_dpo/q_t": 0.4166523218154907, + "grad_norm": 29.625904083251953, + "learning_rate": 3.4150589130555773e-07, + "logits/chosen": -0.4059848487377167, + "logits/rejected": -0.3904208242893219, + "logps/chosen": -333.0869445800781, + "logps/ref_chosen": -66.71394348144531, + "logps/ref_rejected": -86.94542694091797, + "logps/rejected": -523.4849853515625, + "loss": 1.1261, + "margin_dpo/margin_mean": 170.1665496826172, + "margin_dpo/margin_std": 254.2838897705078, + "step": 303 + }, + { + "KL/chosen_KL_mean": -242.7084503173828, + "KL/mean": -338.2328796386719, + "KL/rejected_KL_mean": -433.75732421875, + "KL/std": 192.66885375976562, + "epoch": 0.44640234948604995, + "fcm_dpo/beta": 0.0021487209014594555, + "fcm_dpo/delta": -0.011212758719921112, + "fcm_dpo/margin": 191.04885864257812, + "fcm_dpo/q_t": 0.40207067131996155, + "grad_norm": 24.7471981048584, + "learning_rate": 3.403104165467883e-07, + "logits/chosen": -0.44146013259887695, + "logits/rejected": -0.4333987236022949, + "logps/chosen": -314.6591491699219, + "logps/ref_chosen": -71.95069885253906, + "logps/ref_rejected": -90.47203063964844, + "logps/rejected": -524.2293701171875, + "loss": 1.0533, + "margin_dpo/margin_mean": 191.04885864257812, + "margin_dpo/margin_std": 175.52703857421875, + "step": 304 + }, + { + "KL/chosen_KL_mean": -261.6916809082031, + "KL/mean": -349.76568603515625, + "KL/rejected_KL_mean": -437.8396911621094, + "KL/std": 222.32864379882812, + "epoch": 0.447870778267254, + "fcm_dpo/beta": 0.0021360788960009813, + "fcm_dpo/delta": 0.02424509823322296, + "fcm_dpo/margin": 176.14801025390625, + "fcm_dpo/q_t": 0.41488200426101685, + "grad_norm": 25.216215133666992, + "learning_rate": 3.391125620245535e-07, + "logits/chosen": -0.4338444471359253, + "logits/rejected": -0.4185817837715149, + "logps/chosen": -328.4869079589844, + "logps/ref_chosen": -66.79523468017578, + "logps/ref_rejected": -92.75459289550781, + "logps/rejected": -530.5942993164062, + "loss": 1.1129, + "margin_dpo/margin_mean": 176.1479949951172, + "margin_dpo/margin_std": 249.10455322265625, + "step": 305 + }, + { + "KL/chosen_KL_mean": -267.495361328125, + "KL/mean": -352.1854248046875, + "KL/rejected_KL_mean": -436.8755798339844, + "KL/std": 201.3372802734375, + "epoch": 0.44933920704845814, + "fcm_dpo/beta": 0.0021633305586874485, + "fcm_dpo/delta": 0.03478704392910004, + "fcm_dpo/margin": 169.38018798828125, + "fcm_dpo/q_t": 0.415255069732666, + "grad_norm": 25.18739891052246, + "learning_rate": 3.3791235930343417e-07, + "logits/chosen": -0.40468522906303406, + "logits/rejected": -0.37944549322128296, + "logps/chosen": -337.17926025390625, + "logps/ref_chosen": -69.68389892578125, + "logps/ref_rejected": -85.15919494628906, + "logps/rejected": -522.0347900390625, + "loss": 1.1056, + "margin_dpo/margin_mean": 169.38018798828125, + "margin_dpo/margin_std": 220.4286651611328, + "step": 306 + }, + { + "KL/chosen_KL_mean": -242.18780517578125, + "KL/mean": -331.582275390625, + "KL/rejected_KL_mean": -420.9767150878906, + "KL/std": 187.04690551757812, + "epoch": 0.45080763582966227, + "fcm_dpo/beta": 0.002166555728763342, + "fcm_dpo/delta": 0.01314252894371748, + "fcm_dpo/margin": 178.78890991210938, + "fcm_dpo/q_t": 0.4101484417915344, + "grad_norm": 25.99106788635254, + "learning_rate": 3.367098400098881e-07, + "logits/chosen": -0.4158622920513153, + "logits/rejected": -0.40115827322006226, + "logps/chosen": -312.35321044921875, + "logps/ref_chosen": -70.16542053222656, + "logps/ref_rejected": -86.97230529785156, + "logps/rejected": -507.94903564453125, + "loss": 1.0951, + "margin_dpo/margin_mean": 178.78890991210938, + "margin_dpo/margin_std": 231.8785400390625, + "step": 307 + }, + { + "KL/chosen_KL_mean": -253.13291931152344, + "KL/mean": -351.1971740722656, + "KL/rejected_KL_mean": -449.2614440917969, + "KL/std": 208.60134887695312, + "epoch": 0.4522760646108664, + "fcm_dpo/beta": 0.0021638874895870686, + "fcm_dpo/delta": -0.02547409199178219, + "fcm_dpo/margin": 196.12857055664062, + "fcm_dpo/q_t": 0.39992159605026245, + "grad_norm": 31.040889739990234, + "learning_rate": 3.355050358314172e-07, + "logits/chosen": -0.35027140378952026, + "logits/rejected": -0.3394496440887451, + "logps/chosen": -308.3779296875, + "logps/ref_chosen": -55.2449951171875, + "logps/ref_rejected": -79.37226104736328, + "logps/rejected": -528.6337280273438, + "loss": 1.0528, + "margin_dpo/margin_mean": 196.12855529785156, + "margin_dpo/margin_std": 203.5819091796875, + "step": 308 + }, + { + "KL/chosen_KL_mean": -259.13006591796875, + "KL/mean": -356.8582763671875, + "KL/rejected_KL_mean": -454.58648681640625, + "KL/std": 223.6021270751953, + "epoch": 0.45374449339207046, + "fcm_dpo/beta": 0.002161671407520771, + "fcm_dpo/delta": -0.02382533997297287, + "fcm_dpo/margin": 195.45639038085938, + "fcm_dpo/q_t": 0.40092766284942627, + "grad_norm": 28.08131217956543, + "learning_rate": 3.3429797851573183e-07, + "logits/chosen": -0.36709824204444885, + "logits/rejected": -0.3614857792854309, + "logps/chosen": -308.08917236328125, + "logps/ref_chosen": -48.959083557128906, + "logps/ref_rejected": -82.34072875976562, + "logps/rejected": -536.92724609375, + "loss": 1.0705, + "margin_dpo/margin_mean": 195.45639038085938, + "margin_dpo/margin_std": 234.07315063476562, + "step": 309 + }, + { + "KL/chosen_KL_mean": -300.7269287109375, + "KL/mean": -390.6546630859375, + "KL/rejected_KL_mean": -480.58233642578125, + "KL/std": 196.88262939453125, + "epoch": 0.4552129221732746, + "fcm_dpo/beta": 0.0021615237928926945, + "fcm_dpo/delta": 0.011311601847410202, + "fcm_dpo/margin": 179.8553924560547, + "fcm_dpo/q_t": 0.40847909450531006, + "grad_norm": 24.29796600341797, + "learning_rate": 3.3308869986991487e-07, + "logits/chosen": -0.41836071014404297, + "logits/rejected": -0.4063529372215271, + "logps/chosen": -363.46875, + "logps/ref_chosen": -62.74177932739258, + "logps/ref_rejected": -79.9302978515625, + "logps/rejected": -560.5126342773438, + "loss": 1.0811, + "margin_dpo/margin_mean": 179.85537719726562, + "margin_dpo/margin_std": 198.85214233398438, + "step": 310 + }, + { + "KL/chosen_KL_mean": -314.01068115234375, + "KL/mean": -412.86981201171875, + "KL/rejected_KL_mean": -511.7289123535156, + "KL/std": 246.66839599609375, + "epoch": 0.4566813509544787, + "fcm_dpo/beta": 0.0021447776816785336, + "fcm_dpo/delta": -0.025147156789898872, + "fcm_dpo/margin": 197.71823120117188, + "fcm_dpo/q_t": 0.40380626916885376, + "grad_norm": 25.550621032714844, + "learning_rate": 3.3187723175958346e-07, + "logits/chosen": -0.38108277320861816, + "logits/rejected": -0.356780469417572, + "logps/chosen": -367.0386657714844, + "logps/ref_chosen": -53.02798080444336, + "logps/ref_rejected": -77.43820190429688, + "logps/rejected": -589.1671142578125, + "loss": 1.0811, + "margin_dpo/margin_mean": 197.71823120117188, + "margin_dpo/margin_std": 265.0999450683594, + "step": 311 + }, + { + "KL/chosen_KL_mean": -291.0233154296875, + "KL/mean": -382.343017578125, + "KL/rejected_KL_mean": -473.6627197265625, + "KL/std": 216.25013732910156, + "epoch": 0.4581497797356828, + "fcm_dpo/beta": 0.002153100911527872, + "fcm_dpo/delta": 0.00674719363451004, + "fcm_dpo/margin": 182.63937377929688, + "fcm_dpo/q_t": 0.4111559987068176, + "grad_norm": 22.847942352294922, + "learning_rate": 3.306636061080487e-07, + "logits/chosen": -0.38960134983062744, + "logits/rejected": -0.3816367983818054, + "logps/chosen": -340.41552734375, + "logps/ref_chosen": -49.39221954345703, + "logps/ref_rejected": -75.79280853271484, + "logps/rejected": -549.4555053710938, + "loss": 1.1007, + "margin_dpo/margin_mean": 182.63937377929688, + "margin_dpo/margin_std": 256.60113525390625, + "step": 312 + }, + { + "KL/chosen_KL_mean": -274.72918701171875, + "KL/mean": -370.8074951171875, + "KL/rejected_KL_mean": -466.88580322265625, + "KL/std": 231.34361267089844, + "epoch": 0.45961820851688695, + "fcm_dpo/beta": 0.002157143084332347, + "fcm_dpo/delta": -0.016167178750038147, + "fcm_dpo/margin": 192.15664672851562, + "fcm_dpo/q_t": 0.4041683077812195, + "grad_norm": 31.167348861694336, + "learning_rate": 3.2944785489547537e-07, + "logits/chosen": -0.46091365814208984, + "logits/rejected": -0.4598471522331238, + "logps/chosen": -324.88189697265625, + "logps/ref_chosen": -50.152740478515625, + "logps/ref_rejected": -86.40620422363281, + "logps/rejected": -553.2919921875, + "loss": 1.0858, + "margin_dpo/margin_mean": 192.15664672851562, + "margin_dpo/margin_std": 247.33575439453125, + "step": 313 + }, + { + "KL/chosen_KL_mean": -303.0827941894531, + "KL/mean": -394.4405517578125, + "KL/rejected_KL_mean": -485.79833984375, + "KL/std": 226.08700561523438, + "epoch": 0.461086637298091, + "fcm_dpo/beta": 0.0021345026325434446, + "fcm_dpo/delta": 0.010156366974115372, + "fcm_dpo/margin": 182.71548461914062, + "fcm_dpo/q_t": 0.4131431579589844, + "grad_norm": 20.98644256591797, + "learning_rate": 3.2823001015803857e-07, + "logits/chosen": -0.4531956911087036, + "logits/rejected": -0.45664170384407043, + "logps/chosen": -360.32037353515625, + "logps/ref_chosen": -57.237579345703125, + "logps/ref_rejected": -97.5965347290039, + "logps/rejected": -583.3948974609375, + "loss": 1.1244, + "margin_dpo/margin_mean": 182.71548461914062, + "margin_dpo/margin_std": 292.40838623046875, + "step": 314 + }, + { + "KL/chosen_KL_mean": -276.57342529296875, + "KL/mean": -360.9225769042969, + "KL/rejected_KL_mean": -445.271728515625, + "KL/std": 206.61561584472656, + "epoch": 0.46255506607929514, + "fcm_dpo/beta": 0.002153775654733181, + "fcm_dpo/delta": 0.038061730563640594, + "fcm_dpo/margin": 168.69830322265625, + "fcm_dpo/q_t": 0.41625896096229553, + "grad_norm": 22.165285110473633, + "learning_rate": 3.270101039870797e-07, + "logits/chosen": -0.36769017577171326, + "logits/rejected": -0.37278226017951965, + "logps/chosen": -325.64300537109375, + "logps/ref_chosen": -49.06958770751953, + "logps/ref_rejected": -85.68087768554688, + "logps/rejected": -530.95263671875, + "loss": 1.1123, + "margin_dpo/margin_mean": 168.69830322265625, + "margin_dpo/margin_std": 228.2129364013672, + "step": 315 + }, + { + "KL/chosen_KL_mean": -269.3196105957031, + "KL/mean": -389.4281921386719, + "KL/rejected_KL_mean": -509.5367736816406, + "KL/std": 240.0266571044922, + "epoch": 0.46402349486049926, + "fcm_dpo/beta": 0.0021110116504132748, + "fcm_dpo/delta": -0.11421408504247665, + "fcm_dpo/margin": 240.21713256835938, + "fcm_dpo/q_t": 0.38528791069984436, + "grad_norm": 26.478769302368164, + "learning_rate": 3.2578816852826086e-07, + "logits/chosen": -0.4204370975494385, + "logits/rejected": -0.42546719312667847, + "logps/chosen": -323.58038330078125, + "logps/ref_chosen": -54.26074981689453, + "logps/ref_rejected": -101.2814712524414, + "logps/rejected": -610.8182373046875, + "loss": 1.0169, + "margin_dpo/margin_mean": 240.21714782714844, + "margin_dpo/margin_std": 269.4544677734375, + "step": 316 + }, + { + "KL/chosen_KL_mean": -275.29345703125, + "KL/mean": -406.8150939941406, + "KL/rejected_KL_mean": -538.336669921875, + "KL/std": 220.98388671875, + "epoch": 0.4654919236417034, + "fcm_dpo/beta": 0.0020721801556646824, + "fcm_dpo/delta": -0.153153657913208, + "fcm_dpo/margin": 263.04327392578125, + "fcm_dpo/q_t": 0.37356036901474, + "grad_norm": 29.60879898071289, + "learning_rate": 3.2456423598071783e-07, + "logits/chosen": -0.4123254120349884, + "logits/rejected": -0.4049742817878723, + "logps/chosen": -331.3876953125, + "logps/ref_chosen": -56.094207763671875, + "logps/ref_rejected": -100.69905090332031, + "logps/rejected": -639.0357666015625, + "loss": 0.9783, + "margin_dpo/margin_mean": 263.04327392578125, + "margin_dpo/margin_std": 248.7153778076172, + "step": 317 + }, + { + "KL/chosen_KL_mean": -307.56475830078125, + "KL/mean": -399.586669921875, + "KL/rejected_KL_mean": -491.6085205078125, + "KL/std": 220.44793701171875, + "epoch": 0.4669603524229075, + "fcm_dpo/beta": 0.0020595774985849857, + "fcm_dpo/delta": 0.021566076204180717, + "fcm_dpo/margin": 184.04380798339844, + "fcm_dpo/q_t": 0.4124523401260376, + "grad_norm": 24.619739532470703, + "learning_rate": 3.233383385962115e-07, + "logits/chosen": -0.458835244178772, + "logits/rejected": -0.4265810549259186, + "logps/chosen": -372.21044921875, + "logps/ref_chosen": -64.64569854736328, + "logps/ref_rejected": -82.76425170898438, + "logps/rejected": -574.372802734375, + "loss": 1.1006, + "margin_dpo/margin_mean": 184.0438232421875, + "margin_dpo/margin_std": 241.61331176757812, + "step": 318 + }, + { + "KL/chosen_KL_mean": -269.0120849609375, + "KL/mean": -385.52276611328125, + "KL/rejected_KL_mean": -502.0334777832031, + "KL/std": 247.28811645507812, + "epoch": 0.4684287812041116, + "fcm_dpo/beta": 0.0020323502831161022, + "fcm_dpo/delta": -0.07725685834884644, + "fcm_dpo/margin": 233.02137756347656, + "fcm_dpo/q_t": 0.3905225992202759, + "grad_norm": 23.96953010559082, + "learning_rate": 3.2211050867827805e-07, + "logits/chosen": -0.3532301187515259, + "logits/rejected": -0.3711482882499695, + "logps/chosen": -318.3958435058594, + "logps/ref_chosen": -49.383758544921875, + "logps/ref_rejected": -113.90650939941406, + "logps/rejected": -615.93994140625, + "loss": 1.035, + "margin_dpo/margin_mean": 233.0213623046875, + "margin_dpo/margin_std": 266.6269226074219, + "step": 319 + }, + { + "KL/chosen_KL_mean": -284.35699462890625, + "KL/mean": -408.48895263671875, + "KL/rejected_KL_mean": -532.6209106445312, + "KL/std": 253.5668487548828, + "epoch": 0.4698972099853157, + "fcm_dpo/beta": 0.0019884873181581497, + "fcm_dpo/delta": -0.09909307956695557, + "fcm_dpo/margin": 248.263916015625, + "fcm_dpo/q_t": 0.3870677351951599, + "grad_norm": 23.290699005126953, + "learning_rate": 3.208807785813777e-07, + "logits/chosen": -0.41272127628326416, + "logits/rejected": -0.4184020161628723, + "logps/chosen": -343.86187744140625, + "logps/ref_chosen": -59.50489044189453, + "logps/ref_rejected": -97.66717529296875, + "logps/rejected": -630.2880859375, + "loss": 1.0227, + "margin_dpo/margin_mean": 248.263916015625, + "margin_dpo/margin_std": 279.31939697265625, + "step": 320 + }, + { + "KL/chosen_KL_mean": -348.3397216796875, + "KL/mean": -455.6825256347656, + "KL/rejected_KL_mean": -563.0252685546875, + "KL/std": 267.32122802734375, + "epoch": 0.4713656387665198, + "fcm_dpo/beta": 0.0019679851830005646, + "fcm_dpo/delta": -0.024129830300807953, + "fcm_dpo/margin": 214.68557739257812, + "fcm_dpo/q_t": 0.4036102294921875, + "grad_norm": 23.15062713623047, + "learning_rate": 3.1964918071004217e-07, + "logits/chosen": -0.40483784675598145, + "logits/rejected": -0.3935700058937073, + "logps/chosen": -409.8883972167969, + "logps/ref_chosen": -61.548683166503906, + "logps/ref_rejected": -91.64103698730469, + "logps/rejected": -654.6663818359375, + "loss": 1.082, + "margin_dpo/margin_mean": 214.68557739257812, + "margin_dpo/margin_std": 282.3053283691406, + "step": 321 + }, + { + "KL/chosen_KL_mean": -306.1932373046875, + "KL/mean": -422.8832702636719, + "KL/rejected_KL_mean": -539.5733642578125, + "KL/std": 236.69369506835938, + "epoch": 0.47283406754772395, + "fcm_dpo/beta": 0.0019517629407346249, + "fcm_dpo/delta": -0.05857790261507034, + "fcm_dpo/margin": 233.38006591796875, + "fcm_dpo/q_t": 0.39376458525657654, + "grad_norm": 24.584089279174805, + "learning_rate": 3.184157475180207e-07, + "logits/chosen": -0.3697229027748108, + "logits/rejected": -0.36216434836387634, + "logps/chosen": -363.4832763671875, + "logps/ref_chosen": -57.29003143310547, + "logps/ref_rejected": -95.74992370605469, + "logps/rejected": -635.3232421875, + "loss": 1.0394, + "margin_dpo/margin_mean": 233.3800811767578, + "margin_dpo/margin_std": 251.07766723632812, + "step": 322 + }, + { + "KL/chosen_KL_mean": -326.294189453125, + "KL/mean": -422.7344970703125, + "KL/rejected_KL_mean": -519.1747436523438, + "KL/std": 234.59567260742188, + "epoch": 0.47430249632892807, + "fcm_dpo/beta": 0.0019555268809199333, + "fcm_dpo/delta": 0.02367909625172615, + "fcm_dpo/margin": 192.8805694580078, + "fcm_dpo/q_t": 0.41228896379470825, + "grad_norm": 30.523874282836914, + "learning_rate": 3.171805115074251e-07, + "logits/chosen": -0.3859459161758423, + "logits/rejected": -0.37858086824417114, + "logps/chosen": -377.5281677246094, + "logps/ref_chosen": -51.23395919799805, + "logps/ref_rejected": -75.06192016601562, + "logps/rejected": -594.2366943359375, + "loss": 1.1079, + "margin_dpo/margin_mean": 192.88055419921875, + "margin_dpo/margin_std": 258.48101806640625, + "step": 323 + }, + { + "KL/chosen_KL_mean": -374.3531494140625, + "KL/mean": -466.55242919921875, + "KL/rejected_KL_mean": -558.751708984375, + "KL/std": 251.7170867919922, + "epoch": 0.47577092511013214, + "fcm_dpo/beta": 0.001949124038219452, + "fcm_dpo/delta": -0.06465040892362595, + "fcm_dpo/margin": 184.39852905273438, + "fcm_dpo/q_t": 0.41932082176208496, + "grad_norm": 36.11357498168945, + "learning_rate": 3.1594350522787295e-07, + "logits/chosen": -0.4411655068397522, + "logits/rejected": -0.42205148935317993, + "logps/chosen": -439.48834228515625, + "logps/ref_chosen": -65.13516998291016, + "logps/ref_rejected": -86.47750854492188, + "logps/rejected": -645.229248046875, + "loss": 1.1457, + "margin_dpo/margin_mean": 184.39854431152344, + "margin_dpo/margin_std": 294.26849365234375, + "step": 324 + }, + { + "KL/chosen_KL_mean": -294.81378173828125, + "KL/mean": -377.78643798828125, + "KL/rejected_KL_mean": -460.75909423828125, + "KL/std": 227.09103393554688, + "epoch": 0.47723935389133626, + "fcm_dpo/beta": 0.0019613862968981266, + "fcm_dpo/delta": 0.07689196616411209, + "fcm_dpo/margin": 165.94529724121094, + "fcm_dpo/q_t": 0.42428165674209595, + "grad_norm": 28.9737548828125, + "learning_rate": 3.147047612756302e-07, + "logits/chosen": -0.4612424969673157, + "logits/rejected": -0.4356522858142853, + "logps/chosen": -351.0293884277344, + "logps/ref_chosen": -56.215599060058594, + "logps/ref_rejected": -70.08592987060547, + "logps/rejected": -530.844970703125, + "loss": 1.1393, + "margin_dpo/margin_mean": 165.9453125, + "margin_dpo/margin_std": 232.77090454101562, + "step": 325 + }, + { + "KL/chosen_KL_mean": -328.23577880859375, + "KL/mean": -415.43890380859375, + "KL/rejected_KL_mean": -502.64208984375, + "KL/std": 211.7913818359375, + "epoch": 0.4787077826725404, + "fcm_dpo/beta": 0.001981673063710332, + "fcm_dpo/delta": 0.05636116489768028, + "fcm_dpo/margin": 174.4063262939453, + "fcm_dpo/q_t": 0.4185304045677185, + "grad_norm": 27.75748634338379, + "learning_rate": 3.134643122927519e-07, + "logits/chosen": -0.4789705276489258, + "logits/rejected": -0.4462633728981018, + "logps/chosen": -400.960693359375, + "logps/ref_chosen": -72.72496032714844, + "logps/ref_rejected": -79.8467788696289, + "logps/rejected": -582.4888916015625, + "loss": 1.1104, + "margin_dpo/margin_mean": 174.4063262939453, + "margin_dpo/margin_std": 205.46969604492188, + "step": 326 + }, + { + "KL/chosen_KL_mean": -288.17822265625, + "KL/mean": -407.74261474609375, + "KL/rejected_KL_mean": -527.3070068359375, + "KL/std": 220.7349090576172, + "epoch": 0.4801762114537445, + "fcm_dpo/beta": 0.001960520865395665, + "fcm_dpo/delta": -0.07271062582731247, + "fcm_dpo/margin": 239.12875366210938, + "fcm_dpo/q_t": 0.390238493680954, + "grad_norm": 28.559314727783203, + "learning_rate": 3.1222219096622264e-07, + "logits/chosen": -0.42903268337249756, + "logits/rejected": -0.41195765137672424, + "logps/chosen": -357.3126525878906, + "logps/ref_chosen": -69.13441467285156, + "logps/ref_rejected": -111.93377685546875, + "logps/rejected": -639.2407836914062, + "loss": 1.0326, + "margin_dpo/margin_mean": 239.12875366210938, + "margin_dpo/margin_std": 257.76385498046875, + "step": 327 + }, + { + "KL/chosen_KL_mean": -292.6146240234375, + "KL/mean": -400.96087646484375, + "KL/rejected_KL_mean": -509.3071594238281, + "KL/std": 242.0338134765625, + "epoch": 0.48164464023494863, + "fcm_dpo/beta": 0.001954542938619852, + "fcm_dpo/delta": -0.024569327011704445, + "fcm_dpo/margin": 216.6925811767578, + "fcm_dpo/q_t": 0.40333792567253113, + "grad_norm": 22.35950469970703, + "learning_rate": 3.1097843002709427e-07, + "logits/chosen": -0.4434688091278076, + "logits/rejected": -0.4467797577381134, + "logps/chosen": -352.30181884765625, + "logps/ref_chosen": -59.68719482421875, + "logps/ref_rejected": -90.85499572753906, + "logps/rejected": -600.1621704101562, + "loss": 1.0738, + "margin_dpo/margin_mean": 216.69256591796875, + "margin_dpo/margin_std": 276.7907409667969, + "step": 328 + }, + { + "KL/chosen_KL_mean": -322.8833923339844, + "KL/mean": -433.7863464355469, + "KL/rejected_KL_mean": -544.6892700195312, + "KL/std": 254.3605194091797, + "epoch": 0.4831130690161527, + "fcm_dpo/beta": 0.0019329939968883991, + "fcm_dpo/delta": -0.030582299456000328, + "fcm_dpo/margin": 221.80587768554688, + "fcm_dpo/q_t": 0.4004287123680115, + "grad_norm": 30.763805389404297, + "learning_rate": 3.0973306224962437e-07, + "logits/chosen": -0.42895740270614624, + "logits/rejected": -0.42029309272766113, + "logps/chosen": -388.12957763671875, + "logps/ref_chosen": -65.2461929321289, + "logps/ref_rejected": -100.69770812988281, + "logps/rejected": -645.386962890625, + "loss": 1.0718, + "margin_dpo/margin_mean": 221.80587768554688, + "margin_dpo/margin_std": 272.5767517089844, + "step": 329 + }, + { + "KL/chosen_KL_mean": -270.37603759765625, + "KL/mean": -380.49884033203125, + "KL/rejected_KL_mean": -490.62164306640625, + "KL/std": 230.03123474121094, + "epoch": 0.4845814977973568, + "fcm_dpo/beta": 0.001930012134835124, + "fcm_dpo/delta": -0.026210233569145203, + "fcm_dpo/margin": 220.24560546875, + "fcm_dpo/q_t": 0.40108194947242737, + "grad_norm": 25.122922897338867, + "learning_rate": 3.084861204504122e-07, + "logits/chosen": -0.3836897611618042, + "logits/rejected": -0.38447412848472595, + "logps/chosen": -317.3743591308594, + "logps/ref_chosen": -46.998348236083984, + "logps/ref_rejected": -86.87684631347656, + "logps/rejected": -577.4984741210938, + "loss": 1.0643, + "margin_dpo/margin_mean": 220.24560546875, + "margin_dpo/margin_std": 259.4215087890625, + "step": 330 + }, + { + "KL/chosen_KL_mean": -291.59906005859375, + "KL/mean": -409.332275390625, + "KL/rejected_KL_mean": -527.0654907226562, + "KL/std": 194.7655029296875, + "epoch": 0.48604992657856094, + "fcm_dpo/beta": 0.0019126099068671465, + "fcm_dpo/delta": -0.05275537818670273, + "fcm_dpo/margin": 235.46646118164062, + "fcm_dpo/q_t": 0.39248865842819214, + "grad_norm": 23.949661254882812, + "learning_rate": 3.072376374875335e-07, + "logits/chosen": -0.44039618968963623, + "logits/rejected": -0.4362325668334961, + "logps/chosen": -342.123291015625, + "logps/ref_chosen": -50.52424621582031, + "logps/ref_rejected": -89.01544189453125, + "logps/rejected": -616.0809326171875, + "loss": 1.0157, + "margin_dpo/margin_mean": 235.46646118164062, + "margin_dpo/margin_std": 187.62429809570312, + "step": 331 + }, + { + "KL/chosen_KL_mean": -296.7283935546875, + "KL/mean": -378.4895935058594, + "KL/rejected_KL_mean": -460.2508544921875, + "KL/std": 197.91973876953125, + "epoch": 0.48751835535976507, + "fcm_dpo/beta": 0.0019352274248376489, + "fcm_dpo/delta": 0.0861460417509079, + "fcm_dpo/margin": 163.52247619628906, + "fcm_dpo/q_t": 0.4261719584465027, + "grad_norm": 22.072265625, + "learning_rate": 3.059876462596758e-07, + "logits/chosen": -0.43224036693573, + "logits/rejected": -0.41115298867225647, + "logps/chosen": -345.9086608886719, + "logps/ref_chosen": -49.18028259277344, + "logps/ref_rejected": -76.48515319824219, + "logps/rejected": -536.7359619140625, + "loss": 1.1374, + "margin_dpo/margin_mean": 163.52249145507812, + "margin_dpo/margin_std": 219.849853515625, + "step": 332 + }, + { + "KL/chosen_KL_mean": -331.73468017578125, + "KL/mean": -443.3945617675781, + "KL/rejected_KL_mean": -555.054443359375, + "KL/std": 243.6097412109375, + "epoch": 0.4889867841409692, + "fcm_dpo/beta": 0.0019211724866181612, + "fcm_dpo/delta": -0.03121526539325714, + "fcm_dpo/margin": 223.31976318359375, + "fcm_dpo/q_t": 0.40155458450317383, + "grad_norm": 22.271997451782227, + "learning_rate": 3.0473617970527015e-07, + "logits/chosen": -0.44901585578918457, + "logits/rejected": -0.4433661699295044, + "logps/chosen": -395.4904479980469, + "logps/ref_chosen": -63.75574493408203, + "logps/ref_rejected": -95.04411315917969, + "logps/rejected": -650.0985717773438, + "loss": 1.0795, + "margin_dpo/margin_mean": 223.31976318359375, + "margin_dpo/margin_std": 292.02545166015625, + "step": 333 + }, + { + "KL/chosen_KL_mean": -331.0152587890625, + "KL/mean": -430.8121643066406, + "KL/rejected_KL_mean": -530.609130859375, + "KL/std": 269.2216796875, + "epoch": 0.49045521292217326, + "fcm_dpo/beta": 0.001930908882059157, + "fcm_dpo/delta": 0.015167435631155968, + "fcm_dpo/margin": 199.5938720703125, + "fcm_dpo/q_t": 0.4123017191886902, + "grad_norm": 26.76993179321289, + "learning_rate": 3.034832708016243e-07, + "logits/chosen": -0.44665104150772095, + "logits/rejected": -0.44349536299705505, + "logps/chosen": -397.9949951171875, + "logps/ref_chosen": -66.97975158691406, + "logps/ref_rejected": -95.31692504882812, + "logps/rejected": -625.926025390625, + "loss": 1.1182, + "margin_dpo/margin_mean": 199.5938720703125, + "margin_dpo/margin_std": 301.0901794433594, + "step": 334 + }, + { + "KL/chosen_KL_mean": -356.3892517089844, + "KL/mean": -430.55914306640625, + "KL/rejected_KL_mean": -504.72900390625, + "KL/std": 242.3770294189453, + "epoch": 0.4919236417033774, + "fcm_dpo/beta": 0.001974080689251423, + "fcm_dpo/delta": 0.10976044833660126, + "fcm_dpo/margin": 148.3397216796875, + "fcm_dpo/q_t": 0.4333241581916809, + "grad_norm": 35.3482666015625, + "learning_rate": 3.022289525640531e-07, + "logits/chosen": -0.482355535030365, + "logits/rejected": -0.4589642584323883, + "logps/chosen": -418.9317321777344, + "logps/ref_chosen": -62.54248046875, + "logps/ref_rejected": -87.61770629882812, + "logps/rejected": -592.3466796875, + "loss": 1.1826, + "margin_dpo/margin_mean": 148.3397216796875, + "margin_dpo/margin_std": 268.9454650878906, + "step": 335 + }, + { + "KL/chosen_KL_mean": -359.19427490234375, + "KL/mean": -484.33697509765625, + "KL/rejected_KL_mean": -609.4796752929688, + "KL/std": 294.24444580078125, + "epoch": 0.4933920704845815, + "fcm_dpo/beta": 0.0019574996549636126, + "fcm_dpo/delta": -0.09448903799057007, + "fcm_dpo/margin": 250.28536987304688, + "fcm_dpo/q_t": 0.39117854833602905, + "grad_norm": 29.434301376342773, + "learning_rate": 3.009732580450086e-07, + "logits/chosen": -0.4395965039730072, + "logits/rejected": -0.44075945019721985, + "logps/chosen": -413.72540283203125, + "logps/ref_chosen": -54.53115463256836, + "logps/ref_rejected": -104.40424346923828, + "logps/rejected": -713.8839111328125, + "loss": 1.0614, + "margin_dpo/margin_mean": 250.285400390625, + "margin_dpo/margin_std": 347.0870666503906, + "step": 336 + }, + { + "KL/chosen_KL_mean": -325.9806213378906, + "KL/mean": -445.74078369140625, + "KL/rejected_KL_mean": -565.5008544921875, + "KL/std": 236.7762908935547, + "epoch": 0.4948604992657856, + "fcm_dpo/beta": 0.001918459078297019, + "fcm_dpo/delta": -0.06264565885066986, + "fcm_dpo/margin": 239.52027893066406, + "fcm_dpo/q_t": 0.3944718539714813, + "grad_norm": 29.495193481445312, + "learning_rate": 2.9971622033320914e-07, + "logits/chosen": -0.48390763998031616, + "logits/rejected": -0.4708746075630188, + "logps/chosen": -391.10931396484375, + "logps/ref_chosen": -65.12869262695312, + "logps/ref_rejected": -101.72701263427734, + "logps/rejected": -667.2279052734375, + "loss": 1.0447, + "margin_dpo/margin_mean": 239.52027893066406, + "margin_dpo/margin_std": 279.1695556640625, + "step": 337 + }, + { + "KL/chosen_KL_mean": -295.68658447265625, + "KL/mean": -416.0144958496094, + "KL/rejected_KL_mean": -536.3424072265625, + "KL/std": 231.92388916015625, + "epoch": 0.49632892804698975, + "fcm_dpo/beta": 0.0018986309878528118, + "fcm_dpo/delta": -0.059694305062294006, + "fcm_dpo/margin": 240.65579223632812, + "fcm_dpo/q_t": 0.392913818359375, + "grad_norm": 25.081783294677734, + "learning_rate": 2.984578725527675e-07, + "logits/chosen": -0.46351104974746704, + "logits/rejected": -0.4610709249973297, + "logps/chosen": -354.10931396484375, + "logps/ref_chosen": -58.422706604003906, + "logps/ref_rejected": -89.06854248046875, + "logps/rejected": -625.4109497070312, + "loss": 1.0302, + "margin_dpo/margin_mean": 240.65579223632812, + "margin_dpo/margin_std": 242.48886108398438, + "step": 338 + }, + { + "KL/chosen_KL_mean": -302.7047424316406, + "KL/mean": -416.17254638671875, + "KL/rejected_KL_mean": -529.640380859375, + "KL/std": 236.2411346435547, + "epoch": 0.4977973568281938, + "fcm_dpo/beta": 0.0018999692983925343, + "fcm_dpo/delta": -0.03361833840608597, + "fcm_dpo/margin": 226.93565368652344, + "fcm_dpo/q_t": 0.3996594548225403, + "grad_norm": 26.538564682006836, + "learning_rate": 2.9719824786231796e-07, + "logits/chosen": -0.5198140740394592, + "logits/rejected": -0.5054018497467041, + "logps/chosen": -362.7000732421875, + "logps/ref_chosen": -59.99531555175781, + "logps/ref_rejected": -103.9109115600586, + "logps/rejected": -633.55126953125, + "loss": 1.0607, + "margin_dpo/margin_mean": 226.93565368652344, + "margin_dpo/margin_std": 250.84112548828125, + "step": 339 + }, + { + "KL/chosen_KL_mean": -318.005859375, + "KL/mean": -415.7840270996094, + "KL/rejected_KL_mean": -513.5621948242188, + "KL/std": 228.09695434570312, + "epoch": 0.49926578560939794, + "fcm_dpo/beta": 0.0018835279624909163, + "fcm_dpo/delta": 0.03270437568426132, + "fcm_dpo/margin": 195.55636596679688, + "fcm_dpo/q_t": 0.4155174195766449, + "grad_norm": 23.04448890686035, + "learning_rate": 2.959373794541426e-07, + "logits/chosen": -0.39705830812454224, + "logits/rejected": -0.3717266917228699, + "logps/chosen": -370.8360595703125, + "logps/ref_chosen": -52.83022689819336, + "logps/ref_rejected": -73.10723114013672, + "logps/rejected": -586.66943359375, + "loss": 1.1182, + "margin_dpo/margin_mean": 195.55636596679688, + "margin_dpo/margin_std": 281.4355163574219, + "step": 340 + }, + { + "KL/chosen_KL_mean": -308.8104248046875, + "KL/mean": -426.990478515625, + "KL/rejected_KL_mean": -545.1705322265625, + "KL/std": 248.2613983154297, + "epoch": 0.5007342143906021, + "fcm_dpo/beta": 0.0018748041475191712, + "fcm_dpo/delta": -0.04550610110163689, + "fcm_dpo/margin": 236.36013793945312, + "fcm_dpo/q_t": 0.39694035053253174, + "grad_norm": 26.15273094177246, + "learning_rate": 2.946753005532965e-07, + "logits/chosen": -0.42927074432373047, + "logits/rejected": -0.42871958017349243, + "logps/chosen": -356.7102355957031, + "logps/ref_chosen": -47.899803161621094, + "logps/ref_rejected": -101.80987548828125, + "logps/rejected": -646.98046875, + "loss": 1.0485, + "margin_dpo/margin_mean": 236.36013793945312, + "margin_dpo/margin_std": 258.4355163574219, + "step": 341 + }, + { + "KL/chosen_KL_mean": -302.05859375, + "KL/mean": -402.9062805175781, + "KL/rejected_KL_mean": -503.75396728515625, + "KL/std": 237.40493774414062, + "epoch": 0.5022026431718062, + "fcm_dpo/beta": 0.0018906050827354193, + "fcm_dpo/delta": 0.01890111342072487, + "fcm_dpo/margin": 201.69537353515625, + "fcm_dpo/q_t": 0.41175463795661926, + "grad_norm": 23.40822410583496, + "learning_rate": 2.934120444167326e-07, + "logits/chosen": -0.47432941198349, + "logits/rejected": -0.442875474691391, + "logps/chosen": -374.05523681640625, + "logps/ref_chosen": -71.99664306640625, + "logps/ref_rejected": -92.58959197998047, + "logps/rejected": -596.3435668945312, + "loss": 1.111, + "margin_dpo/margin_mean": 201.69537353515625, + "margin_dpo/margin_std": 286.6202392578125, + "step": 342 + }, + { + "KL/chosen_KL_mean": -271.26263427734375, + "KL/mean": -396.2135314941406, + "KL/rejected_KL_mean": -521.1644287109375, + "KL/std": 233.09645080566406, + "epoch": 0.5036710719530103, + "fcm_dpo/beta": 0.0018635441083461046, + "fcm_dpo/delta": -0.06895594298839569, + "fcm_dpo/margin": 249.90182495117188, + "fcm_dpo/q_t": 0.3894881308078766, + "grad_norm": 26.693635940551758, + "learning_rate": 2.9214764433242476e-07, + "logits/chosen": -0.4619428813457489, + "logits/rejected": -0.4649538993835449, + "logps/chosen": -325.6682434082031, + "logps/ref_chosen": -54.405616760253906, + "logps/ref_rejected": -111.04142761230469, + "logps/rejected": -632.2058715820312, + "loss": 1.0107, + "margin_dpo/margin_mean": 249.90182495117188, + "margin_dpo/margin_std": 208.7919158935547, + "step": 343 + }, + { + "KL/chosen_KL_mean": -278.57086181640625, + "KL/mean": -390.90570068359375, + "KL/rejected_KL_mean": -503.2406005859375, + "KL/std": 266.9576416015625, + "epoch": 0.5051395007342144, + "fcm_dpo/beta": 0.0018696986371651292, + "fcm_dpo/delta": -0.022609613835811615, + "fcm_dpo/margin": 224.6697235107422, + "fcm_dpo/q_t": 0.40508079528808594, + "grad_norm": 26.9531192779541, + "learning_rate": 2.9088213361849126e-07, + "logits/chosen": -0.4527415633201599, + "logits/rejected": -0.44961199164390564, + "logps/chosen": -332.5355224609375, + "logps/ref_chosen": -53.96466827392578, + "logps/ref_rejected": -90.62336730957031, + "logps/rejected": -593.8639526367188, + "loss": 1.0759, + "margin_dpo/margin_mean": 224.6697235107422, + "margin_dpo/margin_std": 269.2288818359375, + "step": 344 + }, + { + "KL/chosen_KL_mean": -335.9206237792969, + "KL/mean": -457.4391174316406, + "KL/rejected_KL_mean": -578.9576416015625, + "KL/std": 244.952392578125, + "epoch": 0.5066079295154186, + "fcm_dpo/beta": 0.0018422373104840517, + "fcm_dpo/delta": -0.0500393845140934, + "fcm_dpo/margin": 243.03695678710938, + "fcm_dpo/q_t": 0.395746111869812, + "grad_norm": 25.29005241394043, + "learning_rate": 2.896155456223163e-07, + "logits/chosen": -0.4590086340904236, + "logits/rejected": -0.4529004395008087, + "logps/chosen": -397.6063232421875, + "logps/ref_chosen": -61.685699462890625, + "logps/ref_rejected": -99.49041748046875, + "logps/rejected": -678.447998046875, + "loss": 1.0482, + "margin_dpo/margin_mean": 243.03695678710938, + "margin_dpo/margin_std": 273.3504638671875, + "step": 345 + }, + { + "KL/chosen_KL_mean": -342.43505859375, + "KL/mean": -449.899658203125, + "KL/rejected_KL_mean": -557.3642578125, + "KL/std": 243.40853881835938, + "epoch": 0.5080763582966226, + "fcm_dpo/beta": 0.0018314840272068977, + "fcm_dpo/delta": 0.0066130333580076694, + "fcm_dpo/margin": 214.92916870117188, + "fcm_dpo/q_t": 0.4076474905014038, + "grad_norm": 26.261621475219727, + "learning_rate": 2.883479137196714e-07, + "logits/chosen": -0.42875561118125916, + "logits/rejected": -0.41599297523498535, + "logps/chosen": -397.69134521484375, + "logps/ref_chosen": -55.256263732910156, + "logps/ref_rejected": -77.41532135009766, + "logps/rejected": -634.779541015625, + "loss": 1.0887, + "margin_dpo/margin_mean": 214.92916870117188, + "margin_dpo/margin_std": 269.51617431640625, + "step": 346 + }, + { + "KL/chosen_KL_mean": -348.1890563964844, + "KL/mean": -457.7886047363281, + "KL/rejected_KL_mean": -567.38818359375, + "KL/std": 259.5652160644531, + "epoch": 0.5095447870778267, + "fcm_dpo/beta": 0.0018333385232836008, + "fcm_dpo/delta": -0.0019443881465122104, + "fcm_dpo/margin": 219.19906616210938, + "fcm_dpo/q_t": 0.4073731303215027, + "grad_norm": 21.977977752685547, + "learning_rate": 2.8707927131383614e-07, + "logits/chosen": -0.423196017742157, + "logits/rejected": -0.4180574417114258, + "logps/chosen": -405.75531005859375, + "logps/ref_chosen": -57.56623840332031, + "logps/ref_rejected": -92.35509490966797, + "logps/rejected": -659.7432861328125, + "loss": 1.0904, + "margin_dpo/margin_mean": 219.19906616210938, + "margin_dpo/margin_std": 289.9748229980469, + "step": 347 + }, + { + "KL/chosen_KL_mean": -307.3543701171875, + "KL/mean": -404.4385070800781, + "KL/rejected_KL_mean": -501.52264404296875, + "KL/std": 220.54977416992188, + "epoch": 0.5110132158590308, + "fcm_dpo/beta": 0.0018353135092183948, + "fcm_dpo/delta": 0.04490099474787712, + "fcm_dpo/margin": 194.16824340820312, + "fcm_dpo/q_t": 0.4174761176109314, + "grad_norm": 25.500268936157227, + "learning_rate": 2.858096518347179e-07, + "logits/chosen": -0.44019395112991333, + "logits/rejected": -0.43998709321022034, + "logps/chosen": -363.67205810546875, + "logps/ref_chosen": -56.31770324707031, + "logps/ref_rejected": -89.13836669921875, + "logps/rejected": -590.6610107421875, + "loss": 1.1141, + "margin_dpo/margin_mean": 194.16824340820312, + "margin_dpo/margin_std": 251.42027282714844, + "step": 348 + }, + { + "KL/chosen_KL_mean": -298.98748779296875, + "KL/mean": -411.78753662109375, + "KL/rejected_KL_mean": -524.587646484375, + "KL/std": 259.8543395996094, + "epoch": 0.5124816446402349, + "fcm_dpo/beta": 0.0018510316731408238, + "fcm_dpo/delta": -0.018560701981186867, + "fcm_dpo/margin": 225.6001739501953, + "fcm_dpo/q_t": 0.40646952390670776, + "grad_norm": 21.230770111083984, + "learning_rate": 2.845390887379706e-07, + "logits/chosen": -0.41702014207839966, + "logits/rejected": -0.41563892364501953, + "logps/chosen": -357.01300048828125, + "logps/ref_chosen": -58.025516510009766, + "logps/ref_rejected": -97.50515747070312, + "logps/rejected": -622.0927734375, + "loss": 1.0942, + "margin_dpo/margin_mean": 225.60018920898438, + "margin_dpo/margin_std": 322.47943115234375, + "step": 349 + }, + { + "KL/chosen_KL_mean": -325.2149353027344, + "KL/mean": -437.28228759765625, + "KL/rejected_KL_mean": -549.349609375, + "KL/std": 247.470703125, + "epoch": 0.5139500734214391, + "fcm_dpo/beta": 0.0018313410691916943, + "fcm_dpo/delta": -0.011365924030542374, + "fcm_dpo/margin": 224.13467407226562, + "fcm_dpo/q_t": 0.40537285804748535, + "grad_norm": 27.918197631835938, + "learning_rate": 2.8326761550411346e-07, + "logits/chosen": -0.47454479336738586, + "logits/rejected": -0.477811336517334, + "logps/chosen": -389.54541015625, + "logps/ref_chosen": -64.33049011230469, + "logps/ref_rejected": -89.87164306640625, + "logps/rejected": -639.2212524414062, + "loss": 1.0923, + "margin_dpo/margin_mean": 224.13467407226562, + "margin_dpo/margin_std": 306.51544189453125, + "step": 350 + }, + { + "KL/chosen_KL_mean": -301.2596130371094, + "KL/mean": -436.33697509765625, + "KL/rejected_KL_mean": -571.4143676757812, + "KL/std": 292.23785400390625, + "epoch": 0.5154185022026432, + "fcm_dpo/beta": 0.0018058628775179386, + "fcm_dpo/delta": -0.09307081252336502, + "fcm_dpo/margin": 270.1547546386719, + "fcm_dpo/q_t": 0.3909297585487366, + "grad_norm": 29.366575241088867, + "learning_rate": 2.819952656376487e-07, + "logits/chosen": -0.44908708333969116, + "logits/rejected": -0.44883760809898376, + "logps/chosen": -361.9317321777344, + "logps/ref_chosen": -60.6721305847168, + "logps/ref_rejected": -101.5654296875, + "logps/rejected": -672.9797973632812, + "loss": 1.0433, + "margin_dpo/margin_mean": 270.15478515625, + "margin_dpo/margin_std": 341.22308349609375, + "step": 351 + }, + { + "KL/chosen_KL_mean": -359.5778503417969, + "KL/mean": -438.5601806640625, + "KL/rejected_KL_mean": -517.5425415039062, + "KL/std": 265.6382141113281, + "epoch": 0.5168869309838473, + "fcm_dpo/beta": 0.0018303534016013145, + "fcm_dpo/delta": 0.11433063447475433, + "fcm_dpo/margin": 157.96470642089844, + "fcm_dpo/q_t": 0.4339728057384491, + "grad_norm": 27.643993377685547, + "learning_rate": 2.8072207266617854e-07, + "logits/chosen": -0.47322726249694824, + "logits/rejected": -0.4383177161216736, + "logps/chosen": -430.52130126953125, + "logps/ref_chosen": -70.9434585571289, + "logps/ref_rejected": -76.6419677734375, + "logps/rejected": -594.1845092773438, + "loss": 1.1878, + "margin_dpo/margin_mean": 157.96470642089844, + "margin_dpo/margin_std": 294.22454833984375, + "step": 352 + }, + { + "KL/chosen_KL_mean": -324.626953125, + "KL/mean": -434.9208068847656, + "KL/rejected_KL_mean": -545.214599609375, + "KL/std": 259.8445739746094, + "epoch": 0.5183553597650514, + "fcm_dpo/beta": 0.001833123154938221, + "fcm_dpo/delta": -0.005401637405157089, + "fcm_dpo/margin": 220.58767700195312, + "fcm_dpo/q_t": 0.4085083305835724, + "grad_norm": 36.342987060546875, + "learning_rate": 2.794480701395219e-07, + "logits/chosen": -0.48048973083496094, + "logits/rejected": -0.46663162112236023, + "logps/chosen": -383.02227783203125, + "logps/ref_chosen": -58.39533996582031, + "logps/ref_rejected": -80.33553314208984, + "logps/rejected": -625.5501708984375, + "loss": 1.0973, + "margin_dpo/margin_mean": 220.58767700195312, + "margin_dpo/margin_std": 301.33837890625, + "step": 353 + }, + { + "KL/chosen_KL_mean": -281.23583984375, + "KL/mean": -396.7293701171875, + "KL/rejected_KL_mean": -512.222900390625, + "KL/std": 240.41439819335938, + "epoch": 0.5198237885462555, + "fcm_dpo/beta": 0.001836308278143406, + "fcm_dpo/delta": -0.025253523141145706, + "fcm_dpo/margin": 230.987060546875, + "fcm_dpo/q_t": 0.4003763198852539, + "grad_norm": 22.05979347229004, + "learning_rate": 2.781732916288303e-07, + "logits/chosen": -0.4675145745277405, + "logits/rejected": -0.4558253884315491, + "logps/chosen": -341.038818359375, + "logps/ref_chosen": -59.80299377441406, + "logps/ref_rejected": -88.75750732421875, + "logps/rejected": -600.9804077148438, + "loss": 1.0521, + "margin_dpo/margin_mean": 230.987060546875, + "margin_dpo/margin_std": 238.68994140625, + "step": 354 + }, + { + "KL/chosen_KL_mean": -278.87738037109375, + "KL/mean": -394.18597412109375, + "KL/rejected_KL_mean": -509.4945983886719, + "KL/std": 236.02166748046875, + "epoch": 0.5212922173274597, + "fcm_dpo/beta": 0.0018241136567667127, + "fcm_dpo/delta": -0.021784018725156784, + "fcm_dpo/margin": 230.61721801757812, + "fcm_dpo/q_t": 0.4008174538612366, + "grad_norm": 39.719356536865234, + "learning_rate": 2.7689777072570284e-07, + "logits/chosen": -0.5206550359725952, + "logits/rejected": -0.5103884339332581, + "logps/chosen": -333.0058898925781, + "logps/ref_chosen": -54.12849807739258, + "logps/ref_rejected": -82.40606689453125, + "logps/rejected": -591.900634765625, + "loss": 1.0547, + "margin_dpo/margin_mean": 230.6171875, + "margin_dpo/margin_std": 236.90528869628906, + "step": 355 + }, + { + "KL/chosen_KL_mean": -327.414794921875, + "KL/mean": -388.6732177734375, + "KL/rejected_KL_mean": -449.9316101074219, + "KL/std": 244.87353515625, + "epoch": 0.5227606461086637, + "fcm_dpo/beta": 0.0018336132634431124, + "fcm_dpo/delta": 0.03628718480467796, + "fcm_dpo/margin": 122.51679992675781, + "fcm_dpo/q_t": 0.4491380453109741, + "grad_norm": 27.5300350189209, + "learning_rate": 2.7562154104130176e-07, + "logits/chosen": -0.4934132695198059, + "logits/rejected": -0.46816959977149963, + "logps/chosen": -392.088623046875, + "logps/ref_chosen": -64.6738052368164, + "logps/ref_rejected": -75.89926147460938, + "logps/rejected": -525.8308715820312, + "loss": 1.2489, + "margin_dpo/margin_mean": 122.51680755615234, + "margin_dpo/margin_std": 300.3589782714844, + "step": 356 + }, + { + "KL/chosen_KL_mean": -283.6260681152344, + "KL/mean": -386.2947082519531, + "KL/rejected_KL_mean": -488.9633483886719, + "KL/std": 233.4674835205078, + "epoch": 0.5242290748898678, + "fcm_dpo/beta": 0.0018397256499156356, + "fcm_dpo/delta": 0.022911615669727325, + "fcm_dpo/margin": 205.3372802734375, + "fcm_dpo/q_t": 0.41156771779060364, + "grad_norm": 27.42888641357422, + "learning_rate": 2.7434463620546594e-07, + "logits/chosen": -0.4698370695114136, + "logits/rejected": -0.4574124217033386, + "logps/chosen": -336.35186767578125, + "logps/ref_chosen": -52.725799560546875, + "logps/ref_rejected": -86.84115600585938, + "logps/rejected": -575.8045043945312, + "loss": 1.0915, + "margin_dpo/margin_mean": 205.3372802734375, + "margin_dpo/margin_std": 241.0042724609375, + "step": 357 + }, + { + "KL/chosen_KL_mean": -264.0908508300781, + "KL/mean": -358.65875244140625, + "KL/rejected_KL_mean": -453.22662353515625, + "KL/std": 234.24258422851562, + "epoch": 0.5256975036710719, + "fcm_dpo/beta": 0.001860738848336041, + "fcm_dpo/delta": 0.04985009878873825, + "fcm_dpo/margin": 189.13580322265625, + "fcm_dpo/q_t": 0.41889050602912903, + "grad_norm": 24.31972312927246, + "learning_rate": 2.730670898658255e-07, + "logits/chosen": -0.48346900939941406, + "logits/rejected": -0.4679170846939087, + "logps/chosen": -327.2962646484375, + "logps/ref_chosen": -63.20543670654297, + "logps/ref_rejected": -88.373291015625, + "logps/rejected": -541.5999755859375, + "loss": 1.1185, + "margin_dpo/margin_mean": 189.13580322265625, + "margin_dpo/margin_std": 257.4248352050781, + "step": 358 + }, + { + "KL/chosen_KL_mean": -305.0037841796875, + "KL/mean": -414.48199462890625, + "KL/rejected_KL_mean": -523.960205078125, + "KL/std": 237.02291870117188, + "epoch": 0.527165932452276, + "fcm_dpo/beta": 0.0018613252323120832, + "fcm_dpo/delta": -0.007966313511133194, + "fcm_dpo/margin": 218.9563446044922, + "fcm_dpo/q_t": 0.4068043828010559, + "grad_norm": 26.327983856201172, + "learning_rate": 2.717889356869146e-07, + "logits/chosen": -0.43576061725616455, + "logits/rejected": -0.42485448718070984, + "logps/chosen": -361.3740234375, + "logps/ref_chosen": -56.370216369628906, + "logps/ref_rejected": -82.17375183105469, + "logps/rejected": -606.1339111328125, + "loss": 1.0842, + "margin_dpo/margin_mean": 218.95635986328125, + "margin_dpo/margin_std": 277.9966735839844, + "step": 359 + }, + { + "KL/chosen_KL_mean": -296.8334045410156, + "KL/mean": -387.6656188964844, + "KL/rejected_KL_mean": -478.4978332519531, + "KL/std": 208.1656951904297, + "epoch": 0.5286343612334802, + "fcm_dpo/beta": 0.001883307471871376, + "fcm_dpo/delta": 0.05981479212641716, + "fcm_dpo/margin": 181.6644287109375, + "fcm_dpo/q_t": 0.41872933506965637, + "grad_norm": 31.305709838867188, + "learning_rate": 2.7051020734928443e-07, + "logits/chosen": -0.4398846924304962, + "logits/rejected": -0.425040602684021, + "logps/chosen": -348.2937927246094, + "logps/ref_chosen": -51.460384368896484, + "logps/ref_rejected": -69.83892059326172, + "logps/rejected": -548.3367919921875, + "loss": 1.1061, + "margin_dpo/margin_mean": 181.6644287109375, + "margin_dpo/margin_std": 195.6152801513672, + "step": 360 + }, + { + "KL/chosen_KL_mean": -330.93896484375, + "KL/mean": -420.14593505859375, + "KL/rejected_KL_mean": -509.3529052734375, + "KL/std": 238.32623291015625, + "epoch": 0.5301027900146843, + "fcm_dpo/beta": 0.0019093567971140146, + "fcm_dpo/delta": 0.061102624982595444, + "fcm_dpo/margin": 178.4139404296875, + "fcm_dpo/q_t": 0.4211677312850952, + "grad_norm": 29.791786193847656, + "learning_rate": 2.6923093854861593e-07, + "logits/chosen": -0.4700263738632202, + "logits/rejected": -0.4652746319770813, + "logps/chosen": -384.8084716796875, + "logps/ref_chosen": -53.86951446533203, + "logps/ref_rejected": -90.7692642211914, + "logps/rejected": -600.1221923828125, + "loss": 1.1334, + "margin_dpo/margin_mean": 178.41392517089844, + "margin_dpo/margin_std": 260.59844970703125, + "step": 361 + }, + { + "KL/chosen_KL_mean": -293.47247314453125, + "KL/mean": -433.2166442871094, + "KL/rejected_KL_mean": -572.9608154296875, + "KL/std": 255.72384643554688, + "epoch": 0.5315712187958884, + "fcm_dpo/beta": 0.0018680819775909185, + "fcm_dpo/delta": -0.129384845495224, + "fcm_dpo/margin": 279.48834228515625, + "fcm_dpo/q_t": 0.37981897592544556, + "grad_norm": 21.00020408630371, + "learning_rate": 2.679511629948319e-07, + "logits/chosen": -0.4529603123664856, + "logits/rejected": -0.46245017647743225, + "logps/chosen": -352.1115417480469, + "logps/ref_chosen": -58.639060974121094, + "logps/ref_rejected": -105.58195495605469, + "logps/rejected": -678.542724609375, + "loss": 0.9909, + "margin_dpo/margin_mean": 279.48834228515625, + "margin_dpo/margin_std": 277.12896728515625, + "step": 362 + }, + { + "KL/chosen_KL_mean": -272.2425537109375, + "KL/mean": -413.42852783203125, + "KL/rejected_KL_mean": -554.614501953125, + "KL/std": 253.0455322265625, + "epoch": 0.5330396475770925, + "fcm_dpo/beta": 0.0018322591204196215, + "fcm_dpo/delta": -0.12359863519668579, + "fcm_dpo/margin": 282.3719482421875, + "fcm_dpo/q_t": 0.380690336227417, + "grad_norm": 26.93399429321289, + "learning_rate": 2.6667091441120816e-07, + "logits/chosen": -0.4361415505409241, + "logits/rejected": -0.42436856031417847, + "logps/chosen": -316.80096435546875, + "logps/ref_chosen": -44.558380126953125, + "logps/ref_rejected": -74.69496154785156, + "logps/rejected": -629.3094482421875, + "loss": 0.9956, + "margin_dpo/margin_mean": 282.3719482421875, + "margin_dpo/margin_std": 281.8260803222656, + "step": 363 + }, + { + "KL/chosen_KL_mean": -291.7043151855469, + "KL/mean": -398.51031494140625, + "KL/rejected_KL_mean": -505.3163757324219, + "KL/std": 243.93292236328125, + "epoch": 0.5345080763582967, + "fcm_dpo/beta": 0.001825526007451117, + "fcm_dpo/delta": 0.010103408247232437, + "fcm_dpo/margin": 213.612060546875, + "fcm_dpo/q_t": 0.40954408049583435, + "grad_norm": 27.668123245239258, + "learning_rate": 2.6539022653348575e-07, + "logits/chosen": -0.4502606987953186, + "logits/rejected": -0.46004268527030945, + "logps/chosen": -340.59893798828125, + "logps/ref_chosen": -48.894622802734375, + "logps/ref_rejected": -91.395751953125, + "logps/rejected": -596.712158203125, + "loss": 1.0951, + "margin_dpo/margin_mean": 213.612060546875, + "margin_dpo/margin_std": 277.07574462890625, + "step": 364 + }, + { + "KL/chosen_KL_mean": -293.99603271484375, + "KL/mean": -406.5279541015625, + "KL/rejected_KL_mean": -519.0599365234375, + "KL/std": 261.5389099121094, + "epoch": 0.5359765051395007, + "fcm_dpo/beta": 0.0018164238426834345, + "fcm_dpo/delta": -0.009190201759338379, + "fcm_dpo/margin": 225.06381225585938, + "fcm_dpo/q_t": 0.4062727391719818, + "grad_norm": 21.68266487121582, + "learning_rate": 2.641091331089811e-07, + "logits/chosen": -0.4506559371948242, + "logits/rejected": -0.46002912521362305, + "logps/chosen": -345.48876953125, + "logps/ref_chosen": -51.49274444580078, + "logps/ref_rejected": -92.70166778564453, + "logps/rejected": -611.7615966796875, + "loss": 1.074, + "margin_dpo/margin_mean": 225.06381225585938, + "margin_dpo/margin_std": 269.61572265625, + "step": 365 + }, + { + "KL/chosen_KL_mean": -268.130615234375, + "KL/mean": -385.05804443359375, + "KL/rejected_KL_mean": -501.9854736328125, + "KL/std": 256.3507385253906, + "epoch": 0.5374449339207048, + "fcm_dpo/beta": 0.0018009209306910634, + "fcm_dpo/delta": -0.022701263427734375, + "fcm_dpo/margin": 233.8548583984375, + "fcm_dpo/q_t": 0.40347927808761597, + "grad_norm": 23.559268951416016, + "learning_rate": 2.6282766789569736e-07, + "logits/chosen": -0.4541221857070923, + "logits/rejected": -0.46955257654190063, + "logps/chosen": -312.8511962890625, + "logps/ref_chosen": -44.7205696105957, + "logps/ref_rejected": -83.31040954589844, + "logps/rejected": -585.2958984375, + "loss": 1.0802, + "margin_dpo/margin_mean": 233.85484313964844, + "margin_dpo/margin_std": 303.1095275878906, + "step": 366 + }, + { + "KL/chosen_KL_mean": -283.7847900390625, + "KL/mean": -377.00250244140625, + "KL/rejected_KL_mean": -470.22015380859375, + "KL/std": 222.3704376220703, + "epoch": 0.5389133627019089, + "fcm_dpo/beta": 0.0018272295128554106, + "fcm_dpo/delta": 0.06128734350204468, + "fcm_dpo/margin": 186.4353485107422, + "fcm_dpo/q_t": 0.419744074344635, + "grad_norm": 18.683847427368164, + "learning_rate": 2.615458646614349e-07, + "logits/chosen": -0.49926167726516724, + "logits/rejected": -0.4835873246192932, + "logps/chosen": -342.1902160644531, + "logps/ref_chosen": -58.405418395996094, + "logps/ref_rejected": -76.75132751464844, + "logps/rejected": -546.971435546875, + "loss": 1.1238, + "margin_dpo/margin_mean": 186.4353485107422, + "margin_dpo/margin_std": 248.96206665039062, + "step": 367 + }, + { + "KL/chosen_KL_mean": -259.47998046875, + "KL/mean": -409.32464599609375, + "KL/rejected_KL_mean": -559.1693115234375, + "KL/std": 250.1630859375, + "epoch": 0.540381791483113, + "fcm_dpo/beta": 0.0017914584605023265, + "fcm_dpo/delta": -0.14462688565254211, + "fcm_dpo/margin": 299.6893310546875, + "fcm_dpo/q_t": 0.37201637029647827, + "grad_norm": 36.82114791870117, + "learning_rate": 2.6026375718290083e-07, + "logits/chosen": -0.4651143252849579, + "logits/rejected": -0.4765470325946808, + "logps/chosen": -303.9324951171875, + "logps/ref_chosen": -44.452518463134766, + "logps/ref_rejected": -98.55526733398438, + "logps/rejected": -657.724609375, + "loss": 0.9563, + "margin_dpo/margin_mean": 299.6893310546875, + "margin_dpo/margin_std": 219.16549682617188, + "step": 368 + }, + { + "KL/chosen_KL_mean": -349.58837890625, + "KL/mean": -428.125244140625, + "KL/rejected_KL_mean": -506.66204833984375, + "KL/std": 251.02548217773438, + "epoch": 0.5418502202643172, + "fcm_dpo/beta": 0.0018156407168135047, + "fcm_dpo/delta": 0.11753154546022415, + "fcm_dpo/margin": 157.07363891601562, + "fcm_dpo/q_t": 0.43435177206993103, + "grad_norm": 28.619403839111328, + "learning_rate": 2.589813792448196e-07, + "logits/chosen": -0.47754406929016113, + "logits/rejected": -0.460124135017395, + "logps/chosen": -420.96990966796875, + "logps/ref_chosen": -71.38150024414062, + "logps/ref_rejected": -91.29582214355469, + "logps/rejected": -597.9578857421875, + "loss": 1.1905, + "margin_dpo/margin_mean": 157.07363891601562, + "margin_dpo/margin_std": 296.42974853515625, + "step": 369 + }, + { + "KL/chosen_KL_mean": -352.63336181640625, + "KL/mean": -427.6588134765625, + "KL/rejected_KL_mean": -502.6842346191406, + "KL/std": 246.27041625976562, + "epoch": 0.5433186490455213, + "fcm_dpo/beta": 0.0018587787635624409, + "fcm_dpo/delta": 0.12413851916790009, + "fcm_dpo/margin": 150.05091857910156, + "fcm_dpo/q_t": 0.43549519777297974, + "grad_norm": 25.48780059814453, + "learning_rate": 2.5769876463904263e-07, + "logits/chosen": -0.47866642475128174, + "logits/rejected": -0.47251564264297485, + "logps/chosen": -424.2408447265625, + "logps/ref_chosen": -71.60749816894531, + "logps/ref_rejected": -97.25978088378906, + "logps/rejected": -599.9440307617188, + "loss": 1.1907, + "margin_dpo/margin_mean": 150.05091857910156, + "margin_dpo/margin_std": 276.411376953125, + "step": 370 + }, + { + "KL/chosen_KL_mean": -345.16607666015625, + "KL/mean": -453.348876953125, + "KL/rejected_KL_mean": -561.5316772460938, + "KL/std": 264.92401123046875, + "epoch": 0.5447870778267254, + "fcm_dpo/beta": 0.001868913066573441, + "fcm_dpo/delta": -0.004648314788937569, + "fcm_dpo/margin": 216.36563110351562, + "fcm_dpo/q_t": 0.40844932198524475, + "grad_norm": 24.080177307128906, + "learning_rate": 2.5641594716365744e-07, + "logits/chosen": -0.5107867121696472, + "logits/rejected": -0.49860259890556335, + "logps/chosen": -414.58056640625, + "logps/ref_chosen": -69.41448974609375, + "logps/ref_rejected": -99.17217254638672, + "logps/rejected": -660.703857421875, + "loss": 1.1036, + "margin_dpo/margin_mean": 216.36563110351562, + "margin_dpo/margin_std": 315.74822998046875, + "step": 371 + }, + { + "KL/chosen_KL_mean": -327.27838134765625, + "KL/mean": -456.94696044921875, + "KL/rejected_KL_mean": -586.6155395507812, + "KL/std": 292.30975341796875, + "epoch": 0.5462555066079295, + "fcm_dpo/beta": 0.001838641008362174, + "fcm_dpo/delta": -0.08088327199220657, + "fcm_dpo/margin": 259.337158203125, + "fcm_dpo/q_t": 0.3929249942302704, + "grad_norm": 23.18116569519043, + "learning_rate": 2.551329606220976e-07, + "logits/chosen": -0.47245320677757263, + "logits/rejected": -0.44926324486732483, + "logps/chosen": -389.09637451171875, + "logps/ref_chosen": -61.8179931640625, + "logps/ref_rejected": -78.53948974609375, + "logps/rejected": -665.155029296875, + "loss": 1.0477, + "margin_dpo/margin_mean": 259.3371887207031, + "margin_dpo/margin_std": 330.2596435546875, + "step": 372 + }, + { + "KL/chosen_KL_mean": -352.394775390625, + "KL/mean": -474.00799560546875, + "KL/rejected_KL_mean": -595.6212158203125, + "KL/std": 281.63482666015625, + "epoch": 0.5477239353891337, + "fcm_dpo/beta": 0.0018302889075130224, + "fcm_dpo/delta": -0.04753255099058151, + "fcm_dpo/margin": 243.22650146484375, + "fcm_dpo/q_t": 0.39616119861602783, + "grad_norm": 29.970626831054688, + "learning_rate": 2.538498388222517e-07, + "logits/chosen": -0.4819701910018921, + "logits/rejected": -0.45813024044036865, + "logps/chosen": -416.6119079589844, + "logps/ref_chosen": -64.21713256835938, + "logps/ref_rejected": -85.95960998535156, + "logps/rejected": -681.580810546875, + "loss": 1.0512, + "margin_dpo/margin_mean": 243.22650146484375, + "margin_dpo/margin_std": 269.6577453613281, + "step": 373 + }, + { + "KL/chosen_KL_mean": -333.73046875, + "KL/mean": -441.07891845703125, + "KL/rejected_KL_mean": -548.4273681640625, + "KL/std": 305.963623046875, + "epoch": 0.5491923641703378, + "fcm_dpo/beta": 0.0018100242596119642, + "fcm_dpo/delta": 0.011296160519123077, + "fcm_dpo/margin": 214.69683837890625, + "fcm_dpo/q_t": 0.4148157835006714, + "grad_norm": 37.50387954711914, + "learning_rate": 2.525666155755725e-07, + "logits/chosen": -0.5606328845024109, + "logits/rejected": -0.542881429195404, + "logps/chosen": -404.38067626953125, + "logps/ref_chosen": -70.65018463134766, + "logps/ref_rejected": -93.64016723632812, + "logps/rejected": -642.0675048828125, + "loss": 1.128, + "margin_dpo/margin_mean": 214.6968536376953, + "margin_dpo/margin_std": 351.6225891113281, + "step": 374 + }, + { + "KL/chosen_KL_mean": -341.535888671875, + "KL/mean": -445.7072448730469, + "KL/rejected_KL_mean": -549.8786010742188, + "KL/std": 251.57171630859375, + "epoch": 0.5506607929515418, + "fcm_dpo/beta": 0.0018158955499529839, + "fcm_dpo/delta": 0.021893244236707687, + "fcm_dpo/margin": 208.34274291992188, + "fcm_dpo/q_t": 0.4129091203212738, + "grad_norm": 48.105567932128906, + "learning_rate": 2.512833246961859e-07, + "logits/chosen": -0.49703970551490784, + "logits/rejected": -0.4937781095504761, + "logps/chosen": -401.6160888671875, + "logps/ref_chosen": -60.080223083496094, + "logps/ref_rejected": -88.93830871582031, + "logps/rejected": -638.81689453125, + "loss": 1.1173, + "margin_dpo/margin_mean": 208.3427276611328, + "margin_dpo/margin_std": 301.77508544921875, + "step": 375 + }, + { + "KL/chosen_KL_mean": -335.3367919921875, + "KL/mean": -466.28643798828125, + "KL/rejected_KL_mean": -597.236083984375, + "KL/std": 274.84326171875, + "epoch": 0.5521292217327459, + "fcm_dpo/beta": 0.0018059706781059504, + "fcm_dpo/delta": -0.07673737406730652, + "fcm_dpo/margin": 261.89935302734375, + "fcm_dpo/q_t": 0.39158695936203003, + "grad_norm": 26.295751571655273, + "learning_rate": 2.5e-07, + "logits/chosen": -0.5031468272209167, + "logits/rejected": -0.49027374386787415, + "logps/chosen": -397.9971008300781, + "logps/ref_chosen": -62.660308837890625, + "logps/ref_rejected": -105.52660369873047, + "logps/rejected": -702.7626953125, + "loss": 1.0434, + "margin_dpo/margin_mean": 261.89935302734375, + "margin_dpo/margin_std": 315.4093322753906, + "step": 376 + }, + { + "KL/chosen_KL_mean": -340.5239562988281, + "KL/mean": -469.12371826171875, + "KL/rejected_KL_mean": -597.7235107421875, + "KL/std": 295.64276123046875, + "epoch": 0.55359765051395, + "fcm_dpo/beta": 0.0017904455307871103, + "fcm_dpo/delta": -0.06344657391309738, + "fcm_dpo/margin": 257.19952392578125, + "fcm_dpo/q_t": 0.39518678188323975, + "grad_norm": 22.948881149291992, + "learning_rate": 2.487166753038141e-07, + "logits/chosen": -0.4441227614879608, + "logits/rejected": -0.44588595628738403, + "logps/chosen": -395.002685546875, + "logps/ref_chosen": -54.478736877441406, + "logps/ref_rejected": -98.70335388183594, + "logps/rejected": -696.4268188476562, + "loss": 1.0531, + "margin_dpo/margin_mean": 257.19952392578125, + "margin_dpo/margin_std": 316.891357421875, + "step": 377 + }, + { + "KL/chosen_KL_mean": -318.0654296875, + "KL/mean": -454.87060546875, + "KL/rejected_KL_mean": -591.67578125, + "KL/std": 273.9302978515625, + "epoch": 0.5550660792951542, + "fcm_dpo/beta": 0.0017552496865391731, + "fcm_dpo/delta": -0.08436104655265808, + "fcm_dpo/margin": 273.6103820800781, + "fcm_dpo/q_t": 0.38744112849235535, + "grad_norm": 28.960853576660156, + "learning_rate": 2.4743338442442754e-07, + "logits/chosen": -0.4610844552516937, + "logits/rejected": -0.47619086503982544, + "logps/chosen": -363.0859680175781, + "logps/ref_chosen": -45.02053451538086, + "logps/ref_rejected": -88.0469741821289, + "logps/rejected": -679.7227783203125, + "loss": 1.0243, + "margin_dpo/margin_mean": 273.6103515625, + "margin_dpo/margin_std": 292.38726806640625, + "step": 378 + }, + { + "KL/chosen_KL_mean": -354.04534912109375, + "KL/mean": -489.762939453125, + "KL/rejected_KL_mean": -625.4805908203125, + "KL/std": 277.69482421875, + "epoch": 0.5565345080763583, + "fcm_dpo/beta": 0.001722155138850212, + "fcm_dpo/delta": -0.07153955847024918, + "fcm_dpo/margin": 271.4352722167969, + "fcm_dpo/q_t": 0.3932555019855499, + "grad_norm": 25.876663208007812, + "learning_rate": 2.461501611777483e-07, + "logits/chosen": -0.43229052424430847, + "logits/rejected": -0.4548417925834656, + "logps/chosen": -407.2274475097656, + "logps/ref_chosen": -53.182098388671875, + "logps/ref_rejected": -114.3001708984375, + "logps/rejected": -739.78076171875, + "loss": 1.0471, + "margin_dpo/margin_mean": 271.4352722167969, + "margin_dpo/margin_std": 328.153076171875, + "step": 379 + }, + { + "KL/chosen_KL_mean": -363.6824645996094, + "KL/mean": -511.7635192871094, + "KL/rejected_KL_mean": -659.8446044921875, + "KL/std": 315.0853576660156, + "epoch": 0.5580029368575624, + "fcm_dpo/beta": 0.0016977135092020035, + "fcm_dpo/delta": -0.10816927254199982, + "fcm_dpo/margin": 296.16217041015625, + "fcm_dpo/q_t": 0.3837231993675232, + "grad_norm": 24.72132682800293, + "learning_rate": 2.4486703937790243e-07, + "logits/chosen": -0.4478057622909546, + "logits/rejected": -0.47531557083129883, + "logps/chosen": -415.0354919433594, + "logps/ref_chosen": -51.3530387878418, + "logps/ref_rejected": -104.19169616699219, + "logps/rejected": -764.0363159179688, + "loss": 1.0225, + "margin_dpo/margin_mean": 296.16217041015625, + "margin_dpo/margin_std": 341.88568115234375, + "step": 380 + }, + { + "KL/chosen_KL_mean": -377.09307861328125, + "KL/mean": -475.0587158203125, + "KL/rejected_KL_mean": -573.0244140625, + "KL/std": 262.92779541015625, + "epoch": 0.5594713656387665, + "fcm_dpo/beta": 0.0017013371689245105, + "fcm_dpo/delta": 0.06901153177022934, + "fcm_dpo/margin": 195.93124389648438, + "fcm_dpo/q_t": 0.4243543744087219, + "grad_norm": 35.74467849731445, + "learning_rate": 2.435840528363426e-07, + "logits/chosen": -0.4533649682998657, + "logits/rejected": -0.4306221902370453, + "logps/chosen": -434.89617919921875, + "logps/ref_chosen": -57.80306625366211, + "logps/ref_rejected": -79.21940612792969, + "logps/rejected": -652.2437744140625, + "loss": 1.169, + "margin_dpo/margin_mean": 195.93124389648438, + "margin_dpo/margin_std": 360.15875244140625, + "step": 381 + }, + { + "KL/chosen_KL_mean": -365.1787109375, + "KL/mean": -495.492431640625, + "KL/rejected_KL_mean": -625.8062133789062, + "KL/std": 256.8338623046875, + "epoch": 0.5609397944199707, + "fcm_dpo/beta": 0.0017002095701172948, + "fcm_dpo/delta": -0.0450989231467247, + "fcm_dpo/margin": 260.62750244140625, + "fcm_dpo/q_t": 0.3970971703529358, + "grad_norm": 24.592235565185547, + "learning_rate": 2.4230123536095745e-07, + "logits/chosen": -0.49676984548568726, + "logits/rejected": -0.5024675130844116, + "logps/chosen": -431.1990051269531, + "logps/ref_chosen": -66.02030181884766, + "logps/ref_rejected": -110.71016693115234, + "logps/rejected": -736.516357421875, + "loss": 1.0485, + "margin_dpo/margin_mean": 260.62750244140625, + "margin_dpo/margin_std": 289.3924255371094, + "step": 382 + }, + { + "KL/chosen_KL_mean": -379.60955810546875, + "KL/mean": -507.6312255859375, + "KL/rejected_KL_mean": -635.6529541015625, + "KL/std": 288.3782958984375, + "epoch": 0.5624082232011748, + "fcm_dpo/beta": 0.0016838510055094957, + "fcm_dpo/delta": -0.03266420215368271, + "fcm_dpo/margin": 256.0433349609375, + "fcm_dpo/q_t": 0.40153148770332336, + "grad_norm": 29.20414924621582, + "learning_rate": 2.4101862075518037e-07, + "logits/chosen": -0.46060335636138916, + "logits/rejected": -0.4702298641204834, + "logps/chosen": -430.00103759765625, + "logps/ref_chosen": -50.39148712158203, + "logps/ref_rejected": -93.71589660644531, + "logps/rejected": -729.3687744140625, + "loss": 1.0922, + "margin_dpo/margin_mean": 256.0433654785156, + "margin_dpo/margin_std": 373.36480712890625, + "step": 383 + }, + { + "KL/chosen_KL_mean": -376.22772216796875, + "KL/mean": -477.7603759765625, + "KL/rejected_KL_mean": -579.2930908203125, + "KL/std": 248.6445770263672, + "epoch": 0.5638766519823789, + "fcm_dpo/beta": 0.001703817630186677, + "fcm_dpo/delta": 0.05552485212683678, + "fcm_dpo/margin": 203.06533813476562, + "fcm_dpo/q_t": 0.41818147897720337, + "grad_norm": 22.670204162597656, + "learning_rate": 2.397362428170992e-07, + "logits/chosen": -0.5022902488708496, + "logits/rejected": -0.49495917558670044, + "logps/chosen": -428.2738037109375, + "logps/ref_chosen": -52.046104431152344, + "logps/ref_rejected": -85.76089477539062, + "logps/rejected": -665.053955078125, + "loss": 1.1105, + "margin_dpo/margin_mean": 203.0653533935547, + "margin_dpo/margin_std": 239.64828491210938, + "step": 384 + }, + { + "KL/chosen_KL_mean": -350.5365295410156, + "KL/mean": -468.0144958496094, + "KL/rejected_KL_mean": -585.492431640625, + "KL/std": 216.46690368652344, + "epoch": 0.5653450807635829, + "fcm_dpo/beta": 0.0017044099513441324, + "fcm_dpo/delta": -0.0005003036931157112, + "fcm_dpo/margin": 234.95591735839844, + "fcm_dpo/q_t": 0.4049755930900574, + "grad_norm": 28.35698127746582, + "learning_rate": 2.3845413533856514e-07, + "logits/chosen": -0.5131600499153137, + "logits/rejected": -0.4851377606391907, + "logps/chosen": -416.08868408203125, + "logps/ref_chosen": -65.55215454101562, + "logps/ref_rejected": -77.82792663574219, + "logps/rejected": -663.3204345703125, + "loss": 1.0662, + "margin_dpo/margin_mean": 234.95591735839844, + "margin_dpo/margin_std": 241.16229248046875, + "step": 385 + }, + { + "KL/chosen_KL_mean": -364.15606689453125, + "KL/mean": -490.9974365234375, + "KL/rejected_KL_mean": -617.8388061523438, + "KL/std": 270.4039611816406, + "epoch": 0.566813509544787, + "fcm_dpo/beta": 0.0016963122179731727, + "fcm_dpo/delta": -0.03167739138007164, + "fcm_dpo/margin": 253.68276977539062, + "fcm_dpo/q_t": 0.40111613273620605, + "grad_norm": 32.36057662963867, + "learning_rate": 2.3717233210430254e-07, + "logits/chosen": -0.48662200570106506, + "logits/rejected": -0.48291075229644775, + "logps/chosen": -422.3779296875, + "logps/ref_chosen": -58.22185516357422, + "logps/ref_rejected": -92.32742309570312, + "logps/rejected": -710.166259765625, + "loss": 1.0717, + "margin_dpo/margin_mean": 253.68276977539062, + "margin_dpo/margin_std": 324.97998046875, + "step": 386 + }, + { + "KL/chosen_KL_mean": -387.12628173828125, + "KL/mean": -492.91485595703125, + "KL/rejected_KL_mean": -598.7034301757812, + "KL/std": 246.6182403564453, + "epoch": 0.5682819383259912, + "fcm_dpo/beta": 0.0016972242156043649, + "fcm_dpo/delta": 0.042357347905635834, + "fcm_dpo/margin": 211.57708740234375, + "fcm_dpo/q_t": 0.4156672954559326, + "grad_norm": 27.685876846313477, + "learning_rate": 2.3589086689101889e-07, + "logits/chosen": -0.542807936668396, + "logits/rejected": -0.5214394330978394, + "logps/chosen": -453.5457458496094, + "logps/ref_chosen": -66.41944885253906, + "logps/ref_rejected": -92.16915893554688, + "logps/rejected": -690.87255859375, + "loss": 1.1055, + "margin_dpo/margin_mean": 211.57708740234375, + "margin_dpo/margin_std": 257.301513671875, + "step": 387 + }, + { + "KL/chosen_KL_mean": -357.0862731933594, + "KL/mean": -501.005126953125, + "KL/rejected_KL_mean": -644.924072265625, + "KL/std": 291.4306335449219, + "epoch": 0.5697503671071953, + "fcm_dpo/beta": 0.001679969485849142, + "fcm_dpo/delta": -0.08814238011837006, + "fcm_dpo/margin": 287.8377380371094, + "fcm_dpo/q_t": 0.38981735706329346, + "grad_norm": 29.01787757873535, + "learning_rate": 2.3460977346651428e-07, + "logits/chosen": -0.4653438925743103, + "logits/rejected": -0.4758313298225403, + "logps/chosen": -407.2157287597656, + "logps/ref_chosen": -50.129459381103516, + "logps/ref_rejected": -104.43305969238281, + "logps/rejected": -749.3570556640625, + "loss": 1.0296, + "margin_dpo/margin_mean": 287.8377380371094, + "margin_dpo/margin_std": 330.7196044921875, + "step": 388 + }, + { + "KL/chosen_KL_mean": -387.08868408203125, + "KL/mean": -507.94390869140625, + "KL/rejected_KL_mean": -628.7991333007812, + "KL/std": 287.0469970703125, + "epoch": 0.5712187958883994, + "fcm_dpo/beta": 0.001672594342380762, + "fcm_dpo/delta": -0.004527151584625244, + "fcm_dpo/margin": 241.71041870117188, + "fcm_dpo/q_t": 0.4067634344100952, + "grad_norm": 32.89521789550781, + "learning_rate": 2.3332908558879177e-07, + "logits/chosen": -0.5016822218894958, + "logits/rejected": -0.49213531613349915, + "logps/chosen": -444.99530029296875, + "logps/ref_chosen": -57.906593322753906, + "logps/ref_rejected": -77.91454315185547, + "logps/rejected": -706.7136840820312, + "loss": 1.0873, + "margin_dpo/margin_mean": 241.71041870117188, + "margin_dpo/margin_std": 314.6967468261719, + "step": 389 + }, + { + "KL/chosen_KL_mean": -378.8408508300781, + "KL/mean": -499.02130126953125, + "KL/rejected_KL_mean": -619.20166015625, + "KL/std": 283.59832763671875, + "epoch": 0.5726872246696035, + "fcm_dpo/beta": 0.0016685773152858019, + "fcm_dpo/delta": -0.001378379762172699, + "fcm_dpo/margin": 240.36087036132812, + "fcm_dpo/q_t": 0.41037964820861816, + "grad_norm": 22.919740676879883, + "learning_rate": 2.320488370051681e-07, + "logits/chosen": -0.4558466672897339, + "logits/rejected": -0.45187222957611084, + "logps/chosen": -428.0667724609375, + "logps/ref_chosen": -49.22591781616211, + "logps/ref_rejected": -85.5281982421875, + "logps/rejected": -704.7298583984375, + "loss": 1.1064, + "margin_dpo/margin_mean": 240.36087036132812, + "margin_dpo/margin_std": 358.0471496582031, + "step": 390 + }, + { + "KL/chosen_KL_mean": -380.08941650390625, + "KL/mean": -455.04193115234375, + "KL/rejected_KL_mean": -529.9944458007812, + "KL/std": 265.760986328125, + "epoch": 0.5741556534508077, + "fcm_dpo/beta": 0.0017130144406110048, + "fcm_dpo/delta": 0.1468581259250641, + "fcm_dpo/margin": 149.905029296875, + "fcm_dpo/q_t": 0.4409900903701782, + "grad_norm": 36.1956672668457, + "learning_rate": 2.3076906145138405e-07, + "logits/chosen": -0.4843197762966156, + "logits/rejected": -0.47525227069854736, + "logps/chosen": -444.4190673828125, + "logps/ref_chosen": -64.32965087890625, + "logps/ref_rejected": -86.73820495605469, + "logps/rejected": -616.732666015625, + "loss": 1.203, + "margin_dpo/margin_mean": 149.90504455566406, + "margin_dpo/margin_std": 278.86932373046875, + "step": 391 + }, + { + "KL/chosen_KL_mean": -321.9182434082031, + "KL/mean": -463.30865478515625, + "KL/rejected_KL_mean": -604.6990966796875, + "KL/std": 282.4703674316406, + "epoch": 0.5756240822320118, + "fcm_dpo/beta": 0.001707045128569007, + "fcm_dpo/delta": -0.08683174103498459, + "fcm_dpo/margin": 282.7807922363281, + "fcm_dpo/q_t": 0.3881346583366394, + "grad_norm": 23.744003295898438, + "learning_rate": 2.294897926507156e-07, + "logits/chosen": -0.46998441219329834, + "logits/rejected": -0.4667205512523651, + "logps/chosen": -375.4222412109375, + "logps/ref_chosen": -53.50397872924805, + "logps/ref_rejected": -102.34584045410156, + "logps/rejected": -707.044921875, + "loss": 1.0159, + "margin_dpo/margin_mean": 282.78076171875, + "margin_dpo/margin_std": 284.8000183105469, + "step": 392 + }, + { + "KL/chosen_KL_mean": -327.07318115234375, + "KL/mean": -438.6499938964844, + "KL/rejected_KL_mean": -550.226806640625, + "KL/std": 282.72235107421875, + "epoch": 0.5770925110132159, + "fcm_dpo/beta": 0.0016989409923553467, + "fcm_dpo/delta": 0.021702561527490616, + "fcm_dpo/margin": 223.1536102294922, + "fcm_dpo/q_t": 0.4159389138221741, + "grad_norm": 22.198171615600586, + "learning_rate": 2.2821106431308543e-07, + "logits/chosen": -0.4430672526359558, + "logits/rejected": -0.43764716386795044, + "logps/chosen": -373.547119140625, + "logps/ref_chosen": -46.473915100097656, + "logps/ref_rejected": -71.96885681152344, + "logps/rejected": -622.1956787109375, + "loss": 1.1229, + "margin_dpo/margin_mean": 223.1536102294922, + "margin_dpo/margin_std": 354.7700500488281, + "step": 393 + }, + { + "KL/chosen_KL_mean": -356.56494140625, + "KL/mean": -474.52728271484375, + "KL/rejected_KL_mean": -592.4896240234375, + "KL/std": 290.612548828125, + "epoch": 0.57856093979442, + "fcm_dpo/beta": 0.0017010483425110579, + "fcm_dpo/delta": -0.001378481974825263, + "fcm_dpo/margin": 235.9246368408203, + "fcm_dpo/q_t": 0.40806007385253906, + "grad_norm": 21.11285972595215, + "learning_rate": 2.2693291013417452e-07, + "logits/chosen": -0.45615267753601074, + "logits/rejected": -0.45724251866340637, + "logps/chosen": -409.47650146484375, + "logps/ref_chosen": -52.91154861450195, + "logps/ref_rejected": -90.8226318359375, + "logps/rejected": -683.312255859375, + "loss": 1.0882, + "margin_dpo/margin_mean": 235.92465209960938, + "margin_dpo/margin_std": 308.0911865234375, + "step": 394 + }, + { + "KL/chosen_KL_mean": -360.2952880859375, + "KL/mean": -485.2971496582031, + "KL/rejected_KL_mean": -610.299072265625, + "KL/std": 286.58233642578125, + "epoch": 0.580029368575624, + "fcm_dpo/beta": 0.0016905716620385647, + "fcm_dpo/delta": -0.023899473249912262, + "fcm_dpo/margin": 250.00375366210938, + "fcm_dpo/q_t": 0.4038696587085724, + "grad_norm": 23.830419540405273, + "learning_rate": 2.2565536379453404e-07, + "logits/chosen": -0.5101211667060852, + "logits/rejected": -0.5033398270606995, + "logps/chosen": -422.8414001464844, + "logps/ref_chosen": -62.546112060546875, + "logps/ref_rejected": -83.78262329101562, + "logps/rejected": -694.0816650390625, + "loss": 1.0818, + "margin_dpo/margin_mean": 250.00375366210938, + "margin_dpo/margin_std": 334.2568359375, + "step": 395 + }, + { + "KL/chosen_KL_mean": -359.490966796875, + "KL/mean": -474.9879150390625, + "KL/rejected_KL_mean": -590.48486328125, + "KL/std": 282.67681884765625, + "epoch": 0.5814977973568282, + "fcm_dpo/beta": 0.0016928238328546286, + "fcm_dpo/delta": 0.009283696301281452, + "fcm_dpo/margin": 230.993896484375, + "fcm_dpo/q_t": 0.40855342149734497, + "grad_norm": 24.08172607421875, + "learning_rate": 2.2437845895869825e-07, + "logits/chosen": -0.4725229740142822, + "logits/rejected": -0.4491950571537018, + "logps/chosen": -428.4869079589844, + "logps/ref_chosen": -68.99594116210938, + "logps/ref_rejected": -88.64665985107422, + "logps/rejected": -679.1314697265625, + "loss": 1.0811, + "margin_dpo/margin_mean": 230.993896484375, + "margin_dpo/margin_std": 268.80023193359375, + "step": 396 + }, + { + "KL/chosen_KL_mean": -343.18365478515625, + "KL/mean": -490.54986572265625, + "KL/rejected_KL_mean": -637.9160766601562, + "KL/std": 279.0823974609375, + "epoch": 0.5829662261380323, + "fcm_dpo/beta": 0.0016665621660649776, + "fcm_dpo/delta": -0.09650179743766785, + "fcm_dpo/margin": 294.732421875, + "fcm_dpo/q_t": 0.38623836636543274, + "grad_norm": 34.25861358642578, + "learning_rate": 2.2310222927429716e-07, + "logits/chosen": -0.46848201751708984, + "logits/rejected": -0.47495004534721375, + "logps/chosen": -404.4608154296875, + "logps/ref_chosen": -61.27716827392578, + "logps/ref_rejected": -103.11612701416016, + "logps/rejected": -741.0322265625, + "loss": 1.0128, + "margin_dpo/margin_mean": 294.732421875, + "margin_dpo/margin_std": 302.2978515625, + "step": 397 + }, + { + "KL/chosen_KL_mean": -366.3542175292969, + "KL/mean": -501.455322265625, + "KL/rejected_KL_mean": -636.5565185546875, + "KL/std": 288.9761962890625, + "epoch": 0.5844346549192364, + "fcm_dpo/beta": 0.0016521359793841839, + "fcm_dpo/delta": -0.04858284816145897, + "fcm_dpo/margin": 270.20220947265625, + "fcm_dpo/q_t": 0.3986930847167969, + "grad_norm": 28.262405395507812, + "learning_rate": 2.2182670837116972e-07, + "logits/chosen": -0.5342578887939453, + "logits/rejected": -0.53399258852005, + "logps/chosen": -434.5057678222656, + "logps/ref_chosen": -68.15155029296875, + "logps/ref_rejected": -108.52360534667969, + "logps/rejected": -745.080078125, + "loss": 1.0596, + "margin_dpo/margin_mean": 270.20220947265625, + "margin_dpo/margin_std": 338.95098876953125, + "step": 398 + }, + { + "KL/chosen_KL_mean": -335.1020202636719, + "KL/mean": -451.53240966796875, + "KL/rejected_KL_mean": -567.9627685546875, + "KL/std": 267.4539489746094, + "epoch": 0.5859030837004405, + "fcm_dpo/beta": 0.0016457277815788984, + "fcm_dpo/delta": 0.01717957854270935, + "fcm_dpo/margin": 232.86073303222656, + "fcm_dpo/q_t": 0.41211044788360596, + "grad_norm": 28.764699935913086, + "learning_rate": 2.2055192986047804e-07, + "logits/chosen": -0.5000342130661011, + "logits/rejected": -0.4618859887123108, + "logps/chosen": -395.9918212890625, + "logps/ref_chosen": -60.889801025390625, + "logps/ref_rejected": -77.965576171875, + "logps/rejected": -645.9283447265625, + "loss": 1.1164, + "margin_dpo/margin_mean": 232.8607177734375, + "margin_dpo/margin_std": 344.62420654296875, + "step": 399 + }, + { + "KL/chosen_KL_mean": -311.3885192871094, + "KL/mean": -480.91925048828125, + "KL/rejected_KL_mean": -650.449951171875, + "KL/std": 279.1466064453125, + "epoch": 0.5873715124816447, + "fcm_dpo/beta": 0.0016120923683047295, + "fcm_dpo/delta": -0.15557917952537537, + "fcm_dpo/margin": 339.0614013671875, + "fcm_dpo/q_t": 0.37171751260757446, + "grad_norm": 27.097814559936523, + "learning_rate": 2.192779273338215e-07, + "logits/chosen": -0.4866938591003418, + "logits/rejected": -0.4817845821380615, + "logps/chosen": -375.0321044921875, + "logps/ref_chosen": -63.64359664916992, + "logps/ref_rejected": -105.252685546875, + "logps/rejected": -755.70263671875, + "loss": 0.968, + "margin_dpo/margin_mean": 339.0614013671875, + "margin_dpo/margin_std": 303.73828125, + "step": 400 + }, + { + "KL/chosen_KL_mean": -366.85516357421875, + "KL/mean": -460.4599304199219, + "KL/rejected_KL_mean": -554.064697265625, + "KL/std": 292.738037109375, + "epoch": 0.5888399412628488, + "fcm_dpo/beta": 0.0016232456546276808, + "fcm_dpo/delta": 0.09924636781215668, + "fcm_dpo/margin": 187.20950317382812, + "fcm_dpo/q_t": 0.4327518343925476, + "grad_norm": 30.14374351501465, + "learning_rate": 2.1800473436235136e-07, + "logits/chosen": -0.4865230619907379, + "logits/rejected": -0.4814421534538269, + "logps/chosen": -424.0182189941406, + "logps/ref_chosen": -57.16303253173828, + "logps/ref_rejected": -83.79249572753906, + "logps/rejected": -637.857177734375, + "loss": 1.2006, + "margin_dpo/margin_mean": 187.20950317382812, + "margin_dpo/margin_std": 391.4587707519531, + "step": 401 + }, + { + "KL/chosen_KL_mean": -272.751953125, + "KL/mean": -456.13507080078125, + "KL/rejected_KL_mean": -639.5181884765625, + "KL/std": 318.4268798828125, + "epoch": 0.5903083700440529, + "fcm_dpo/beta": 0.0015893438830971718, + "fcm_dpo/delta": -0.19418612122535706, + "fcm_dpo/margin": 366.76617431640625, + "fcm_dpo/q_t": 0.3651409447193146, + "grad_norm": 27.241565704345703, + "learning_rate": 2.1673238449588665e-07, + "logits/chosen": -0.5259881019592285, + "logits/rejected": -0.5160728693008423, + "logps/chosen": -323.4923400878906, + "logps/ref_chosen": -50.74037170410156, + "logps/ref_rejected": -81.0460433959961, + "logps/rejected": -720.564208984375, + "loss": 0.9443, + "margin_dpo/margin_mean": 366.76617431640625, + "margin_dpo/margin_std": 324.19622802734375, + "step": 402 + }, + { + "KL/chosen_KL_mean": -300.929443359375, + "KL/mean": -437.60577392578125, + "KL/rejected_KL_mean": -574.2821044921875, + "KL/std": 293.6529541015625, + "epoch": 0.591776798825257, + "fcm_dpo/beta": 0.0015664222883060575, + "fcm_dpo/delta": -0.029441511258482933, + "fcm_dpo/margin": 273.35260009765625, + "fcm_dpo/q_t": 0.40054211020469666, + "grad_norm": 27.655994415283203, + "learning_rate": 2.154609112620295e-07, + "logits/chosen": -0.5488017797470093, + "logits/rejected": -0.542914867401123, + "logps/chosen": -348.0767822265625, + "logps/ref_chosen": -47.14731216430664, + "logps/ref_rejected": -77.2666015625, + "logps/rejected": -651.5487060546875, + "loss": 1.057, + "margin_dpo/margin_mean": 273.35260009765625, + "margin_dpo/margin_std": 303.58697509765625, + "step": 403 + }, + { + "KL/chosen_KL_mean": -353.74798583984375, + "KL/mean": -488.7012939453125, + "KL/rejected_KL_mean": -623.654541015625, + "KL/std": 293.87213134765625, + "epoch": 0.593245227606461, + "fcm_dpo/beta": 0.001556064235046506, + "fcm_dpo/delta": -0.020973514765501022, + "fcm_dpo/margin": 269.9066162109375, + "fcm_dpo/q_t": 0.40406206250190735, + "grad_norm": 33.138004302978516, + "learning_rate": 2.1419034816528218e-07, + "logits/chosen": -0.5281866788864136, + "logits/rejected": -0.5200468301773071, + "logps/chosen": -401.62322998046875, + "logps/ref_chosen": -47.875274658203125, + "logps/ref_rejected": -77.15499877929688, + "logps/rejected": -700.8095703125, + "loss": 1.091, + "margin_dpo/margin_mean": 269.9066162109375, + "margin_dpo/margin_std": 377.742919921875, + "step": 404 + }, + { + "KL/chosen_KL_mean": -396.12872314453125, + "KL/mean": -505.8334045410156, + "KL/rejected_KL_mean": -615.5380859375, + "KL/std": 310.9896240234375, + "epoch": 0.5947136563876652, + "fcm_dpo/beta": 0.0015513843391090631, + "fcm_dpo/delta": -0.03758659213781357, + "fcm_dpo/margin": 219.4093475341797, + "fcm_dpo/q_t": 0.4234468638896942, + "grad_norm": 35.584747314453125, + "learning_rate": 2.129207286861638e-07, + "logits/chosen": -0.5194091796875, + "logits/rejected": -0.5121314525604248, + "logps/chosen": -461.2916259765625, + "logps/ref_chosen": -65.16290283203125, + "logps/ref_rejected": -87.18678283691406, + "logps/rejected": -702.724853515625, + "loss": 1.1657, + "margin_dpo/margin_mean": 219.4093475341797, + "margin_dpo/margin_std": 391.95977783203125, + "step": 405 + }, + { + "KL/chosen_KL_mean": -364.4059753417969, + "KL/mean": -506.44384765625, + "KL/rejected_KL_mean": -648.481689453125, + "KL/std": 309.79296875, + "epoch": 0.5961820851688693, + "fcm_dpo/beta": 0.0015432301443070173, + "fcm_dpo/delta": -0.0403851754963398, + "fcm_dpo/margin": 284.0757141113281, + "fcm_dpo/q_t": 0.39986056089401245, + "grad_norm": 25.82929801940918, + "learning_rate": 2.1165208628032861e-07, + "logits/chosen": -0.54176926612854, + "logits/rejected": -0.5563890933990479, + "logps/chosen": -414.14678955078125, + "logps/ref_chosen": -49.740814208984375, + "logps/ref_rejected": -92.07862854003906, + "logps/rejected": -740.560302734375, + "loss": 1.0647, + "margin_dpo/margin_mean": 284.07568359375, + "margin_dpo/margin_std": 352.7003173828125, + "step": 406 + }, + { + "KL/chosen_KL_mean": -376.4976806640625, + "KL/mean": -473.91802978515625, + "KL/rejected_KL_mean": -571.33837890625, + "KL/std": 257.8326110839844, + "epoch": 0.5976505139500734, + "fcm_dpo/beta": 0.0015315297059714794, + "fcm_dpo/delta": -0.01038383599370718, + "fcm_dpo/margin": 194.84068298339844, + "fcm_dpo/q_t": 0.42981648445129395, + "grad_norm": 35.46054458618164, + "learning_rate": 2.1038445437768375e-07, + "logits/chosen": -0.5773499011993408, + "logits/rejected": -0.5510052442550659, + "logps/chosen": -432.8283996582031, + "logps/ref_chosen": -56.33069610595703, + "logps/ref_rejected": -77.51209259033203, + "logps/rejected": -648.8504638671875, + "loss": 1.1752, + "margin_dpo/margin_mean": 194.8406982421875, + "margin_dpo/margin_std": 331.57635498046875, + "step": 407 + }, + { + "KL/chosen_KL_mean": -382.6614990234375, + "KL/mean": -483.43701171875, + "KL/rejected_KL_mean": -584.2125244140625, + "KL/std": 246.0572967529297, + "epoch": 0.5991189427312775, + "fcm_dpo/beta": 0.0015524220652878284, + "fcm_dpo/delta": 0.08978626132011414, + "fcm_dpo/margin": 201.551025390625, + "fcm_dpo/q_t": 0.4264683127403259, + "grad_norm": 37.8366813659668, + "learning_rate": 2.0911786638150872e-07, + "logits/chosen": -0.57146817445755, + "logits/rejected": -0.5515158176422119, + "logps/chosen": -452.4508056640625, + "logps/ref_chosen": -69.789306640625, + "logps/ref_rejected": -90.09693908691406, + "logps/rejected": -674.3094482421875, + "loss": 1.1414, + "margin_dpo/margin_mean": 201.551025390625, + "margin_dpo/margin_std": 274.039306640625, + "step": 408 + }, + { + "KL/chosen_KL_mean": -374.21478271484375, + "KL/mean": -472.4864501953125, + "KL/rejected_KL_mean": -570.7581787109375, + "KL/std": 268.3485107421875, + "epoch": 0.6005873715124816, + "fcm_dpo/beta": 0.0015834926161915064, + "fcm_dpo/delta": 0.09120422601699829, + "fcm_dpo/margin": 196.54339599609375, + "fcm_dpo/q_t": 0.4273938536643982, + "grad_norm": 39.993656158447266, + "learning_rate": 2.0785235566757517e-07, + "logits/chosen": -0.5402634143829346, + "logits/rejected": -0.5223067998886108, + "logps/chosen": -441.5321960449219, + "logps/ref_chosen": -67.31744384765625, + "logps/ref_rejected": -84.904296875, + "logps/rejected": -655.6624755859375, + "loss": 1.158, + "margin_dpo/margin_mean": 196.5434112548828, + "margin_dpo/margin_std": 309.50628662109375, + "step": 409 + }, + { + "KL/chosen_KL_mean": -335.39337158203125, + "KL/mean": -453.332763671875, + "KL/rejected_KL_mean": -571.2720947265625, + "KL/std": 258.4756774902344, + "epoch": 0.6020558002936858, + "fcm_dpo/beta": 0.0015911536756902933, + "fcm_dpo/delta": 0.025650672614574432, + "fcm_dpo/margin": 235.87872314453125, + "fcm_dpo/q_t": 0.41136714816093445, + "grad_norm": 29.003385543823242, + "learning_rate": 2.065879555832674e-07, + "logits/chosen": -0.5437331795692444, + "logits/rejected": -0.5466384887695312, + "logps/chosen": -386.8587341308594, + "logps/ref_chosen": -51.465354919433594, + "logps/ref_rejected": -83.198974609375, + "logps/rejected": -654.4710693359375, + "loss": 1.0969, + "margin_dpo/margin_mean": 235.8787384033203, + "margin_dpo/margin_std": 290.5599365234375, + "step": 410 + }, + { + "KL/chosen_KL_mean": -340.03350830078125, + "KL/mean": -455.3880615234375, + "KL/rejected_KL_mean": -570.7425537109375, + "KL/std": 282.7340393066406, + "epoch": 0.6035242290748899, + "fcm_dpo/beta": 0.001590752974152565, + "fcm_dpo/delta": 0.033395539969205856, + "fcm_dpo/margin": 230.70909118652344, + "fcm_dpo/q_t": 0.41678670048713684, + "grad_norm": 26.219621658325195, + "learning_rate": 2.0532469944670343e-07, + "logits/chosen": -0.520312488079071, + "logits/rejected": -0.5262941718101501, + "logps/chosen": -392.34075927734375, + "logps/ref_chosen": -52.30727005004883, + "logps/ref_rejected": -80.69495391845703, + "logps/rejected": -651.4375, + "loss": 1.1209, + "margin_dpo/margin_mean": 230.70907592773438, + "margin_dpo/margin_std": 329.6683349609375, + "step": 411 + }, + { + "KL/chosen_KL_mean": -355.45513916015625, + "KL/mean": -482.0203857421875, + "KL/rejected_KL_mean": -608.5856323242188, + "KL/std": 281.10675048828125, + "epoch": 0.604992657856094, + "fcm_dpo/beta": 0.0016053288709372282, + "fcm_dpo/delta": -0.006643663160502911, + "fcm_dpo/margin": 253.1304931640625, + "fcm_dpo/q_t": 0.4056549072265625, + "grad_norm": 40.66035079956055, + "learning_rate": 2.0406262054585738e-07, + "logits/chosen": -0.5641697645187378, + "logits/rejected": -0.5924566984176636, + "logps/chosen": -408.5992736816406, + "logps/ref_chosen": -53.144126892089844, + "logps/ref_rejected": -100.0608139038086, + "logps/rejected": -708.6464233398438, + "loss": 1.0857, + "margin_dpo/margin_mean": 253.13047790527344, + "margin_dpo/margin_std": 327.401611328125, + "step": 412 + }, + { + "KL/chosen_KL_mean": -375.2008056640625, + "KL/mean": -497.345703125, + "KL/rejected_KL_mean": -619.4906005859375, + "KL/std": 274.7577819824219, + "epoch": 0.6064610866372981, + "fcm_dpo/beta": 0.001608746824786067, + "fcm_dpo/delta": 0.007142549380660057, + "fcm_dpo/margin": 244.28973388671875, + "fcm_dpo/q_t": 0.40697982907295227, + "grad_norm": 26.795982360839844, + "learning_rate": 2.0280175213768205e-07, + "logits/chosen": -0.4796082675457001, + "logits/rejected": -0.4848223328590393, + "logps/chosen": -436.78277587890625, + "logps/ref_chosen": -61.58196258544922, + "logps/ref_rejected": -99.47340393066406, + "logps/rejected": -718.9639892578125, + "loss": 1.0903, + "margin_dpo/margin_mean": 244.28973388671875, + "margin_dpo/margin_std": 309.2333984375, + "step": 413 + }, + { + "KL/chosen_KL_mean": -338.37677001953125, + "KL/mean": -473.0335998535156, + "KL/rejected_KL_mean": -607.6904296875, + "KL/std": 263.765625, + "epoch": 0.6079295154185022, + "fcm_dpo/beta": 0.0016081007197499275, + "fcm_dpo/delta": -0.035320840775966644, + "fcm_dpo/margin": 269.31365966796875, + "fcm_dpo/q_t": 0.3990900218486786, + "grad_norm": 38.35430908203125, + "learning_rate": 2.0154212744723247e-07, + "logits/chosen": -0.4838346838951111, + "logits/rejected": -0.47299548983573914, + "logps/chosen": -385.00823974609375, + "logps/ref_chosen": -46.63148498535156, + "logps/ref_rejected": -87.64653015136719, + "logps/rejected": -695.3369750976562, + "loss": 1.0622, + "margin_dpo/margin_mean": 269.3136901855469, + "margin_dpo/margin_std": 310.88043212890625, + "step": 414 + }, + { + "KL/chosen_KL_mean": -378.6455993652344, + "KL/mean": -483.13079833984375, + "KL/rejected_KL_mean": -587.6160278320312, + "KL/std": 267.6329345703125, + "epoch": 0.6093979441997063, + "fcm_dpo/beta": 0.0016053159488365054, + "fcm_dpo/delta": 0.06679742783308029, + "fcm_dpo/margin": 208.97039794921875, + "fcm_dpo/q_t": 0.4219440221786499, + "grad_norm": 31.549434661865234, + "learning_rate": 2.002837796667909e-07, + "logits/chosen": -0.5333505868911743, + "logits/rejected": -0.5307386517524719, + "logps/chosen": -457.2638854980469, + "logps/ref_chosen": -78.6182861328125, + "logps/ref_rejected": -100.47752380371094, + "logps/rejected": -688.093505859375, + "loss": 1.1386, + "margin_dpo/margin_mean": 208.97039794921875, + "margin_dpo/margin_std": 311.5289306640625, + "step": 415 + }, + { + "KL/chosen_KL_mean": -346.6520690917969, + "KL/mean": -505.2399597167969, + "KL/rejected_KL_mean": -663.827880859375, + "KL/std": 294.3641357421875, + "epoch": 0.6108663729809104, + "fcm_dpo/beta": 0.0015930493827909231, + "fcm_dpo/delta": -0.11073094606399536, + "fcm_dpo/margin": 317.17578125, + "fcm_dpo/q_t": 0.38171255588531494, + "grad_norm": 32.9141731262207, + "learning_rate": 1.990267419549914e-07, + "logits/chosen": -0.5502901077270508, + "logits/rejected": -0.5549454092979431, + "logps/chosen": -404.93121337890625, + "logps/ref_chosen": -58.27912521362305, + "logps/ref_rejected": -90.56871795654297, + "logps/rejected": -754.3966064453125, + "loss": 0.9955, + "margin_dpo/margin_mean": 317.17578125, + "margin_dpo/margin_std": 294.7266845703125, + "step": 416 + }, + { + "KL/chosen_KL_mean": -341.5537109375, + "KL/mean": -474.1524658203125, + "KL/rejected_KL_mean": -606.7512817382812, + "KL/std": 268.6434631347656, + "epoch": 0.6123348017621145, + "fcm_dpo/beta": 0.0015767996665090322, + "fcm_dpo/delta": -0.01895320415496826, + "fcm_dpo/margin": 265.19757080078125, + "fcm_dpo/q_t": 0.40123608708381653, + "grad_norm": 31.803016662597656, + "learning_rate": 1.9777104743594686e-07, + "logits/chosen": -0.4980270266532898, + "logits/rejected": -0.47427403926849365, + "logps/chosen": -391.7524108886719, + "logps/ref_chosen": -50.1987190246582, + "logps/ref_rejected": -68.15184020996094, + "logps/rejected": -674.903076171875, + "loss": 1.0551, + "margin_dpo/margin_mean": 265.19757080078125, + "margin_dpo/margin_std": 272.559814453125, + "step": 417 + }, + { + "KL/chosen_KL_mean": -383.3123779296875, + "KL/mean": -517.654541015625, + "KL/rejected_KL_mean": -651.9967041015625, + "KL/std": 306.8751220703125, + "epoch": 0.6138032305433186, + "fcm_dpo/beta": 0.0015800942201167345, + "fcm_dpo/delta": -0.026646777987480164, + "fcm_dpo/margin": 268.684326171875, + "fcm_dpo/q_t": 0.40411561727523804, + "grad_norm": 25.314533233642578, + "learning_rate": 1.965167291983757e-07, + "logits/chosen": -0.5755934715270996, + "logits/rejected": -0.5533599853515625, + "logps/chosen": -465.2908630371094, + "logps/ref_chosen": -81.97846984863281, + "logps/ref_rejected": -104.69148254394531, + "logps/rejected": -756.688232421875, + "loss": 1.0853, + "margin_dpo/margin_mean": 268.684326171875, + "margin_dpo/margin_std": 358.3873291015625, + "step": 418 + }, + { + "KL/chosen_KL_mean": -348.782470703125, + "KL/mean": -498.6832275390625, + "KL/rejected_KL_mean": -648.583984375, + "KL/std": 267.0831298828125, + "epoch": 0.6152716593245228, + "fcm_dpo/beta": 0.0015506461495533586, + "fcm_dpo/delta": -0.06802451610565186, + "fcm_dpo/margin": 299.801513671875, + "fcm_dpo/q_t": 0.3909485936164856, + "grad_norm": 24.278188705444336, + "learning_rate": 1.9526382029472988e-07, + "logits/chosen": -0.5322695970535278, + "logits/rejected": -0.5325411558151245, + "logps/chosen": -401.7311096191406, + "logps/ref_chosen": -52.948646545410156, + "logps/ref_rejected": -91.58309936523438, + "logps/rejected": -740.1671142578125, + "loss": 1.0295, + "margin_dpo/margin_mean": 299.801513671875, + "margin_dpo/margin_std": 311.72760009765625, + "step": 419 + }, + { + "KL/chosen_KL_mean": -444.0596923828125, + "KL/mean": -532.8899536132812, + "KL/rejected_KL_mean": -621.72021484375, + "KL/std": 289.5302734375, + "epoch": 0.6167400881057269, + "fcm_dpo/beta": 0.001575858099386096, + "fcm_dpo/delta": 0.12323421239852905, + "fcm_dpo/margin": 177.66058349609375, + "fcm_dpo/q_t": 0.4371680021286011, + "grad_norm": 60.59740447998047, + "learning_rate": 1.9401235374032425e-07, + "logits/chosen": -0.5358173847198486, + "logits/rejected": -0.4956286549568176, + "logps/chosen": -521.82958984375, + "logps/ref_chosen": -77.7699203491211, + "logps/ref_rejected": -69.31985473632812, + "logps/rejected": -691.0401000976562, + "loss": 1.2169, + "margin_dpo/margin_mean": 177.66058349609375, + "margin_dpo/margin_std": 394.1659240722656, + "step": 420 + }, + { + "KL/chosen_KL_mean": -365.69305419921875, + "KL/mean": -470.6966552734375, + "KL/rejected_KL_mean": -575.7002563476562, + "KL/std": 274.4378967285156, + "epoch": 0.618208516886931, + "fcm_dpo/beta": 0.001605308847501874, + "fcm_dpo/delta": 0.06441329419612885, + "fcm_dpo/margin": 210.0072021484375, + "fcm_dpo/q_t": 0.4202464818954468, + "grad_norm": 29.370344161987305, + "learning_rate": 1.9276236251246653e-07, + "logits/chosen": -0.5225532054901123, + "logits/rejected": -0.5048704147338867, + "logps/chosen": -419.45892333984375, + "logps/ref_chosen": -53.765865325927734, + "logps/ref_rejected": -89.28144836425781, + "logps/rejected": -664.981689453125, + "loss": 1.1354, + "margin_dpo/margin_mean": 210.0072021484375, + "margin_dpo/margin_std": 298.9598388671875, + "step": 421 + }, + { + "KL/chosen_KL_mean": -418.38153076171875, + "KL/mean": -540.8232421875, + "KL/rejected_KL_mean": -663.264892578125, + "KL/std": 289.33966064453125, + "epoch": 0.6196769456681351, + "fcm_dpo/beta": 0.0016051906859502196, + "fcm_dpo/delta": 0.007199084386229515, + "fcm_dpo/margin": 244.88333129882812, + "fcm_dpo/q_t": 0.40865635871887207, + "grad_norm": 34.15210723876953, + "learning_rate": 1.9151387954958792e-07, + "logits/chosen": -0.555054783821106, + "logits/rejected": -0.5556162595748901, + "logps/chosen": -487.01531982421875, + "logps/ref_chosen": -68.6337661743164, + "logps/ref_rejected": -87.86351013183594, + "logps/rejected": -751.12841796875, + "loss": 1.102, + "margin_dpo/margin_mean": 244.8833465576172, + "margin_dpo/margin_std": 341.8487548828125, + "step": 422 + }, + { + "KL/chosen_KL_mean": -407.53509521484375, + "KL/mean": -542.244140625, + "KL/rejected_KL_mean": -676.9532470703125, + "KL/std": 281.966796875, + "epoch": 0.6211453744493393, + "fcm_dpo/beta": 0.0015967879444360733, + "fcm_dpo/delta": -0.0316154807806015, + "fcm_dpo/margin": 269.41815185546875, + "fcm_dpo/q_t": 0.39945095777511597, + "grad_norm": 28.743013381958008, + "learning_rate": 1.902669377503756e-07, + "logits/chosen": -0.5464938879013062, + "logits/rejected": -0.5520018339157104, + "logps/chosen": -462.525390625, + "logps/ref_chosen": -54.99030303955078, + "logps/ref_rejected": -86.30654907226562, + "logps/rejected": -763.2598266601562, + "loss": 1.0566, + "margin_dpo/margin_mean": 269.41815185546875, + "margin_dpo/margin_std": 303.0052795410156, + "step": 423 + }, + { + "KL/chosen_KL_mean": -378.3030090332031, + "KL/mean": -499.42327880859375, + "KL/rejected_KL_mean": -620.5435791015625, + "KL/std": 280.09075927734375, + "epoch": 0.6226138032305433, + "fcm_dpo/beta": 0.0015932518290355802, + "fcm_dpo/delta": 0.014376441016793251, + "fcm_dpo/margin": 242.24057006835938, + "fcm_dpo/q_t": 0.4120855927467346, + "grad_norm": 30.807126998901367, + "learning_rate": 1.890215699729057e-07, + "logits/chosen": -0.5540552139282227, + "logits/rejected": -0.5233687162399292, + "logps/chosen": -434.31494140625, + "logps/ref_chosen": -56.01192092895508, + "logps/ref_rejected": -66.47896575927734, + "logps/rejected": -687.0225830078125, + "loss": 1.1061, + "margin_dpo/margin_mean": 242.24058532714844, + "margin_dpo/margin_std": 339.5845947265625, + "step": 424 + }, + { + "KL/chosen_KL_mean": -409.63446044921875, + "KL/mean": -513.9957885742188, + "KL/rejected_KL_mean": -618.3570556640625, + "KL/std": 262.7354736328125, + "epoch": 0.6240822320117474, + "fcm_dpo/beta": 0.00161844864487648, + "fcm_dpo/delta": 0.06421151012182236, + "fcm_dpo/margin": 208.72262573242188, + "fcm_dpo/q_t": 0.42051440477371216, + "grad_norm": 38.50931930541992, + "learning_rate": 1.8777780903377732e-07, + "logits/chosen": -0.5310732126235962, + "logits/rejected": -0.5318828225135803, + "logps/chosen": -456.50347900390625, + "logps/ref_chosen": -46.86899948120117, + "logps/ref_rejected": -95.92545318603516, + "logps/rejected": -714.2825317382812, + "loss": 1.1387, + "margin_dpo/margin_mean": 208.72262573242188, + "margin_dpo/margin_std": 312.8076171875, + "step": 425 + }, + { + "KL/chosen_KL_mean": -386.08856201171875, + "KL/mean": -505.2015686035156, + "KL/rejected_KL_mean": -624.3145751953125, + "KL/std": 267.33624267578125, + "epoch": 0.6255506607929515, + "fcm_dpo/beta": 0.0016317331464961171, + "fcm_dpo/delta": 0.011202432215213776, + "fcm_dpo/margin": 238.22601318359375, + "fcm_dpo/q_t": 0.409078449010849, + "grad_norm": 31.958324432373047, + "learning_rate": 1.8653568770724803e-07, + "logits/chosen": -0.5409977436065674, + "logits/rejected": -0.501447319984436, + "logps/chosen": -462.672119140625, + "logps/ref_chosen": -76.58354187011719, + "logps/ref_rejected": -81.26658630371094, + "logps/rejected": -705.5811767578125, + "loss": 1.0974, + "margin_dpo/margin_mean": 238.22601318359375, + "margin_dpo/margin_std": 305.9199523925781, + "step": 426 + }, + { + "KL/chosen_KL_mean": -357.8903503417969, + "KL/mean": -448.9057312011719, + "KL/rejected_KL_mean": -539.921142578125, + "KL/std": 236.3748779296875, + "epoch": 0.6270190895741556, + "fcm_dpo/beta": 0.0016468719113618135, + "fcm_dpo/delta": 0.10344026982784271, + "fcm_dpo/margin": 182.03077697753906, + "fcm_dpo/q_t": 0.4312647581100464, + "grad_norm": 26.211519241333008, + "learning_rate": 1.8529523872436977e-07, + "logits/chosen": -0.5992106199264526, + "logits/rejected": -0.5759471654891968, + "logps/chosen": -422.7442321777344, + "logps/ref_chosen": -64.8538818359375, + "logps/ref_rejected": -78.5660171508789, + "logps/rejected": -618.4871215820312, + "loss": 1.1615, + "margin_dpo/margin_mean": 182.03079223632812, + "margin_dpo/margin_std": 283.958251953125, + "step": 427 + }, + { + "KL/chosen_KL_mean": -429.00396728515625, + "KL/mean": -561.623291015625, + "KL/rejected_KL_mean": -694.2426147460938, + "KL/std": 311.591064453125, + "epoch": 0.6284875183553598, + "fcm_dpo/beta": 0.001645084354095161, + "fcm_dpo/delta": -0.038231100887060165, + "fcm_dpo/margin": 265.2386779785156, + "fcm_dpo/q_t": 0.4009571671485901, + "grad_norm": 35.00596237182617, + "learning_rate": 1.8405649477212697e-07, + "logits/chosen": -0.5608881711959839, + "logits/rejected": -0.559416651725769, + "logps/chosen": -491.640625, + "logps/ref_chosen": -62.63666534423828, + "logps/ref_rejected": -103.28181457519531, + "logps/rejected": -797.5244140625, + "loss": 1.0869, + "margin_dpo/margin_mean": 265.2386474609375, + "margin_dpo/margin_std": 379.43597412109375, + "step": 428 + }, + { + "KL/chosen_KL_mean": -423.6542053222656, + "KL/mean": -521.1947631835938, + "KL/rejected_KL_mean": -618.7353515625, + "KL/std": 257.50677490234375, + "epoch": 0.6299559471365639, + "fcm_dpo/beta": 0.0016439331229776144, + "fcm_dpo/delta": -0.013148479163646698, + "fcm_dpo/margin": 195.0811767578125, + "fcm_dpo/q_t": 0.42502105236053467, + "grad_norm": 28.7783145904541, + "learning_rate": 1.828194884925749e-07, + "logits/chosen": -0.5880295634269714, + "logits/rejected": -0.5610803961753845, + "logps/chosen": -504.8882141113281, + "logps/ref_chosen": -81.23401641845703, + "logps/ref_rejected": -91.79493713378906, + "logps/rejected": -710.5302734375, + "loss": 1.1646, + "margin_dpo/margin_mean": 195.08114624023438, + "margin_dpo/margin_std": 329.076171875, + "step": 429 + }, + { + "KL/chosen_KL_mean": -349.86883544921875, + "KL/mean": -457.08282470703125, + "KL/rejected_KL_mean": -564.2967529296875, + "KL/std": 251.49932861328125, + "epoch": 0.631424375917768, + "fcm_dpo/beta": 0.0016547690611332655, + "fcm_dpo/delta": 0.04677361994981766, + "fcm_dpo/margin": 214.4279022216797, + "fcm_dpo/q_t": 0.4177946150302887, + "grad_norm": 30.429576873779297, + "learning_rate": 1.8158425248197928e-07, + "logits/chosen": -0.5739535093307495, + "logits/rejected": -0.5771204233169556, + "logps/chosen": -410.7891845703125, + "logps/ref_chosen": -60.920326232910156, + "logps/ref_rejected": -104.42280578613281, + "logps/rejected": -668.7196044921875, + "loss": 1.1134, + "margin_dpo/margin_mean": 214.42791748046875, + "margin_dpo/margin_std": 282.10595703125, + "step": 430 + }, + { + "KL/chosen_KL_mean": -334.3349609375, + "KL/mean": -474.6339111328125, + "KL/rejected_KL_mean": -614.932861328125, + "KL/std": 273.7649230957031, + "epoch": 0.6328928046989721, + "fcm_dpo/beta": 0.0016404774505645037, + "fcm_dpo/delta": -0.06328192353248596, + "fcm_dpo/margin": 280.59796142578125, + "fcm_dpo/q_t": 0.39278194308280945, + "grad_norm": 32.40290451049805, + "learning_rate": 1.8035081928995788e-07, + "logits/chosen": -0.5994788408279419, + "logits/rejected": -0.5967893600463867, + "logps/chosen": -391.6836853027344, + "logps/ref_chosen": -57.34874725341797, + "logps/ref_rejected": -92.84022521972656, + "logps/rejected": -707.7731323242188, + "loss": 1.0381, + "margin_dpo/margin_mean": 280.59796142578125, + "margin_dpo/margin_std": 304.55780029296875, + "step": 431 + }, + { + "KL/chosen_KL_mean": -322.6483154296875, + "KL/mean": -469.07745361328125, + "KL/rejected_KL_mean": -615.506591796875, + "KL/std": 274.3560485839844, + "epoch": 0.6343612334801763, + "fcm_dpo/beta": 0.0016270647756755352, + "fcm_dpo/delta": -0.08062286674976349, + "fcm_dpo/margin": 292.85833740234375, + "fcm_dpo/q_t": 0.38900789618492126, + "grad_norm": 35.32616424560547, + "learning_rate": 1.791192214186223e-07, + "logits/chosen": -0.5596065521240234, + "logits/rejected": -0.5433411598205566, + "logps/chosen": -393.72308349609375, + "logps/ref_chosen": -71.07479095458984, + "logps/ref_rejected": -98.57952880859375, + "logps/rejected": -714.086181640625, + "loss": 1.0171, + "margin_dpo/margin_mean": 292.85833740234375, + "margin_dpo/margin_std": 278.27447509765625, + "step": 432 + }, + { + "KL/chosen_KL_mean": -416.74896240234375, + "KL/mean": -512.5089721679688, + "KL/rejected_KL_mean": -608.2689208984375, + "KL/std": 277.8785095214844, + "epoch": 0.6358296622613803, + "fcm_dpo/beta": 0.001634822110645473, + "fcm_dpo/delta": 0.08947563171386719, + "fcm_dpo/margin": 191.52005004882812, + "fcm_dpo/q_t": 0.425261914730072, + "grad_norm": 36.68017578125, + "learning_rate": 1.7788949132172193e-07, + "logits/chosen": -0.6115612983703613, + "logits/rejected": -0.5973314046859741, + "logps/chosen": -475.02215576171875, + "logps/ref_chosen": -58.273193359375, + "logps/ref_rejected": -95.95089721679688, + "logps/rejected": -704.2198486328125, + "loss": 1.1663, + "margin_dpo/margin_mean": 191.52005004882812, + "margin_dpo/margin_std": 325.86212158203125, + "step": 433 + }, + { + "KL/chosen_KL_mean": -380.5859375, + "KL/mean": -486.9596252441406, + "KL/rejected_KL_mean": -593.333251953125, + "KL/std": 263.0496520996094, + "epoch": 0.6372980910425844, + "fcm_dpo/beta": 0.0016470999689772725, + "fcm_dpo/delta": 0.05136201158165932, + "fcm_dpo/margin": 212.747314453125, + "fcm_dpo/q_t": 0.4220370948314667, + "grad_norm": 33.74725341796875, + "learning_rate": 1.7666166140378853e-07, + "logits/chosen": -0.6359285116195679, + "logits/rejected": -0.6292107105255127, + "logps/chosen": -442.5596618652344, + "logps/ref_chosen": -61.97370147705078, + "logps/ref_rejected": -78.49861145019531, + "logps/rejected": -671.8319091796875, + "loss": 1.1326, + "margin_dpo/margin_mean": 212.747314453125, + "margin_dpo/margin_std": 328.4437561035156, + "step": 434 + }, + { + "KL/chosen_KL_mean": -346.1165771484375, + "KL/mean": -471.15478515625, + "KL/rejected_KL_mean": -596.1929931640625, + "KL/std": 277.8624267578125, + "epoch": 0.6387665198237885, + "fcm_dpo/beta": 0.0016516190953552723, + "fcm_dpo/delta": -0.0136133236810565, + "fcm_dpo/margin": 250.07640075683594, + "fcm_dpo/q_t": 0.4048606753349304, + "grad_norm": 35.001678466796875, + "learning_rate": 1.7543576401928218e-07, + "logits/chosen": -0.643917441368103, + "logits/rejected": -0.6357216835021973, + "logps/chosen": -397.61865234375, + "logps/ref_chosen": -51.502052307128906, + "logps/ref_rejected": -87.56689453125, + "logps/rejected": -683.7598876953125, + "loss": 1.0826, + "margin_dpo/margin_mean": 250.076416015625, + "margin_dpo/margin_std": 318.9538269042969, + "step": 435 + }, + { + "KL/chosen_KL_mean": -359.42626953125, + "KL/mean": -470.8343505859375, + "KL/rejected_KL_mean": -582.242431640625, + "KL/std": 251.36630249023438, + "epoch": 0.6402349486049926, + "fcm_dpo/beta": 0.0016546837287023664, + "fcm_dpo/delta": 0.032419584691524506, + "fcm_dpo/margin": 222.81613159179688, + "fcm_dpo/q_t": 0.41425737738609314, + "grad_norm": 35.268802642822266, + "learning_rate": 1.742118314717391e-07, + "logits/chosen": -0.6128599643707275, + "logits/rejected": -0.5752372741699219, + "logps/chosen": -430.83001708984375, + "logps/ref_chosen": -71.40371704101562, + "logps/ref_rejected": -82.72775268554688, + "logps/rejected": -664.97021484375, + "loss": 1.1098, + "margin_dpo/margin_mean": 222.81613159179688, + "margin_dpo/margin_std": 298.9530029296875, + "step": 436 + }, + { + "KL/chosen_KL_mean": -371.99591064453125, + "KL/mean": -481.1207275390625, + "KL/rejected_KL_mean": -590.2454833984375, + "KL/std": 237.38775634765625, + "epoch": 0.6417033773861968, + "fcm_dpo/beta": 0.001670231344178319, + "fcm_dpo/delta": 0.036831051111221313, + "fcm_dpo/margin": 218.2496337890625, + "fcm_dpo/q_t": 0.41457653045654297, + "grad_norm": 27.045368194580078, + "learning_rate": 1.7298989601292036e-07, + "logits/chosen": -0.6260372400283813, + "logits/rejected": -0.5966402292251587, + "logps/chosen": -436.74017333984375, + "logps/ref_chosen": -64.7442626953125, + "logps/ref_rejected": -82.04356384277344, + "logps/rejected": -672.2890625, + "loss": 1.1091, + "margin_dpo/margin_mean": 218.2496337890625, + "margin_dpo/margin_std": 285.473388671875, + "step": 437 + }, + { + "KL/chosen_KL_mean": -393.12811279296875, + "KL/mean": -519.9140014648438, + "KL/rejected_KL_mean": -646.6998901367188, + "KL/std": 283.584716796875, + "epoch": 0.6431718061674009, + "fcm_dpo/beta": 0.0016619900707155466, + "fcm_dpo/delta": -0.022839529439806938, + "fcm_dpo/margin": 253.57174682617188, + "fcm_dpo/q_t": 0.40174010396003723, + "grad_norm": 36.76564025878906, + "learning_rate": 1.7176998984196144e-07, + "logits/chosen": -0.6138747930526733, + "logits/rejected": -0.5866237878799438, + "logps/chosen": -452.14678955078125, + "logps/ref_chosen": -59.0186653137207, + "logps/ref_rejected": -83.07682800292969, + "logps/rejected": -729.7767333984375, + "loss": 1.0735, + "margin_dpo/margin_mean": 253.57174682617188, + "margin_dpo/margin_std": 311.7683410644531, + "step": 438 + }, + { + "KL/chosen_KL_mean": -400.92803955078125, + "KL/mean": -509.7080993652344, + "KL/rejected_KL_mean": -618.4881591796875, + "KL/std": 285.30078125, + "epoch": 0.644640234948605, + "fcm_dpo/beta": 0.0016392945544794202, + "fcm_dpo/delta": -0.08405376225709915, + "fcm_dpo/margin": 217.56015014648438, + "fcm_dpo/q_t": 0.4188630282878876, + "grad_norm": 30.52996253967285, + "learning_rate": 1.7055214510452458e-07, + "logits/chosen": -0.6197365522384644, + "logits/rejected": -0.6234545707702637, + "logps/chosen": -454.71209716796875, + "logps/ref_chosen": -53.78407669067383, + "logps/ref_rejected": -83.98545837402344, + "logps/rejected": -702.4736328125, + "loss": 1.1405, + "margin_dpo/margin_mean": 217.56015014648438, + "margin_dpo/margin_std": 336.02734375, + "step": 439 + }, + { + "KL/chosen_KL_mean": -425.029541015625, + "KL/mean": -547.2294921875, + "KL/rejected_KL_mean": -669.429443359375, + "KL/std": 345.5404968261719, + "epoch": 0.6461086637298091, + "fcm_dpo/beta": 0.001642939867451787, + "fcm_dpo/delta": -0.0017192382365465164, + "fcm_dpo/margin": 244.39990234375, + "fcm_dpo/q_t": 0.4110341966152191, + "grad_norm": 46.26620101928711, + "learning_rate": 1.6933639389195134e-07, + "logits/chosen": -0.6939189434051514, + "logits/rejected": -0.6905303597450256, + "logps/chosen": -503.59625244140625, + "logps/ref_chosen": -78.56671905517578, + "logps/ref_rejected": -96.49775695800781, + "logps/rejected": -765.9271850585938, + "loss": 1.1009, + "margin_dpo/margin_mean": 244.39990234375, + "margin_dpo/margin_std": 356.36102294921875, + "step": 440 + }, + { + "KL/chosen_KL_mean": -480.99591064453125, + "KL/mean": -598.77001953125, + "KL/rejected_KL_mean": -716.5440673828125, + "KL/std": 346.46795654296875, + "epoch": 0.6475770925110133, + "fcm_dpo/beta": 0.001648401957936585, + "fcm_dpo/delta": 0.011800557374954224, + "fcm_dpo/margin": 235.54818725585938, + "fcm_dpo/q_t": 0.41518956422805786, + "grad_norm": 47.700626373291016, + "learning_rate": 1.681227682404166e-07, + "logits/chosen": -0.6772704720497131, + "logits/rejected": -0.661035418510437, + "logps/chosen": -541.8203125, + "logps/ref_chosen": -60.824440002441406, + "logps/ref_rejected": -96.47080993652344, + "logps/rejected": -813.014892578125, + "loss": 1.1481, + "margin_dpo/margin_mean": 235.54818725585938, + "margin_dpo/margin_std": 421.46875, + "step": 441 + }, + { + "KL/chosen_KL_mean": -414.614501953125, + "KL/mean": -552.625244140625, + "KL/rejected_KL_mean": -690.6361083984375, + "KL/std": 340.30548095703125, + "epoch": 0.6490455212922174, + "fcm_dpo/beta": 0.0016397257568314672, + "fcm_dpo/delta": -0.055591996759176254, + "fcm_dpo/margin": 276.02154541015625, + "fcm_dpo/q_t": 0.4000922739505768, + "grad_norm": 32.922607421875, + "learning_rate": 1.669113001300851e-07, + "logits/chosen": -0.6050703525543213, + "logits/rejected": -0.5892840623855591, + "logps/chosen": -461.625732421875, + "logps/ref_chosen": -47.01121520996094, + "logps/ref_rejected": -76.53926086425781, + "logps/rejected": -767.17529296875, + "loss": 1.0748, + "margin_dpo/margin_mean": 276.021484375, + "margin_dpo/margin_std": 379.1744384765625, + "step": 442 + }, + { + "KL/chosen_KL_mean": -455.964111328125, + "KL/mean": -543.126953125, + "KL/rejected_KL_mean": -630.2898559570312, + "KL/std": 322.6165466308594, + "epoch": 0.6505139500734214, + "fcm_dpo/beta": 0.0016242916462942958, + "fcm_dpo/delta": -0.0004603892157319933, + "fcm_dpo/margin": 174.32577514648438, + "fcm_dpo/q_t": 0.4363713562488556, + "grad_norm": 49.77903366088867, + "learning_rate": 1.6570202148426815e-07, + "logits/chosen": -0.6197609901428223, + "logits/rejected": -0.5921432375907898, + "logps/chosen": -527.2371215820312, + "logps/ref_chosen": -71.27301788330078, + "logps/ref_rejected": -86.679931640625, + "logps/rejected": -716.9697875976562, + "loss": 1.2244, + "margin_dpo/margin_mean": 174.32577514648438, + "margin_dpo/margin_std": 398.736572265625, + "step": 443 + }, + { + "KL/chosen_KL_mean": -431.3829040527344, + "KL/mean": -579.9524536132812, + "KL/rejected_KL_mean": -728.5220336914062, + "KL/std": 343.83526611328125, + "epoch": 0.6519823788546255, + "fcm_dpo/beta": 0.0016027928795665503, + "fcm_dpo/delta": -0.0802171379327774, + "fcm_dpo/margin": 297.1391296386719, + "fcm_dpo/q_t": 0.39284807443618774, + "grad_norm": 47.931034088134766, + "learning_rate": 1.6449496416858282e-07, + "logits/chosen": -0.5499997138977051, + "logits/rejected": -0.5509436726570129, + "logps/chosen": -488.59661865234375, + "logps/ref_chosen": -57.213706970214844, + "logps/ref_rejected": -97.25489807128906, + "logps/rejected": -825.7769165039062, + "loss": 1.0468, + "margin_dpo/margin_mean": 297.1391296386719, + "margin_dpo/margin_std": 377.43927001953125, + "step": 444 + }, + { + "KL/chosen_KL_mean": -386.8304443359375, + "KL/mean": -519.5946655273438, + "KL/rejected_KL_mean": -652.35888671875, + "KL/std": 278.63726806640625, + "epoch": 0.6534508076358296, + "fcm_dpo/beta": 0.0015939505537971854, + "fcm_dpo/delta": -0.02425987273454666, + "fcm_dpo/margin": 265.5284118652344, + "fcm_dpo/q_t": 0.4040035307407379, + "grad_norm": 33.41164779663086, + "learning_rate": 1.6329015999011182e-07, + "logits/chosen": -0.6030969619750977, + "logits/rejected": -0.587662398815155, + "logps/chosen": -454.1302490234375, + "logps/ref_chosen": -67.29979705810547, + "logps/ref_rejected": -92.68267059326172, + "logps/rejected": -745.0415649414062, + "loss": 1.0847, + "margin_dpo/margin_mean": 265.5284118652344, + "margin_dpo/margin_std": 356.4762878417969, + "step": 445 + }, + { + "KL/chosen_KL_mean": -347.9998779296875, + "KL/mean": -496.1356201171875, + "KL/rejected_KL_mean": -644.271484375, + "KL/std": 297.07550048828125, + "epoch": 0.6549192364170338, + "fcm_dpo/beta": 0.001583605189807713, + "fcm_dpo/delta": -0.07294195890426636, + "fcm_dpo/margin": 296.2715759277344, + "fcm_dpo/q_t": 0.3905741572380066, + "grad_norm": 32.357421875, + "learning_rate": 1.6208764069656578e-07, + "logits/chosen": -0.593482255935669, + "logits/rejected": -0.6047611236572266, + "logps/chosen": -407.0983581542969, + "logps/ref_chosen": -59.098487854003906, + "logps/ref_rejected": -101.26419067382812, + "logps/rejected": -745.53564453125, + "loss": 1.0313, + "margin_dpo/margin_mean": 296.2715759277344, + "margin_dpo/margin_std": 308.3010559082031, + "step": 446 + }, + { + "KL/chosen_KL_mean": -356.1015625, + "KL/mean": -507.9754333496094, + "KL/rejected_KL_mean": -659.849365234375, + "KL/std": 338.80889892578125, + "epoch": 0.6563876651982379, + "fcm_dpo/beta": 0.001546173356473446, + "fcm_dpo/delta": -0.07345931977033615, + "fcm_dpo/margin": 303.7477722167969, + "fcm_dpo/q_t": 0.39456889033317566, + "grad_norm": 34.99128341674805, + "learning_rate": 1.608874379754465e-07, + "logits/chosen": -0.6641237735748291, + "logits/rejected": -0.6772187948226929, + "logps/chosen": -412.1768798828125, + "logps/ref_chosen": -56.07533264160156, + "logps/ref_rejected": -98.69475555419922, + "logps/rejected": -758.5440673828125, + "loss": 1.0466, + "margin_dpo/margin_mean": 303.7477722167969, + "margin_dpo/margin_std": 382.21380615234375, + "step": 447 + }, + { + "KL/chosen_KL_mean": -386.7417297363281, + "KL/mean": -532.7618408203125, + "KL/rejected_KL_mean": -678.7818603515625, + "KL/std": 287.2745056152344, + "epoch": 0.657856093979442, + "fcm_dpo/beta": 0.0015382280107587576, + "fcm_dpo/delta": -0.051600463688373566, + "fcm_dpo/margin": 292.04010009765625, + "fcm_dpo/q_t": 0.3958974778652191, + "grad_norm": 37.12090301513672, + "learning_rate": 1.5968958345321177e-07, + "logits/chosen": -0.5725095272064209, + "logits/rejected": -0.5768595933914185, + "logps/chosen": -446.7455749511719, + "logps/ref_chosen": -60.00384521484375, + "logps/ref_rejected": -102.26465606689453, + "logps/rejected": -781.0465087890625, + "loss": 1.044, + "margin_dpo/margin_mean": 292.0401306152344, + "margin_dpo/margin_std": 318.9796142578125, + "step": 448 + }, + { + "KL/chosen_KL_mean": -391.531982421875, + "KL/mean": -530.09326171875, + "KL/rejected_KL_mean": -668.654541015625, + "KL/std": 336.97320556640625, + "epoch": 0.6593245227606461, + "fcm_dpo/beta": 0.001521222060546279, + "fcm_dpo/delta": -0.02261107787489891, + "fcm_dpo/margin": 277.12255859375, + "fcm_dpo/q_t": 0.4064374566078186, + "grad_norm": 35.58975601196289, + "learning_rate": 1.584941086944423e-07, + "logits/chosen": -0.6065933704376221, + "logits/rejected": -0.5960414409637451, + "logps/chosen": -459.05859375, + "logps/ref_chosen": -67.52661895751953, + "logps/ref_rejected": -88.59690856933594, + "logps/rejected": -757.25146484375, + "loss": 1.1009, + "margin_dpo/margin_mean": 277.12255859375, + "margin_dpo/margin_std": 424.0272216796875, + "step": 449 + }, + { + "KL/chosen_KL_mean": -325.14208984375, + "KL/mean": -486.7525634765625, + "KL/rejected_KL_mean": -648.3630981445312, + "KL/std": 311.44439697265625, + "epoch": 0.6607929515418502, + "fcm_dpo/beta": 0.0015039572026580572, + "fcm_dpo/delta": -0.09042147547006607, + "fcm_dpo/margin": 323.22100830078125, + "fcm_dpo/q_t": 0.3854549527168274, + "grad_norm": 38.07679748535156, + "learning_rate": 1.573010452010098e-07, + "logits/chosen": -0.6078156232833862, + "logits/rejected": -0.6151422262191772, + "logps/chosen": -382.2502136230469, + "logps/ref_chosen": -57.10811996459961, + "logps/ref_rejected": -102.75494384765625, + "logps/rejected": -751.1180419921875, + "loss": 1.0075, + "margin_dpo/margin_mean": 323.2209777832031, + "margin_dpo/margin_std": 305.29486083984375, + "step": 450 + }, + { + "KL/chosen_KL_mean": -423.325439453125, + "KL/mean": -537.2266845703125, + "KL/rejected_KL_mean": -651.1279907226562, + "KL/std": 339.7525634765625, + "epoch": 0.6622613803230544, + "fcm_dpo/beta": 0.001510746544227004, + "fcm_dpo/delta": 0.057636506855487823, + "fcm_dpo/margin": 227.80252075195312, + "fcm_dpo/q_t": 0.41878455877304077, + "grad_norm": 28.49110221862793, + "learning_rate": 1.5611042441124687e-07, + "logits/chosen": -0.650975227355957, + "logits/rejected": -0.6234632134437561, + "logps/chosen": -481.79425048828125, + "logps/ref_chosen": -58.46883010864258, + "logps/ref_rejected": -72.92941284179688, + "logps/rejected": -724.057373046875, + "loss": 1.1516, + "margin_dpo/margin_mean": 227.80252075195312, + "margin_dpo/margin_std": 386.4510498046875, + "step": 451 + }, + { + "KL/chosen_KL_mean": -296.03009033203125, + "KL/mean": -442.83074951171875, + "KL/rejected_KL_mean": -589.63134765625, + "KL/std": 274.8699951171875, + "epoch": 0.6637298091042585, + "fcm_dpo/beta": 0.0014999432023614645, + "fcm_dpo/delta": -0.04238360375165939, + "fcm_dpo/margin": 293.60125732421875, + "fcm_dpo/q_t": 0.3958936929702759, + "grad_norm": 21.779226303100586, + "learning_rate": 1.549222776991186e-07, + "logits/chosen": -0.5605393648147583, + "logits/rejected": -0.5794718265533447, + "logps/chosen": -346.420654296875, + "logps/ref_chosen": -50.39055252075195, + "logps/ref_rejected": -97.77142333984375, + "logps/rejected": -687.40283203125, + "loss": 1.0375, + "margin_dpo/margin_mean": 293.60125732421875, + "margin_dpo/margin_std": 286.99456787109375, + "step": 452 + }, + { + "KL/chosen_KL_mean": -361.25189208984375, + "KL/mean": -487.23480224609375, + "KL/rejected_KL_mean": -613.2176513671875, + "KL/std": 274.94085693359375, + "epoch": 0.6651982378854625, + "fcm_dpo/beta": 0.0014997010584920645, + "fcm_dpo/delta": 0.022862950339913368, + "fcm_dpo/margin": 251.9658203125, + "fcm_dpo/q_t": 0.4132787585258484, + "grad_norm": 23.549057006835938, + "learning_rate": 1.5373663637339584e-07, + "logits/chosen": -0.6005634069442749, + "logits/rejected": -0.576606273651123, + "logps/chosen": -418.96673583984375, + "logps/ref_chosen": -57.71485137939453, + "logps/ref_rejected": -82.20741271972656, + "logps/rejected": -695.4251098632812, + "loss": 1.1003, + "margin_dpo/margin_mean": 251.9658203125, + "margin_dpo/margin_std": 328.4573974609375, + "step": 453 + }, + { + "KL/chosen_KL_mean": -429.2816162109375, + "KL/mean": -578.0045166015625, + "KL/rejected_KL_mean": -726.7275390625, + "KL/std": 324.15545654296875, + "epoch": 0.6666666666666666, + "fcm_dpo/beta": 0.0014922961127012968, + "fcm_dpo/delta": -0.04614517092704773, + "fcm_dpo/margin": 297.4459228515625, + "fcm_dpo/q_t": 0.3982745409011841, + "grad_norm": 23.49666404724121, + "learning_rate": 1.5255353167683017e-07, + "logits/chosen": -0.6576023101806641, + "logits/rejected": -0.6443264484405518, + "logps/chosen": -490.2272644042969, + "logps/ref_chosen": -60.945648193359375, + "logps/ref_rejected": -84.95079040527344, + "logps/rejected": -811.6783447265625, + "loss": 1.0595, + "margin_dpo/margin_mean": 297.4459228515625, + "margin_dpo/margin_std": 368.7091369628906, + "step": 454 + }, + { + "KL/chosen_KL_mean": -380.530517578125, + "KL/mean": -561.5405883789062, + "KL/rejected_KL_mean": -742.5506591796875, + "KL/std": 345.6868896484375, + "epoch": 0.6681350954478708, + "fcm_dpo/beta": 0.0014687062939628959, + "fcm_dpo/delta": -0.13889265060424805, + "fcm_dpo/margin": 362.0201416015625, + "fcm_dpo/q_t": 0.37912923097610474, + "grad_norm": 33.2188606262207, + "learning_rate": 1.5137299478533064e-07, + "logits/chosen": -0.6506750583648682, + "logits/rejected": -0.6731724739074707, + "logps/chosen": -425.417236328125, + "logps/ref_chosen": -44.88671112060547, + "logps/ref_rejected": -115.30147552490234, + "logps/rejected": -857.8521728515625, + "loss": 1.0019, + "margin_dpo/margin_mean": 362.0201416015625, + "margin_dpo/margin_std": 392.91925048828125, + "step": 455 + }, + { + "KL/chosen_KL_mean": -401.8112487792969, + "KL/mean": -575.7777099609375, + "KL/rejected_KL_mean": -749.744140625, + "KL/std": 341.2975158691406, + "epoch": 0.6696035242290749, + "fcm_dpo/beta": 0.0014300058828666806, + "fcm_dpo/delta": -0.10262109339237213, + "fcm_dpo/margin": 347.93292236328125, + "fcm_dpo/q_t": 0.38482022285461426, + "grad_norm": 32.33451843261719, + "learning_rate": 1.5019505680714232e-07, + "logits/chosen": -0.612378716468811, + "logits/rejected": -0.632037878036499, + "logps/chosen": -458.8480224609375, + "logps/ref_chosen": -57.036781311035156, + "logps/ref_rejected": -105.21784210205078, + "logps/rejected": -854.9620361328125, + "loss": 1.0025, + "margin_dpo/margin_mean": 347.9329528808594, + "margin_dpo/margin_std": 340.260986328125, + "step": 456 + }, + { + "KL/chosen_KL_mean": -395.79827880859375, + "KL/mean": -566.0772705078125, + "KL/rejected_KL_mean": -736.3562622070312, + "KL/std": 335.4608459472656, + "epoch": 0.671071953010279, + "fcm_dpo/beta": 0.0014014223124831915, + "fcm_dpo/delta": -0.08146154880523682, + "fcm_dpo/margin": 340.5579833984375, + "fcm_dpo/q_t": 0.3878782391548157, + "grad_norm": 27.79351043701172, + "learning_rate": 1.4901974878202627e-07, + "logits/chosen": -0.6594676971435547, + "logits/rejected": -0.6568940877914429, + "logps/chosen": -450.0408020019531, + "logps/ref_chosen": -54.24253845214844, + "logps/ref_rejected": -85.10956573486328, + "logps/rejected": -821.4658203125, + "loss": 1.0157, + "margin_dpo/margin_mean": 340.5579833984375, + "margin_dpo/margin_std": 328.9454345703125, + "step": 457 + }, + { + "KL/chosen_KL_mean": -425.1473693847656, + "KL/mean": -578.1829833984375, + "KL/rejected_KL_mean": -731.2185668945312, + "KL/std": 312.2036437988281, + "epoch": 0.6725403817914831, + "fcm_dpo/beta": 0.001388939330354333, + "fcm_dpo/delta": -0.0265361275523901, + "fcm_dpo/margin": 306.07122802734375, + "fcm_dpo/q_t": 0.4020830988883972, + "grad_norm": 24.5087890625, + "learning_rate": 1.4784710168044212e-07, + "logits/chosen": -0.6767433881759644, + "logits/rejected": -0.6698124408721924, + "logps/chosen": -480.5562438964844, + "logps/ref_chosen": -55.40888214111328, + "logps/ref_rejected": -97.68325805664062, + "logps/rejected": -828.90185546875, + "loss": 1.0668, + "margin_dpo/margin_mean": 306.07122802734375, + "margin_dpo/margin_std": 367.548095703125, + "step": 458 + }, + { + "KL/chosen_KL_mean": -477.1727294921875, + "KL/mean": -637.5369873046875, + "KL/rejected_KL_mean": -797.9011840820312, + "KL/std": 362.09600830078125, + "epoch": 0.6740088105726872, + "fcm_dpo/beta": 0.0013780685840174556, + "fcm_dpo/delta": -0.04427627474069595, + "fcm_dpo/margin": 320.7283935546875, + "fcm_dpo/q_t": 0.3988497853279114, + "grad_norm": 37.77009201049805, + "learning_rate": 1.466771464027316e-07, + "logits/chosen": -0.6691190004348755, + "logits/rejected": -0.6827735304832458, + "logps/chosen": -523.730224609375, + "logps/ref_chosen": -46.55748748779297, + "logps/ref_rejected": -86.16854095458984, + "logps/rejected": -884.0697021484375, + "loss": 1.0698, + "margin_dpo/margin_mean": 320.7284240722656, + "margin_dpo/margin_std": 416.4349365234375, + "step": 459 + }, + { + "KL/chosen_KL_mean": -524.5628051757812, + "KL/mean": -702.454833984375, + "KL/rejected_KL_mean": -880.3468627929688, + "KL/std": 362.19757080078125, + "epoch": 0.6754772393538914, + "fcm_dpo/beta": 0.0013614799827337265, + "fcm_dpo/delta": -0.08867627382278442, + "fcm_dpo/margin": 355.7840576171875, + "fcm_dpo/q_t": 0.38881251215934753, + "grad_norm": 38.9324836730957, + "learning_rate": 1.4550991377830423e-07, + "logits/chosen": -0.7337905168533325, + "logits/rejected": -0.7658564448356628, + "logps/chosen": -576.19775390625, + "logps/ref_chosen": -51.63489532470703, + "logps/ref_rejected": -104.11935424804688, + "logps/rejected": -984.4661865234375, + "loss": 1.0251, + "margin_dpo/margin_mean": 355.7840576171875, + "margin_dpo/margin_std": 394.8114013671875, + "step": 460 + }, + { + "KL/chosen_KL_mean": -551.4806518554688, + "KL/mean": -691.9464111328125, + "KL/rejected_KL_mean": -832.4122314453125, + "KL/std": 369.3660888671875, + "epoch": 0.6769456681350955, + "fcm_dpo/beta": 0.001360948197543621, + "fcm_dpo/delta": 0.018288645893335342, + "fcm_dpo/margin": 280.931640625, + "fcm_dpo/q_t": 0.41348153352737427, + "grad_norm": 25.64112663269043, + "learning_rate": 1.4434543456482518e-07, + "logits/chosen": -0.7380908727645874, + "logits/rejected": -0.750026524066925, + "logps/chosen": -606.66259765625, + "logps/ref_chosen": -55.18195724487305, + "logps/ref_rejected": -86.47689819335938, + "logps/rejected": -918.88916015625, + "loss": 1.114, + "margin_dpo/margin_mean": 280.931640625, + "margin_dpo/margin_std": 416.9804992675781, + "step": 461 + }, + { + "KL/chosen_KL_mean": -591.0242919921875, + "KL/mean": -702.4153442382812, + "KL/rejected_KL_mean": -813.806396484375, + "KL/std": 374.5951232910156, + "epoch": 0.6784140969162996, + "fcm_dpo/beta": 0.0013791057281196117, + "fcm_dpo/delta": 0.09573453664779663, + "fcm_dpo/margin": 222.78219604492188, + "fcm_dpo/q_t": 0.4319148361682892, + "grad_norm": 43.11104965209961, + "learning_rate": 1.4318373944740484e-07, + "logits/chosen": -0.8458345532417297, + "logits/rejected": -0.829505205154419, + "logps/chosen": -660.9522705078125, + "logps/ref_chosen": -69.92803192138672, + "logps/ref_rejected": -78.84111022949219, + "logps/rejected": -892.6475830078125, + "loss": 1.1826, + "margin_dpo/margin_mean": 222.78216552734375, + "margin_dpo/margin_std": 426.85430908203125, + "step": 462 + }, + { + "KL/chosen_KL_mean": -574.5623779296875, + "KL/mean": -721.1395263671875, + "KL/rejected_KL_mean": -867.7166748046875, + "KL/std": 383.65875244140625, + "epoch": 0.6798825256975036, + "fcm_dpo/beta": 0.0013890512054786086, + "fcm_dpo/delta": -0.007682671770453453, + "fcm_dpo/margin": 293.15423583984375, + "fcm_dpo/q_t": 0.40834498405456543, + "grad_norm": 39.33317565917969, + "learning_rate": 1.4202485903778976e-07, + "logits/chosen": -0.8075680732727051, + "logits/rejected": -0.8106831312179565, + "logps/chosen": -629.8367919921875, + "logps/ref_chosen": -55.27437210083008, + "logps/ref_rejected": -89.02497863769531, + "logps/rejected": -956.7415771484375, + "loss": 1.1057, + "margin_dpo/margin_mean": 293.15423583984375, + "margin_dpo/margin_std": 435.8871765136719, + "step": 463 + }, + { + "KL/chosen_KL_mean": -575.095458984375, + "KL/mean": -810.3673095703125, + "KL/rejected_KL_mean": -1045.63916015625, + "KL/std": 450.2059631347656, + "epoch": 0.6813509544787077, + "fcm_dpo/beta": 0.0013278971891850233, + "fcm_dpo/delta": -0.24185608327388763, + "fcm_dpo/margin": 470.54376220703125, + "fcm_dpo/q_t": 0.35767611861228943, + "grad_norm": 36.297725677490234, + "learning_rate": 1.4086882387355658e-07, + "logits/chosen": -0.7917243242263794, + "logits/rejected": -0.8560171127319336, + "logps/chosen": -626.0076904296875, + "logps/ref_chosen": -50.91230010986328, + "logps/ref_rejected": -102.4893798828125, + "logps/rejected": -1148.1285400390625, + "loss": 0.937, + "margin_dpo/margin_mean": 470.5437316894531, + "margin_dpo/margin_std": 456.50927734375, + "step": 464 + }, + { + "KL/chosen_KL_mean": -572.2319946289062, + "KL/mean": -770.283447265625, + "KL/rejected_KL_mean": -968.3349609375, + "KL/std": 461.42987060546875, + "epoch": 0.6828193832599119, + "fcm_dpo/beta": 0.0012997114099562168, + "fcm_dpo/delta": -0.12087617814540863, + "fcm_dpo/margin": 396.10296630859375, + "fcm_dpo/q_t": 0.38136354088783264, + "grad_norm": 36.88157653808594, + "learning_rate": 1.3971566441730714e-07, + "logits/chosen": -0.7635716199874878, + "logits/rejected": -0.7767517566680908, + "logps/chosen": -632.348876953125, + "logps/ref_chosen": -60.116851806640625, + "logps/ref_rejected": -113.94602966308594, + "logps/rejected": -1082.281005859375, + "loss": 1.0321, + "margin_dpo/margin_mean": 396.10296630859375, + "margin_dpo/margin_std": 493.5492858886719, + "step": 465 + }, + { + "KL/chosen_KL_mean": -633.3134765625, + "KL/mean": -802.6712646484375, + "KL/rejected_KL_mean": -972.0289306640625, + "KL/std": 447.70947265625, + "epoch": 0.684287812041116, + "fcm_dpo/beta": 0.001274168025702238, + "fcm_dpo/delta": -0.03375307843089104, + "fcm_dpo/margin": 338.7154541015625, + "fcm_dpo/q_t": 0.4008180499076843, + "grad_norm": 33.14835739135742, + "learning_rate": 1.3856541105586545e-07, + "logits/chosen": -0.8126999139785767, + "logits/rejected": -0.8147940635681152, + "logps/chosen": -686.234375, + "logps/ref_chosen": -52.920921325683594, + "logps/ref_rejected": -90.3154296875, + "logps/rejected": -1062.3443603515625, + "loss": 1.0895, + "margin_dpo/margin_mean": 338.7154541015625, + "margin_dpo/margin_std": 476.6962890625, + "step": 466 + }, + { + "KL/chosen_KL_mean": -806.89892578125, + "KL/mean": -979.3716430664062, + "KL/rejected_KL_mean": -1151.8443603515625, + "KL/std": 577.361572265625, + "epoch": 0.6857562408223201, + "fcm_dpo/beta": 0.001259978162124753, + "fcm_dpo/delta": -0.03823067247867584, + "fcm_dpo/margin": 344.94549560546875, + "fcm_dpo/q_t": 0.40663182735443115, + "grad_norm": 69.0197525024414, + "learning_rate": 1.3741809409947729e-07, + "logits/chosen": -0.920897901058197, + "logits/rejected": -0.8992031812667847, + "logps/chosen": -885.61474609375, + "logps/ref_chosen": -78.7158203125, + "logps/ref_rejected": -102.86019897460938, + "logps/rejected": -1254.70458984375, + "loss": 1.1674, + "margin_dpo/margin_mean": 344.94549560546875, + "margin_dpo/margin_std": 677.0098876953125, + "step": 467 + }, + { + "KL/chosen_KL_mean": -625.35986328125, + "KL/mean": -837.527099609375, + "KL/rejected_KL_mean": -1049.6942138671875, + "KL/std": 500.7874755859375, + "epoch": 0.6872246696035242, + "fcm_dpo/beta": 0.0012417640537023544, + "fcm_dpo/delta": -0.13427412509918213, + "fcm_dpo/margin": 424.33441162109375, + "fcm_dpo/q_t": 0.3849431276321411, + "grad_norm": 38.766334533691406, + "learning_rate": 1.362737437810114e-07, + "logits/chosen": -0.869031548500061, + "logits/rejected": -0.8773350715637207, + "logps/chosen": -695.2952270507812, + "logps/ref_chosen": -69.93536376953125, + "logps/ref_rejected": -101.02880859375, + "logps/rejected": -1150.7230224609375, + "loss": 1.028, + "margin_dpo/margin_mean": 424.33441162109375, + "margin_dpo/margin_std": 557.9710693359375, + "step": 468 + }, + { + "KL/chosen_KL_mean": -647.9295654296875, + "KL/mean": -849.9874267578125, + "KL/rejected_KL_mean": -1052.0452880859375, + "KL/std": 426.935791015625, + "epoch": 0.6886930983847284, + "fcm_dpo/beta": 0.0012096271384507418, + "fcm_dpo/delta": -0.09502536803483963, + "fcm_dpo/margin": 404.11572265625, + "fcm_dpo/q_t": 0.38767051696777344, + "grad_norm": 37.197357177734375, + "learning_rate": 1.351323902551631e-07, + "logits/chosen": -0.8826281428337097, + "logits/rejected": -0.8875974416732788, + "logps/chosen": -716.05419921875, + "logps/ref_chosen": -68.12469482421875, + "logps/ref_rejected": -104.78640747070312, + "logps/rejected": -1156.8316650390625, + "loss": 1.0321, + "margin_dpo/margin_mean": 404.11572265625, + "margin_dpo/margin_std": 464.22857666015625, + "step": 469 + }, + { + "KL/chosen_KL_mean": -522.9632568359375, + "KL/mean": -709.4610595703125, + "KL/rejected_KL_mean": -895.9589233398438, + "KL/std": 429.58807373046875, + "epoch": 0.6901615271659325, + "fcm_dpo/beta": 0.001205753069370985, + "fcm_dpo/delta": -0.052081190049648285, + "fcm_dpo/margin": 372.99566650390625, + "fcm_dpo/q_t": 0.3957051932811737, + "grad_norm": 26.235424041748047, + "learning_rate": 1.339940635976592e-07, + "logits/chosen": -0.8188216686248779, + "logits/rejected": -0.8245443105697632, + "logps/chosen": -566.755126953125, + "logps/ref_chosen": -43.791927337646484, + "logps/ref_rejected": -82.70285034179688, + "logps/rejected": -978.6617431640625, + "loss": 1.0611, + "margin_dpo/margin_mean": 372.99566650390625, + "margin_dpo/margin_std": 466.9468078613281, + "step": 470 + }, + { + "KL/chosen_KL_mean": -658.70751953125, + "KL/mean": -822.0084228515625, + "KL/rejected_KL_mean": -985.309326171875, + "KL/std": 467.14373779296875, + "epoch": 0.6916299559471366, + "fcm_dpo/beta": 0.0011935688089579344, + "fcm_dpo/delta": 0.009803693741559982, + "fcm_dpo/margin": 326.6017761230469, + "fcm_dpo/q_t": 0.41308581829071045, + "grad_norm": 32.16667938232422, + "learning_rate": 1.3285879380446563e-07, + "logits/chosen": -0.90375816822052, + "logits/rejected": -0.9042317271232605, + "logps/chosen": -722.0469970703125, + "logps/ref_chosen": -63.33952331542969, + "logps/ref_rejected": -83.61048126220703, + "logps/rejected": -1068.9197998046875, + "loss": 1.1159, + "margin_dpo/margin_mean": 326.601806640625, + "margin_dpo/margin_std": 494.537353515625, + "step": 471 + }, + { + "KL/chosen_KL_mean": -641.023681640625, + "KL/mean": -837.814697265625, + "KL/rejected_KL_mean": -1034.605712890625, + "KL/std": 537.2503662109375, + "epoch": 0.6930983847283406, + "fcm_dpo/beta": 0.0011852658353745937, + "fcm_dpo/delta": -0.07024183124303818, + "fcm_dpo/margin": 393.58203125, + "fcm_dpo/q_t": 0.3995856046676636, + "grad_norm": 28.73564338684082, + "learning_rate": 1.317266107909975e-07, + "logits/chosen": -0.8875927925109863, + "logits/rejected": -0.8611509203910828, + "logps/chosen": -724.6898193359375, + "logps/ref_chosen": -83.66610717773438, + "logps/ref_rejected": -117.20919799804688, + "logps/rejected": -1151.81494140625, + "loss": 1.08, + "margin_dpo/margin_mean": 393.58203125, + "margin_dpo/margin_std": 579.6232299804688, + "step": 472 + }, + { + "KL/chosen_KL_mean": -757.5256958007812, + "KL/mean": -852.3858642578125, + "KL/rejected_KL_mean": -947.24609375, + "KL/std": 554.10888671875, + "epoch": 0.6945668135095447, + "fcm_dpo/beta": 0.0012000746792182326, + "fcm_dpo/delta": 0.06275806576013565, + "fcm_dpo/margin": 189.72039794921875, + "fcm_dpo/q_t": 0.4505438506603241, + "grad_norm": 99.1730728149414, + "learning_rate": 1.3059754439133002e-07, + "logits/chosen": -0.8745533227920532, + "logits/rejected": -0.8379828929901123, + "logps/chosen": -821.022705078125, + "logps/ref_chosen": -63.49696731567383, + "logps/ref_rejected": -81.14657592773438, + "logps/rejected": -1028.3927001953125, + "loss": 1.3228, + "margin_dpo/margin_mean": 189.72039794921875, + "margin_dpo/margin_std": 697.6124267578125, + "step": 473 + }, + { + "KL/chosen_KL_mean": -628.0360107421875, + "KL/mean": -785.1285400390625, + "KL/rejected_KL_mean": -942.2210693359375, + "KL/std": 488.5404052734375, + "epoch": 0.6960352422907489, + "fcm_dpo/beta": 0.0011911317706108093, + "fcm_dpo/delta": -0.0750809758901596, + "fcm_dpo/margin": 314.18511962890625, + "fcm_dpo/q_t": 0.41488319635391235, + "grad_norm": 34.411014556884766, + "learning_rate": 1.2947162435741277e-07, + "logits/chosen": -0.820152223110199, + "logits/rejected": -0.8232827186584473, + "logps/chosen": -680.64794921875, + "logps/ref_chosen": -52.6119384765625, + "logps/ref_rejected": -90.08041381835938, + "logps/rejected": -1032.301513671875, + "loss": 1.1519, + "margin_dpo/margin_mean": 314.1850891113281, + "margin_dpo/margin_std": 546.5555419921875, + "step": 474 + }, + { + "KL/chosen_KL_mean": -469.183837890625, + "KL/mean": -671.3321533203125, + "KL/rejected_KL_mean": -873.4803466796875, + "KL/std": 405.08099365234375, + "epoch": 0.697503671071953, + "fcm_dpo/beta": 0.0011684303171932697, + "fcm_dpo/delta": -0.07601547241210938, + "fcm_dpo/margin": 404.2965087890625, + "fcm_dpo/q_t": 0.39084818959236145, + "grad_norm": 29.762432098388672, + "learning_rate": 1.2834888035828596e-07, + "logits/chosen": -0.8730387687683105, + "logits/rejected": -0.896369993686676, + "logps/chosen": -511.6790466308594, + "logps/ref_chosen": -42.49519348144531, + "logps/ref_rejected": -90.06294250488281, + "logps/rejected": -963.5432739257812, + "loss": 1.0303, + "margin_dpo/margin_mean": 404.2965087890625, + "margin_dpo/margin_std": 443.20880126953125, + "step": 475 + }, + { + "KL/chosen_KL_mean": -576.46826171875, + "KL/mean": -734.1749267578125, + "KL/rejected_KL_mean": -891.881591796875, + "KL/std": 429.33795166015625, + "epoch": 0.6989720998531571, + "fcm_dpo/beta": 0.0011697396403178573, + "fcm_dpo/delta": 0.03224332630634308, + "fcm_dpo/margin": 315.4133605957031, + "fcm_dpo/q_t": 0.41562318801879883, + "grad_norm": 34.20625686645508, + "learning_rate": 1.2722934197929802e-07, + "logits/chosen": -0.8082433342933655, + "logits/rejected": -0.8174213171005249, + "logps/chosen": -619.4176025390625, + "logps/ref_chosen": -42.94938278198242, + "logps/ref_rejected": -73.71023559570312, + "logps/rejected": -965.591796875, + "loss": 1.1123, + "margin_dpo/margin_mean": 315.4133605957031, + "margin_dpo/margin_std": 440.5125732421875, + "step": 476 + }, + { + "KL/chosen_KL_mean": -598.4927978515625, + "KL/mean": -759.2872314453125, + "KL/rejected_KL_mean": -920.0816650390625, + "KL/std": 456.7637939453125, + "epoch": 0.7004405286343612, + "fcm_dpo/beta": 0.0011787796393036842, + "fcm_dpo/delta": 0.02154078520834446, + "fcm_dpo/margin": 321.58892822265625, + "fcm_dpo/q_t": 0.4136529564857483, + "grad_norm": 26.809673309326172, + "learning_rate": 1.2611303872132631e-07, + "logits/chosen": -0.860885739326477, + "logits/rejected": -0.8121699094772339, + "logps/chosen": -669.265380859375, + "logps/ref_chosen": -70.77261352539062, + "logps/ref_rejected": -76.13737487792969, + "logps/rejected": -996.2190551757812, + "loss": 1.1364, + "margin_dpo/margin_mean": 321.5888977050781, + "margin_dpo/margin_std": 541.0950927734375, + "step": 477 + }, + { + "KL/chosen_KL_mean": -491.0763854980469, + "KL/mean": -672.4014892578125, + "KL/rejected_KL_mean": -853.7266845703125, + "KL/std": 389.57879638671875, + "epoch": 0.7019089574155654, + "fcm_dpo/beta": 0.0011755165178328753, + "fcm_dpo/delta": -0.02756289392709732, + "fcm_dpo/margin": 362.6502685546875, + "fcm_dpo/q_t": 0.4015880227088928, + "grad_norm": 28.476211547851562, + "learning_rate": 1.2500000000000005e-07, + "logits/chosen": -0.7467737197875977, + "logits/rejected": -0.7583505511283875, + "logps/chosen": -532.5169067382812, + "logps/ref_chosen": -41.440513610839844, + "logps/ref_rejected": -85.36196899414062, + "logps/rejected": -939.088623046875, + "loss": 1.0715, + "margin_dpo/margin_mean": 362.6502685546875, + "margin_dpo/margin_std": 451.751708984375, + "step": 478 + }, + { + "KL/chosen_KL_mean": -616.0745849609375, + "KL/mean": -788.5822143554688, + "KL/rejected_KL_mean": -961.08984375, + "KL/std": 471.95684814453125, + "epoch": 0.7033773861967695, + "fcm_dpo/beta": 0.001176186604425311, + "fcm_dpo/delta": -0.0067070163786411285, + "fcm_dpo/margin": 345.01519775390625, + "fcm_dpo/q_t": 0.40878403186798096, + "grad_norm": 28.581802368164062, + "learning_rate": 1.2389025514492456e-07, + "logits/chosen": -0.7714790105819702, + "logits/rejected": -0.794031023979187, + "logps/chosen": -669.9825439453125, + "logps/ref_chosen": -53.907920837402344, + "logps/ref_rejected": -95.1163330078125, + "logps/rejected": -1056.2061767578125, + "loss": 1.1094, + "margin_dpo/margin_mean": 345.01519775390625, + "margin_dpo/margin_std": 522.553955078125, + "step": 479 + }, + { + "KL/chosen_KL_mean": -697.255126953125, + "KL/mean": -839.0286865234375, + "KL/rejected_KL_mean": -980.8023071289062, + "KL/std": 442.7588806152344, + "epoch": 0.7048458149779736, + "fcm_dpo/beta": 0.00116480584256351, + "fcm_dpo/delta": -0.03519085794687271, + "fcm_dpo/margin": 283.5472106933594, + "fcm_dpo/q_t": 0.42370662093162537, + "grad_norm": 48.71305847167969, + "learning_rate": 1.227838333989088e-07, + "logits/chosen": -0.7846644520759583, + "logits/rejected": -0.7700395584106445, + "logps/chosen": -755.9378051757812, + "logps/ref_chosen": -58.682701110839844, + "logps/ref_rejected": -82.93248748779297, + "logps/rejected": -1063.73486328125, + "loss": 1.1653, + "margin_dpo/margin_mean": 283.5472412109375, + "margin_dpo/margin_std": 481.92913818359375, + "step": 480 + }, + { + "KL/chosen_KL_mean": -606.09716796875, + "KL/mean": -816.431640625, + "KL/rejected_KL_mean": -1026.76611328125, + "KL/std": 473.70269775390625, + "epoch": 0.7063142437591777, + "fcm_dpo/beta": 0.0011470152530819178, + "fcm_dpo/delta": -0.08663056790828705, + "fcm_dpo/margin": 420.6690368652344, + "fcm_dpo/q_t": 0.39048588275909424, + "grad_norm": 37.28335189819336, + "learning_rate": 1.2168076391719489e-07, + "logits/chosen": -0.8456419110298157, + "logits/rejected": -0.8666043281555176, + "logps/chosen": -661.0614013671875, + "logps/ref_chosen": -54.964271545410156, + "logps/ref_rejected": -92.42044067382812, + "logps/rejected": -1119.1866455078125, + "loss": 1.0382, + "margin_dpo/margin_mean": 420.6690368652344, + "margin_dpo/margin_std": 509.59088134765625, + "step": 481 + }, + { + "KL/chosen_KL_mean": -669.8154907226562, + "KL/mean": -776.3551025390625, + "KL/rejected_KL_mean": -882.894775390625, + "KL/std": 481.4432373046875, + "epoch": 0.7077826725403817, + "fcm_dpo/beta": 0.0011683362536132336, + "fcm_dpo/delta": 0.15478384494781494, + "fcm_dpo/margin": 213.07933044433594, + "fcm_dpo/q_t": 0.4412637948989868, + "grad_norm": 46.77339172363281, + "learning_rate": 1.2058107576668938e-07, + "logits/chosen": -0.7833594083786011, + "logits/rejected": -0.7691007852554321, + "logps/chosen": -737.368896484375, + "logps/ref_chosen": -67.553466796875, + "logps/ref_rejected": -87.58953857421875, + "logps/rejected": -970.4843139648438, + "loss": 1.2584, + "margin_dpo/margin_mean": 213.07931518554688, + "margin_dpo/margin_std": 576.9671630859375, + "step": 482 + }, + { + "KL/chosen_KL_mean": -593.7992553710938, + "KL/mean": -821.1121826171875, + "KL/rejected_KL_mean": -1048.425048828125, + "KL/std": 477.17486572265625, + "epoch": 0.7092511013215859, + "fcm_dpo/beta": 0.0011535545345395803, + "fcm_dpo/delta": -0.13136449456214905, + "fcm_dpo/margin": 454.625732421875, + "fcm_dpo/q_t": 0.3826148509979248, + "grad_norm": 31.144506454467773, + "learning_rate": 1.194847979251979e-07, + "logits/chosen": -0.871976375579834, + "logits/rejected": -0.878372311592102, + "logps/chosen": -657.1290893554688, + "logps/ref_chosen": -63.32981872558594, + "logps/ref_rejected": -95.78697204589844, + "logps/rejected": -1144.2119140625, + "loss": 1.0182, + "margin_dpo/margin_mean": 454.6257629394531, + "margin_dpo/margin_std": 549.6389770507812, + "step": 483 + }, + { + "KL/chosen_KL_mean": -519.2157592773438, + "KL/mean": -720.3311767578125, + "KL/rejected_KL_mean": -921.4464721679688, + "KL/std": 480.59307861328125, + "epoch": 0.71071953010279, + "fcm_dpo/beta": 0.0011427226709201932, + "fcm_dpo/delta": -0.06293704360723495, + "fcm_dpo/margin": 402.230712890625, + "fcm_dpo/q_t": 0.39542460441589355, + "grad_norm": 40.899009704589844, + "learning_rate": 1.1839195928066101e-07, + "logits/chosen": -0.8334769010543823, + "logits/rejected": -0.8463296890258789, + "logps/chosen": -578.3538818359375, + "logps/ref_chosen": -59.13812255859375, + "logps/ref_rejected": -84.37144470214844, + "logps/rejected": -1005.8179321289062, + "loss": 1.0499, + "margin_dpo/margin_mean": 402.230712890625, + "margin_dpo/margin_std": 477.49810791015625, + "step": 484 + }, + { + "KL/chosen_KL_mean": -550.48193359375, + "KL/mean": -742.0205078125, + "KL/rejected_KL_mean": -933.5590209960938, + "KL/std": 464.6655578613281, + "epoch": 0.7121879588839941, + "fcm_dpo/beta": 0.0011293399147689342, + "fcm_dpo/delta": -0.03417329490184784, + "fcm_dpo/margin": 383.07708740234375, + "fcm_dpo/q_t": 0.40266501903533936, + "grad_norm": 32.848182678222656, + "learning_rate": 1.1730258863039347e-07, + "logits/chosen": -0.8008699417114258, + "logits/rejected": -0.8162240386009216, + "logps/chosen": -609.33154296875, + "logps/ref_chosen": -58.849571228027344, + "logps/ref_rejected": -103.36408233642578, + "logps/rejected": -1036.923095703125, + "loss": 1.0859, + "margin_dpo/margin_mean": 383.07708740234375, + "margin_dpo/margin_std": 541.0684814453125, + "step": 485 + }, + { + "KL/chosen_KL_mean": -621.676513671875, + "KL/mean": -835.73876953125, + "KL/rejected_KL_mean": -1049.801025390625, + "KL/std": 494.6927185058594, + "epoch": 0.7136563876651982, + "fcm_dpo/beta": 0.0011101996060460806, + "fcm_dpo/delta": -0.079122394323349, + "fcm_dpo/margin": 428.12445068359375, + "fcm_dpo/q_t": 0.39419782161712646, + "grad_norm": 31.21572494506836, + "learning_rate": 1.1621671468032493e-07, + "logits/chosen": -0.908329963684082, + "logits/rejected": -0.9156872034072876, + "logps/chosen": -676.9362182617188, + "logps/ref_chosen": -55.25966262817383, + "logps/ref_rejected": -92.13936614990234, + "logps/rejected": -1141.9404296875, + "loss": 1.072, + "margin_dpo/margin_mean": 428.12445068359375, + "margin_dpo/margin_std": 619.9473876953125, + "step": 486 + }, + { + "KL/chosen_KL_mean": -667.73486328125, + "KL/mean": -831.6156616210938, + "KL/rejected_KL_mean": -995.4964599609375, + "KL/std": 467.8983154296875, + "epoch": 0.7151248164464024, + "fcm_dpo/beta": 0.0011161823058500886, + "fcm_dpo/delta": 0.0350751131772995, + "fcm_dpo/margin": 327.7616271972656, + "fcm_dpo/q_t": 0.41450557112693787, + "grad_norm": 42.0443229675293, + "learning_rate": 1.1513436604424378e-07, + "logits/chosen": -0.8875995874404907, + "logits/rejected": -0.891254186630249, + "logps/chosen": -720.7981567382812, + "logps/ref_chosen": -53.06330871582031, + "logps/ref_rejected": -92.41883087158203, + "logps/rejected": -1087.915283203125, + "loss": 1.125, + "margin_dpo/margin_mean": 327.7616271972656, + "margin_dpo/margin_std": 490.9307861328125, + "step": 487 + }, + { + "KL/chosen_KL_mean": -553.8161010742188, + "KL/mean": -718.7296142578125, + "KL/rejected_KL_mean": -883.6431884765625, + "KL/std": 415.6312255859375, + "epoch": 0.7165932452276065, + "fcm_dpo/beta": 0.0011240593157708645, + "fcm_dpo/delta": 0.02986850030720234, + "fcm_dpo/margin": 329.82708740234375, + "fcm_dpo/q_t": 0.4142517149448395, + "grad_norm": 32.32245635986328, + "learning_rate": 1.1405557124304335e-07, + "logits/chosen": -0.8679848313331604, + "logits/rejected": -0.8698484301567078, + "logps/chosen": -606.0442504882812, + "logps/ref_chosen": -52.22815704345703, + "logps/ref_rejected": -84.00656127929688, + "logps/rejected": -967.6497802734375, + "loss": 1.1052, + "margin_dpo/margin_mean": 329.82708740234375, + "margin_dpo/margin_std": 427.99810791015625, + "step": 488 + }, + { + "KL/chosen_KL_mean": -516.3485107421875, + "KL/mean": -680.156982421875, + "KL/rejected_KL_mean": -843.9655151367188, + "KL/std": 421.83648681640625, + "epoch": 0.7180616740088106, + "fcm_dpo/beta": 0.001128336414694786, + "fcm_dpo/delta": 0.031364768743515015, + "fcm_dpo/margin": 327.616943359375, + "fcm_dpo/q_t": 0.4163089990615845, + "grad_norm": 32.1641960144043, + "learning_rate": 1.1298035870396985e-07, + "logits/chosen": -0.9042928218841553, + "logits/rejected": -0.8924728035926819, + "logps/chosen": -572.338134765625, + "logps/ref_chosen": -55.989627838134766, + "logps/ref_rejected": -79.39812469482422, + "logps/rejected": -923.3636474609375, + "loss": 1.1145, + "margin_dpo/margin_mean": 327.616943359375, + "margin_dpo/margin_std": 469.63079833984375, + "step": 489 + }, + { + "KL/chosen_KL_mean": -605.282958984375, + "KL/mean": -779.2673950195312, + "KL/rejected_KL_mean": -953.2518310546875, + "KL/std": 508.89208984375, + "epoch": 0.7195301027900147, + "fcm_dpo/beta": 0.001132056349888444, + "fcm_dpo/delta": 0.006162045523524284, + "fcm_dpo/margin": 347.96881103515625, + "fcm_dpo/q_t": 0.4120814800262451, + "grad_norm": 57.53245544433594, + "learning_rate": 1.1190875675987355e-07, + "logits/chosen": -0.9138531684875488, + "logits/rejected": -0.9565045833587646, + "logps/chosen": -657.6494140625, + "logps/ref_chosen": -52.36639404296875, + "logps/ref_rejected": -110.4090576171875, + "logps/rejected": -1063.660888671875, + "loss": 1.1397, + "margin_dpo/margin_mean": 347.96881103515625, + "margin_dpo/margin_std": 604.8751220703125, + "step": 490 + }, + { + "KL/chosen_KL_mean": -560.5130615234375, + "KL/mean": -677.8720703125, + "KL/rejected_KL_mean": -795.2310791015625, + "KL/std": 434.89801025390625, + "epoch": 0.7209985315712188, + "fcm_dpo/beta": 0.0011535290395841002, + "fcm_dpo/delta": 0.1326218992471695, + "fcm_dpo/margin": 234.71804809570312, + "fcm_dpo/q_t": 0.438266396522522, + "grad_norm": 30.0527400970459, + "learning_rate": 1.1084079364846241e-07, + "logits/chosen": -0.887237548828125, + "logits/rejected": -0.8714909553527832, + "logps/chosen": -620.6292724609375, + "logps/ref_chosen": -60.11626434326172, + "logps/ref_rejected": -73.27278900146484, + "logps/rejected": -868.50390625, + "loss": 1.195, + "margin_dpo/margin_mean": 234.7180633544922, + "margin_dpo/margin_std": 441.885009765625, + "step": 491 + }, + { + "KL/chosen_KL_mean": -571.1317138671875, + "KL/mean": -690.6876220703125, + "KL/rejected_KL_mean": -810.2435302734375, + "KL/std": 437.35736083984375, + "epoch": 0.7224669603524229, + "fcm_dpo/beta": 0.0011779199121519923, + "fcm_dpo/delta": 0.121956005692482, + "fcm_dpo/margin": 239.11178588867188, + "fcm_dpo/q_t": 0.4370569884777069, + "grad_norm": 29.874557495117188, + "learning_rate": 1.097764975115576e-07, + "logits/chosen": -0.9431591033935547, + "logits/rejected": -0.9226012229919434, + "logps/chosen": -625.1259155273438, + "logps/ref_chosen": -53.994178771972656, + "logps/ref_rejected": -72.65962219238281, + "logps/rejected": -882.9031372070312, + "loss": 1.2094, + "margin_dpo/margin_mean": 239.1117706298828, + "margin_dpo/margin_std": 507.32025146484375, + "step": 492 + }, + { + "KL/chosen_KL_mean": -595.4638061523438, + "KL/mean": -726.3221435546875, + "KL/rejected_KL_mean": -857.1805419921875, + "KL/std": 469.4154968261719, + "epoch": 0.723935389133627, + "fcm_dpo/beta": 0.0011845249682664871, + "fcm_dpo/delta": -0.016040312126278877, + "fcm_dpo/margin": 261.7166748046875, + "fcm_dpo/q_t": 0.42683732509613037, + "grad_norm": 33.20534896850586, + "learning_rate": 1.0871589639435203e-07, + "logits/chosen": -0.9668236970901489, + "logits/rejected": -0.9259661436080933, + "logps/chosen": -670.9610595703125, + "logps/ref_chosen": -75.49723815917969, + "logps/ref_rejected": -87.32301330566406, + "logps/rejected": -944.5035400390625, + "loss": 1.1687, + "margin_dpo/margin_mean": 261.7166748046875, + "margin_dpo/margin_std": 446.52606201171875, + "step": 493 + }, + { + "KL/chosen_KL_mean": -478.02276611328125, + "KL/mean": -682.6138916015625, + "KL/rejected_KL_mean": -887.2049560546875, + "KL/std": 429.03424072265625, + "epoch": 0.7254038179148311, + "fcm_dpo/beta": 0.0011693753767758608, + "fcm_dpo/delta": -0.08248934149742126, + "fcm_dpo/margin": 409.18218994140625, + "fcm_dpo/q_t": 0.38879674673080444, + "grad_norm": 43.84877395629883, + "learning_rate": 1.0765901824467166e-07, + "logits/chosen": -0.7634121179580688, + "logits/rejected": -0.7962794303894043, + "logps/chosen": -519.382080078125, + "logps/ref_chosen": -41.35926818847656, + "logps/ref_rejected": -86.09136962890625, + "logps/rejected": -973.29638671875, + "loss": 1.0237, + "margin_dpo/margin_mean": 409.18218994140625, + "margin_dpo/margin_std": 432.674072265625, + "step": 494 + }, + { + "KL/chosen_KL_mean": -547.5703125, + "KL/mean": -727.9459228515625, + "KL/rejected_KL_mean": -908.321533203125, + "KL/std": 453.545654296875, + "epoch": 0.7268722466960352, + "fcm_dpo/beta": 0.0011636005947366357, + "fcm_dpo/delta": -0.020662881433963776, + "fcm_dpo/margin": 360.75115966796875, + "fcm_dpo/q_t": 0.40708619356155396, + "grad_norm": 42.19923782348633, + "learning_rate": 1.0660589091223854e-07, + "logits/chosen": -0.9473215937614441, + "logits/rejected": -0.9521135091781616, + "logps/chosen": -611.1054077148438, + "logps/ref_chosen": -63.53507995605469, + "logps/ref_rejected": -91.42443084716797, + "logps/rejected": -999.7459716796875, + "loss": 1.0991, + "margin_dpo/margin_mean": 360.7511901855469, + "margin_dpo/margin_std": 544.379638671875, + "step": 495 + }, + { + "KL/chosen_KL_mean": -643.5244750976562, + "KL/mean": -745.2659912109375, + "KL/rejected_KL_mean": -847.007568359375, + "KL/std": 351.7397766113281, + "epoch": 0.7283406754772394, + "fcm_dpo/beta": 0.0011893340852111578, + "fcm_dpo/delta": 0.1618269979953766, + "fcm_dpo/margin": 203.4831085205078, + "fcm_dpo/q_t": 0.44347870349884033, + "grad_norm": 57.562477111816406, + "learning_rate": 1.0555654214793722e-07, + "logits/chosen": -0.9014628529548645, + "logits/rejected": -0.8666530251502991, + "logps/chosen": -716.1163940429688, + "logps/ref_chosen": -72.5919189453125, + "logps/ref_rejected": -84.32933807373047, + "logps/rejected": -931.3369140625, + "loss": 1.2108, + "margin_dpo/margin_mean": 203.48312377929688, + "margin_dpo/margin_std": 389.7113037109375, + "step": 496 + }, + { + "KL/chosen_KL_mean": -629.3239135742188, + "KL/mean": -727.7587890625, + "KL/rejected_KL_mean": -826.1937255859375, + "KL/std": 427.0351867675781, + "epoch": 0.7298091042584435, + "fcm_dpo/beta": 0.001203190186060965, + "fcm_dpo/delta": 0.019497813656926155, + "fcm_dpo/margin": 196.86973571777344, + "fcm_dpo/q_t": 0.4448572099208832, + "grad_norm": 40.526710510253906, + "learning_rate": 1.0451099960308374e-07, + "logits/chosen": -0.879808783531189, + "logits/rejected": -0.8550307154655457, + "logps/chosen": -687.9178466796875, + "logps/ref_chosen": -58.59397506713867, + "logps/ref_rejected": -76.28836822509766, + "logps/rejected": -902.4820556640625, + "loss": 1.2269, + "margin_dpo/margin_mean": 196.86973571777344, + "margin_dpo/margin_std": 415.5872802734375, + "step": 497 + }, + { + "KL/chosen_KL_mean": -624.457275390625, + "KL/mean": -773.5231323242188, + "KL/rejected_KL_mean": -922.5890502929688, + "KL/std": 460.3760986328125, + "epoch": 0.7312775330396476, + "fcm_dpo/beta": 0.00121046113781631, + "fcm_dpo/delta": 0.04058893769979477, + "fcm_dpo/margin": 298.13177490234375, + "fcm_dpo/q_t": 0.416517436504364, + "grad_norm": 28.048709869384766, + "learning_rate": 1.0346929082869641e-07, + "logits/chosen": -0.8946305513381958, + "logits/rejected": -0.874454140663147, + "logps/chosen": -695.6629028320312, + "logps/ref_chosen": -71.20565795898438, + "logps/ref_rejected": -83.95803833007812, + "logps/rejected": -1006.547119140625, + "loss": 1.1448, + "margin_dpo/margin_mean": 298.1318054199219, + "margin_dpo/margin_std": 504.37261962890625, + "step": 498 + }, + { + "KL/chosen_KL_mean": -535.3038940429688, + "KL/mean": -735.150146484375, + "KL/rejected_KL_mean": -934.9964599609375, + "KL/std": 470.1824951171875, + "epoch": 0.7327459618208517, + "fcm_dpo/beta": 0.0011967134196311235, + "fcm_dpo/delta": -0.08238838613033295, + "fcm_dpo/margin": 399.6925964355469, + "fcm_dpo/q_t": 0.39139658212661743, + "grad_norm": 45.138729095458984, + "learning_rate": 1.0243144327477013e-07, + "logits/chosen": -0.8791499733924866, + "logits/rejected": -0.9143052697181702, + "logps/chosen": -586.55908203125, + "logps/ref_chosen": -51.25519561767578, + "logps/ref_rejected": -101.07870483398438, + "logps/rejected": -1036.0751953125, + "loss": 1.0497, + "margin_dpo/margin_mean": 399.692626953125, + "margin_dpo/margin_std": 514.5656127929688, + "step": 499 + }, + { + "KL/chosen_KL_mean": -659.9070434570312, + "KL/mean": -845.4681396484375, + "KL/rejected_KL_mean": -1031.0291748046875, + "KL/std": 449.5928039550781, + "epoch": 0.7342143906020558, + "fcm_dpo/beta": 0.0011844468535855412, + "fcm_dpo/delta": -0.04147026687860489, + "fcm_dpo/margin": 371.12213134765625, + "fcm_dpo/q_t": 0.4010527729988098, + "grad_norm": 30.78356170654297, + "learning_rate": 1.0139748428955333e-07, + "logits/chosen": -0.8759046792984009, + "logits/rejected": -0.9078898429870605, + "logps/chosen": -716.9344482421875, + "logps/ref_chosen": -57.027442932128906, + "logps/ref_rejected": -93.93421173095703, + "logps/rejected": -1124.96337890625, + "loss": 1.0946, + "margin_dpo/margin_mean": 371.12213134765625, + "margin_dpo/margin_std": 553.8121337890625, + "step": 500 + }, + { + "KL/chosen_KL_mean": -582.7872924804688, + "KL/mean": -753.5026245117188, + "KL/rejected_KL_mean": -924.218017578125, + "KL/std": 446.186767578125, + "epoch": 0.73568281938326, + "fcm_dpo/beta": 0.001185835339128971, + "fcm_dpo/delta": -0.005243198946118355, + "fcm_dpo/margin": 341.43072509765625, + "fcm_dpo/q_t": 0.4099680185317993, + "grad_norm": 37.05495834350586, + "learning_rate": 1.0036744111882672e-07, + "logits/chosen": -0.8284963369369507, + "logits/rejected": -0.8059309720993042, + "logps/chosen": -637.1468505859375, + "logps/ref_chosen": -54.359527587890625, + "logps/ref_rejected": -80.15670013427734, + "logps/rejected": -1004.3746948242188, + "loss": 1.1294, + "margin_dpo/margin_mean": 341.4306640625, + "margin_dpo/margin_std": 574.979248046875, + "step": 501 + }, + { + "KL/chosen_KL_mean": -551.6671142578125, + "KL/mean": -723.5264282226562, + "KL/rejected_KL_mean": -895.3857421875, + "KL/std": 402.4056091308594, + "epoch": 0.737151248164464, + "fcm_dpo/beta": 0.0011830935254693031, + "fcm_dpo/delta": -0.006993459537625313, + "fcm_dpo/margin": 343.71856689453125, + "fcm_dpo/q_t": 0.40695464611053467, + "grad_norm": 26.800430297851562, + "learning_rate": 9.934134090518592e-08, + "logits/chosen": -0.8308136463165283, + "logits/rejected": -0.8038866519927979, + "logps/chosen": -619.2677001953125, + "logps/ref_chosen": -67.60050964355469, + "logps/ref_rejected": -82.94876098632812, + "logps/rejected": -978.33447265625, + "loss": 1.0823, + "margin_dpo/margin_mean": 343.71856689453125, + "margin_dpo/margin_std": 436.3647155761719, + "step": 502 + }, + { + "KL/chosen_KL_mean": -547.663330078125, + "KL/mean": -710.1171875, + "KL/rejected_KL_mean": -872.571044921875, + "KL/std": 404.2541809082031, + "epoch": 0.7386196769456681, + "fcm_dpo/beta": 0.001180183608084917, + "fcm_dpo/delta": 0.017100892961025238, + "fcm_dpo/margin": 324.90771484375, + "fcm_dpo/q_t": 0.41350266337394714, + "grad_norm": 32.493919372558594, + "learning_rate": 9.831921068732571e-08, + "logits/chosen": -0.7998204231262207, + "logits/rejected": -0.7804523706436157, + "logps/chosen": -602.74169921875, + "logps/ref_chosen": -55.078407287597656, + "logps/ref_rejected": -82.50544738769531, + "logps/rejected": -955.0765380859375, + "loss": 1.1058, + "margin_dpo/margin_mean": 324.90771484375, + "margin_dpo/margin_std": 457.2688293457031, + "step": 503 + }, + { + "KL/chosen_KL_mean": -594.047607421875, + "KL/mean": -796.29931640625, + "KL/rejected_KL_mean": -998.551025390625, + "KL/std": 500.2449645996094, + "epoch": 0.7400881057268722, + "fcm_dpo/beta": 0.0011717536253854632, + "fcm_dpo/delta": -0.07761284708976746, + "fcm_dpo/margin": 404.5033874511719, + "fcm_dpo/q_t": 0.39360561966896057, + "grad_norm": 34.34120178222656, + "learning_rate": 9.730107739932805e-08, + "logits/chosen": -0.849440336227417, + "logits/rejected": -0.8721122741699219, + "logps/chosen": -654.0133666992188, + "logps/ref_chosen": -59.96575164794922, + "logps/ref_rejected": -103.76212310791016, + "logps/rejected": -1102.3131103515625, + "loss": 1.0663, + "margin_dpo/margin_mean": 404.50335693359375, + "margin_dpo/margin_std": 555.4103393554688, + "step": 504 + }, + { + "KL/chosen_KL_mean": -663.4384155273438, + "KL/mean": -768.9776611328125, + "KL/rejected_KL_mean": -874.516845703125, + "KL/std": 459.5237731933594, + "epoch": 0.7415565345080763, + "fcm_dpo/beta": 0.0011943629942834377, + "fcm_dpo/delta": 0.15151187777519226, + "fcm_dpo/margin": 211.07833862304688, + "fcm_dpo/q_t": 0.4423941969871521, + "grad_norm": 45.66891098022461, + "learning_rate": 9.628696786995188e-08, + "logits/chosen": -0.8771257400512695, + "logits/rejected": -0.8454539775848389, + "logps/chosen": -739.5933227539062, + "logps/ref_chosen": -76.1549072265625, + "logps/ref_rejected": -88.58537292480469, + "logps/rejected": -963.1021728515625, + "loss": 1.2136, + "margin_dpo/margin_mean": 211.07835388183594, + "margin_dpo/margin_std": 428.6888427734375, + "step": 505 + }, + { + "KL/chosen_KL_mean": -519.6373291015625, + "KL/mean": -697.4900512695312, + "KL/rejected_KL_mean": -875.3427734375, + "KL/std": 440.137939453125, + "epoch": 0.7430249632892805, + "fcm_dpo/beta": 0.00119347358122468, + "fcm_dpo/delta": -0.025928327813744545, + "fcm_dpo/margin": 355.7054748535156, + "fcm_dpo/q_t": 0.40311557054519653, + "grad_norm": 24.074623107910156, + "learning_rate": 9.527690882192635e-08, + "logits/chosen": -0.8389706611633301, + "logits/rejected": -0.8414362668991089, + "logps/chosen": -568.5978393554688, + "logps/ref_chosen": -48.96050262451172, + "logps/ref_rejected": -78.41505432128906, + "logps/rejected": -953.7578125, + "loss": 1.0838, + "margin_dpo/margin_mean": 355.7054748535156, + "margin_dpo/margin_std": 483.68572998046875, + "step": 506 + }, + { + "KL/chosen_KL_mean": -560.86669921875, + "KL/mean": -723.412841796875, + "KL/rejected_KL_mean": -885.958984375, + "KL/std": 489.5423583984375, + "epoch": 0.7444933920704846, + "fcm_dpo/beta": 0.001195873599499464, + "fcm_dpo/delta": 0.011642876081168652, + "fcm_dpo/margin": 325.09228515625, + "fcm_dpo/q_t": 0.4149119257926941, + "grad_norm": 34.941505432128906, + "learning_rate": 9.427092687124691e-08, + "logits/chosen": -0.8597081899642944, + "logits/rejected": -0.8631129264831543, + "logps/chosen": -627.6681518554688, + "logps/ref_chosen": -66.80149841308594, + "logps/ref_rejected": -95.37289428710938, + "logps/rejected": -981.3319091796875, + "loss": 1.1265, + "margin_dpo/margin_mean": 325.09228515625, + "margin_dpo/margin_std": 534.9813232421875, + "step": 507 + }, + { + "KL/chosen_KL_mean": -605.2239990234375, + "KL/mean": -735.3968505859375, + "KL/rejected_KL_mean": -865.5697021484375, + "KL/std": 466.6279296875, + "epoch": 0.7459618208516887, + "fcm_dpo/beta": 0.0012166362721472979, + "fcm_dpo/delta": 0.08571073412895203, + "fcm_dpo/margin": 260.345703125, + "fcm_dpo/q_t": 0.4304507374763489, + "grad_norm": 38.527687072753906, + "learning_rate": 9.326904852647344e-08, + "logits/chosen": -0.8243334889411926, + "logits/rejected": -0.8081272840499878, + "logps/chosen": -676.5274658203125, + "logps/ref_chosen": -71.303466796875, + "logps/ref_rejected": -95.6275405883789, + "logps/rejected": -961.197265625, + "loss": 1.2015, + "margin_dpo/margin_mean": 260.345703125, + "margin_dpo/margin_std": 558.1546630859375, + "step": 508 + }, + { + "KL/chosen_KL_mean": -494.65545654296875, + "KL/mean": -636.5025024414062, + "KL/rejected_KL_mean": -778.3495483398438, + "KL/std": 360.523193359375, + "epoch": 0.7474302496328928, + "fcm_dpo/beta": 0.001233469694852829, + "fcm_dpo/delta": 0.05139687657356262, + "fcm_dpo/margin": 283.694091796875, + "fcm_dpo/q_t": 0.4210923910140991, + "grad_norm": 32.919189453125, + "learning_rate": 9.227130018803195e-08, + "logits/chosen": -0.7569400668144226, + "logits/rejected": -0.7431646585464478, + "logps/chosen": -558.4744262695312, + "logps/ref_chosen": -63.81895065307617, + "logps/ref_rejected": -83.25643920898438, + "logps/rejected": -861.60595703125, + "loss": 1.1423, + "margin_dpo/margin_mean": 283.6940612792969, + "margin_dpo/margin_std": 457.988525390625, + "step": 509 + }, + { + "KL/chosen_KL_mean": -584.29296875, + "KL/mean": -770.234375, + "KL/rejected_KL_mean": -956.17578125, + "KL/std": 406.34039306640625, + "epoch": 0.748898678414097, + "fcm_dpo/beta": 0.001223585568368435, + "fcm_dpo/delta": -0.05761527270078659, + "fcm_dpo/margin": 371.8828125, + "fcm_dpo/q_t": 0.39347004890441895, + "grad_norm": 34.82098388671875, + "learning_rate": 9.127770814751932e-08, + "logits/chosen": -0.7776767611503601, + "logits/rejected": -0.7976189851760864, + "logps/chosen": -636.17138671875, + "logps/ref_chosen": -51.878448486328125, + "logps/ref_rejected": -102.7651596069336, + "logps/rejected": -1058.94091796875, + "loss": 1.0368, + "margin_dpo/margin_mean": 371.8828125, + "margin_dpo/margin_std": 393.40496826171875, + "step": 510 + }, + { + "KL/chosen_KL_mean": -544.2560424804688, + "KL/mean": -698.630615234375, + "KL/rejected_KL_mean": -853.0051879882812, + "KL/std": 448.10504150390625, + "epoch": 0.750367107195301, + "fcm_dpo/beta": 0.0012225550599396229, + "fcm_dpo/delta": 0.0234109815210104, + "fcm_dpo/margin": 308.7491455078125, + "fcm_dpo/q_t": 0.41429954767227173, + "grad_norm": 30.70000648498535, + "learning_rate": 9.028829858700973e-08, + "logits/chosen": -0.8361300230026245, + "logits/rejected": -0.8324530124664307, + "logps/chosen": -604.494140625, + "logps/ref_chosen": -60.23811721801758, + "logps/ref_rejected": -92.85676574707031, + "logps/rejected": -945.8619384765625, + "loss": 1.1419, + "margin_dpo/margin_mean": 308.7491455078125, + "margin_dpo/margin_std": 537.5291748046875, + "step": 511 + }, + { + "KL/chosen_KL_mean": -450.8485107421875, + "KL/mean": -653.24609375, + "KL/rejected_KL_mean": -855.6436157226562, + "KL/std": 413.8804931640625, + "epoch": 0.7518355359765051, + "fcm_dpo/beta": 0.0012053523678332567, + "fcm_dpo/delta": -0.09257997572422028, + "fcm_dpo/margin": 404.79510498046875, + "fcm_dpo/q_t": 0.3863917589187622, + "grad_norm": 36.188262939453125, + "learning_rate": 8.930309757836516e-08, + "logits/chosen": -0.8050196170806885, + "logits/rejected": -0.8159193396568298, + "logps/chosen": -505.7539978027344, + "logps/ref_chosen": -54.905494689941406, + "logps/ref_rejected": -81.87586975097656, + "logps/rejected": -937.51953125, + "loss": 1.0147, + "margin_dpo/margin_mean": 404.79510498046875, + "margin_dpo/margin_std": 416.42584228515625, + "step": 512 + }, + { + "KL/chosen_KL_mean": -559.5546875, + "KL/mean": -700.3875732421875, + "KL/rejected_KL_mean": -841.2203979492188, + "KL/std": 382.072021484375, + "epoch": 0.7533039647577092, + "fcm_dpo/beta": 0.0011912956833839417, + "fcm_dpo/delta": -0.04120471701025963, + "fcm_dpo/margin": 281.6656799316406, + "fcm_dpo/q_t": 0.42267322540283203, + "grad_norm": 37.652217864990234, + "learning_rate": 8.832213108254863e-08, + "logits/chosen": -0.8725820183753967, + "logits/rejected": -0.8468344211578369, + "logps/chosen": -624.47119140625, + "logps/ref_chosen": -64.91644287109375, + "logps/ref_rejected": -76.06245422363281, + "logps/rejected": -917.2828369140625, + "loss": 1.1508, + "margin_dpo/margin_mean": 281.6656494140625, + "margin_dpo/margin_std": 448.5667724609375, + "step": 513 + }, + { + "KL/chosen_KL_mean": -589.4393920898438, + "KL/mean": -719.176513671875, + "KL/rejected_KL_mean": -848.9135131835938, + "KL/std": 411.193359375, + "epoch": 0.7547723935389133, + "fcm_dpo/beta": 0.0012082626344636083, + "fcm_dpo/delta": 0.08919873833656311, + "fcm_dpo/margin": 259.47412109375, + "fcm_dpo/q_t": 0.4297820031642914, + "grad_norm": 31.276504516601562, + "learning_rate": 8.734542494893954e-08, + "logits/chosen": -0.8355896472930908, + "logits/rejected": -0.809748649597168, + "logps/chosen": -663.6689453125, + "logps/ref_chosen": -74.22957611083984, + "logps/ref_rejected": -78.945556640625, + "logps/rejected": -927.8590698242188, + "loss": 1.1703, + "margin_dpo/margin_mean": 259.4741516113281, + "margin_dpo/margin_std": 465.32330322265625, + "step": 514 + }, + { + "KL/chosen_KL_mean": -501.87353515625, + "KL/mean": -626.3300170898438, + "KL/rejected_KL_mean": -750.7864990234375, + "KL/std": 381.322021484375, + "epoch": 0.7562408223201175, + "fcm_dpo/beta": 0.0012302729301154613, + "fcm_dpo/delta": 0.09674014896154404, + "fcm_dpo/margin": 248.9129638671875, + "fcm_dpo/q_t": 0.4292389154434204, + "grad_norm": 41.97957992553711, + "learning_rate": 8.637300491465272e-08, + "logits/chosen": -0.8100356459617615, + "logits/rejected": -0.8176305294036865, + "logps/chosen": -552.275146484375, + "logps/ref_chosen": -50.40156555175781, + "logps/ref_rejected": -87.09774780273438, + "logps/rejected": -837.88427734375, + "loss": 1.1746, + "margin_dpo/margin_mean": 248.9129638671875, + "margin_dpo/margin_std": 451.3743896484375, + "step": 515 + }, + { + "KL/chosen_KL_mean": -512.7012939453125, + "KL/mean": -686.7001953125, + "KL/rejected_KL_mean": -860.69921875, + "KL/std": 412.843017578125, + "epoch": 0.7577092511013216, + "fcm_dpo/beta": 0.0012345185969024897, + "fcm_dpo/delta": -0.031035784631967545, + "fcm_dpo/margin": 347.9979248046875, + "fcm_dpo/q_t": 0.3999173939228058, + "grad_norm": 31.350547790527344, + "learning_rate": 8.540489660386064e-08, + "logits/chosen": -0.883063018321991, + "logits/rejected": -0.9014164209365845, + "logps/chosen": -577.350830078125, + "logps/ref_chosen": -64.64956665039062, + "logps/ref_rejected": -111.72237396240234, + "logps/rejected": -972.4215698242188, + "loss": 1.0631, + "margin_dpo/margin_mean": 347.9979553222656, + "margin_dpo/margin_std": 409.755126953125, + "step": 516 + }, + { + "KL/chosen_KL_mean": -538.5610961914062, + "KL/mean": -739.44091796875, + "KL/rejected_KL_mean": -940.3209228515625, + "KL/std": 451.02313232421875, + "epoch": 0.7591776798825257, + "fcm_dpo/beta": 0.0012100373860448599, + "fcm_dpo/delta": -0.09078750759363174, + "fcm_dpo/margin": 401.75970458984375, + "fcm_dpo/q_t": 0.39081257581710815, + "grad_norm": 25.801740646362305, + "learning_rate": 8.444112552711752e-08, + "logits/chosen": -0.8275068402290344, + "logits/rejected": -0.8201382160186768, + "logps/chosen": -599.474609375, + "logps/ref_chosen": -60.913551330566406, + "logps/ref_rejected": -89.08308410644531, + "logps/rejected": -1029.4039306640625, + "loss": 1.0379, + "margin_dpo/margin_mean": 401.759765625, + "margin_dpo/margin_std": 496.04901123046875, + "step": 517 + }, + { + "KL/chosen_KL_mean": -519.912353515625, + "KL/mean": -685.43994140625, + "KL/rejected_KL_mean": -850.967529296875, + "KL/std": 387.7268371582031, + "epoch": 0.7606461086637298, + "fcm_dpo/beta": 0.0012026941403746605, + "fcm_dpo/delta": 0.0016661733388900757, + "fcm_dpo/margin": 331.0551452636719, + "fcm_dpo/q_t": 0.4073007106781006, + "grad_norm": 30.128890991210938, + "learning_rate": 8.348171708068747e-08, + "logits/chosen": -0.8605848550796509, + "logits/rejected": -0.8738881945610046, + "logps/chosen": -577.3682861328125, + "logps/ref_chosen": -57.45589065551758, + "logps/ref_rejected": -85.31269836425781, + "logps/rejected": -936.2802124023438, + "loss": 1.0897, + "margin_dpo/margin_mean": 331.05511474609375, + "margin_dpo/margin_std": 424.0645751953125, + "step": 518 + }, + { + "KL/chosen_KL_mean": -521.1378784179688, + "KL/mean": -639.3558349609375, + "KL/rejected_KL_mean": -757.57373046875, + "KL/std": 362.4400939941406, + "epoch": 0.762114537444934, + "fcm_dpo/beta": 0.0012314484920352697, + "fcm_dpo/delta": 0.1115046888589859, + "fcm_dpo/margin": 236.4358367919922, + "fcm_dpo/q_t": 0.4326602518558502, + "grad_norm": 43.568424224853516, + "learning_rate": 8.25266965458755e-08, + "logits/chosen": -0.8680436015129089, + "logits/rejected": -0.847466230392456, + "logps/chosen": -595.201171875, + "logps/ref_chosen": -74.06331634521484, + "logps/ref_rejected": -104.44416809082031, + "logps/rejected": -862.0178833007812, + "loss": 1.193, + "margin_dpo/margin_mean": 236.4358367919922, + "margin_dpo/margin_std": 456.2001953125, + "step": 519 + }, + { + "KL/chosen_KL_mean": -569.1138916015625, + "KL/mean": -716.5203857421875, + "KL/rejected_KL_mean": -863.9268188476562, + "KL/std": 410.4988098144531, + "epoch": 0.7635829662261381, + "fcm_dpo/beta": 0.001238158904016018, + "fcm_dpo/delta": 0.03627597913146019, + "fcm_dpo/margin": 294.8129577636719, + "fcm_dpo/q_t": 0.41828474402427673, + "grad_norm": 27.024534225463867, + "learning_rate": 8.15760890883607e-08, + "logits/chosen": -0.779923677444458, + "logits/rejected": -0.7774548530578613, + "logps/chosen": -639.4136962890625, + "logps/ref_chosen": -70.2998275756836, + "logps/ref_rejected": -99.98133850097656, + "logps/rejected": -963.908203125, + "loss": 1.1323, + "margin_dpo/margin_mean": 294.8129577636719, + "margin_dpo/margin_std": 456.40606689453125, + "step": 520 + }, + { + "KL/chosen_KL_mean": -496.0313415527344, + "KL/mean": -657.4324951171875, + "KL/rejected_KL_mean": -818.833740234375, + "KL/std": 417.3710021972656, + "epoch": 0.7650513950073421, + "fcm_dpo/beta": 0.0012491261586546898, + "fcm_dpo/delta": -0.0038999132812023163, + "fcm_dpo/margin": 322.8023681640625, + "fcm_dpo/q_t": 0.40750136971473694, + "grad_norm": 31.882793426513672, + "learning_rate": 8.062991975753378e-08, + "logits/chosen": -0.8779969215393066, + "logits/rejected": -0.8690969944000244, + "logps/chosen": -554.1742553710938, + "logps/ref_chosen": -58.14292526245117, + "logps/ref_rejected": -83.28060913085938, + "logps/rejected": -902.1143188476562, + "loss": 1.089, + "margin_dpo/margin_mean": 322.8023681640625, + "margin_dpo/margin_std": 418.15899658203125, + "step": 521 + }, + { + "KL/chosen_KL_mean": -583.5753784179688, + "KL/mean": -740.7099609375, + "KL/rejected_KL_mean": -897.844482421875, + "KL/std": 467.26300048828125, + "epoch": 0.7665198237885462, + "fcm_dpo/beta": 0.001243784325197339, + "fcm_dpo/delta": 0.009488995186984539, + "fcm_dpo/margin": 314.26910400390625, + "fcm_dpo/q_t": 0.4114811420440674, + "grad_norm": 31.201757431030273, + "learning_rate": 7.968821348583643e-08, + "logits/chosen": -0.8858053088188171, + "logits/rejected": -0.8832094669342041, + "logps/chosen": -630.123046875, + "logps/ref_chosen": -46.54766845703125, + "logps/ref_rejected": -66.01388549804688, + "logps/rejected": -963.8583984375, + "loss": 1.1203, + "margin_dpo/margin_mean": 314.26910400390625, + "margin_dpo/margin_std": 489.2512512207031, + "step": 522 + }, + { + "KL/chosen_KL_mean": -587.31884765625, + "KL/mean": -753.39404296875, + "KL/rejected_KL_mean": -919.4691772460938, + "KL/std": 486.9326477050781, + "epoch": 0.7679882525697503, + "fcm_dpo/beta": 0.0012416969984769821, + "fcm_dpo/delta": -0.012975066900253296, + "fcm_dpo/margin": 332.15032958984375, + "fcm_dpo/q_t": 0.40742552280426025, + "grad_norm": 33.407474517822266, + "learning_rate": 7.875099508810484e-08, + "logits/chosen": -0.9253371953964233, + "logits/rejected": -0.9181835055351257, + "logps/chosen": -649.0885009765625, + "logps/ref_chosen": -61.76960372924805, + "logps/ref_rejected": -83.76141357421875, + "logps/rejected": -1003.2305908203125, + "loss": 1.1124, + "margin_dpo/margin_mean": 332.15032958984375, + "margin_dpo/margin_std": 522.383056640625, + "step": 523 + }, + { + "KL/chosen_KL_mean": -593.376953125, + "KL/mean": -751.2468872070312, + "KL/rejected_KL_mean": -909.1168212890625, + "KL/std": 469.7861633300781, + "epoch": 0.7694566813509545, + "fcm_dpo/beta": 0.0012377724051475525, + "fcm_dpo/delta": 0.009213726967573166, + "fcm_dpo/margin": 315.73992919921875, + "fcm_dpo/q_t": 0.40860164165496826, + "grad_norm": 39.42531967163086, + "learning_rate": 7.781828926091535e-08, + "logits/chosen": -0.9645393490791321, + "logits/rejected": -0.9483359456062317, + "logps/chosen": -671.448974609375, + "logps/ref_chosen": -78.0720443725586, + "logps/ref_rejected": -81.30198669433594, + "logps/rejected": -990.4188232421875, + "loss": 1.114, + "margin_dpo/margin_mean": 315.7398986816406, + "margin_dpo/margin_std": 464.941162109375, + "step": 524 + }, + { + "KL/chosen_KL_mean": -537.3798828125, + "KL/mean": -752.6138305664062, + "KL/rejected_KL_mean": -967.847900390625, + "KL/std": 474.93743896484375, + "epoch": 0.7709251101321586, + "fcm_dpo/beta": 0.0012156711891293526, + "fcm_dpo/delta": -0.13079476356506348, + "fcm_dpo/margin": 430.468017578125, + "fcm_dpo/q_t": 0.3830450177192688, + "grad_norm": 28.412078857421875, + "learning_rate": 7.689012058193384e-08, + "logits/chosen": -0.8482464551925659, + "logits/rejected": -0.8838850259780884, + "logps/chosen": -588.2077026367188, + "logps/ref_chosen": -50.827857971191406, + "logps/ref_rejected": -100.05294036865234, + "logps/rejected": -1067.90087890625, + "loss": 1.0139, + "margin_dpo/margin_mean": 430.468017578125, + "margin_dpo/margin_std": 507.2789306640625, + "step": 525 + }, + { + "KL/chosen_KL_mean": -593.8212890625, + "KL/mean": -805.8401489257812, + "KL/rejected_KL_mean": -1017.8590087890625, + "KL/std": 471.7684020996094, + "epoch": 0.7723935389133627, + "fcm_dpo/beta": 0.0011959581170231104, + "fcm_dpo/delta": -0.11271873861551285, + "fcm_dpo/margin": 424.0378112792969, + "fcm_dpo/q_t": 0.384555459022522, + "grad_norm": 30.65778923034668, + "learning_rate": 7.596651350926836e-08, + "logits/chosen": -0.906538724899292, + "logits/rejected": -0.895421028137207, + "logps/chosen": -656.988525390625, + "logps/ref_chosen": -63.167236328125, + "logps/ref_rejected": -86.30934143066406, + "logps/rejected": -1104.16845703125, + "loss": 1.0352, + "margin_dpo/margin_mean": 424.0378112792969, + "margin_dpo/margin_std": 526.3082885742188, + "step": 526 + }, + { + "KL/chosen_KL_mean": -620.7032470703125, + "KL/mean": -759.3160400390625, + "KL/rejected_KL_mean": -897.9288330078125, + "KL/std": 481.2168273925781, + "epoch": 0.7738619676945668, + "fcm_dpo/beta": 0.0011956689413636923, + "fcm_dpo/delta": 0.07093732804059982, + "fcm_dpo/margin": 277.2255859375, + "fcm_dpo/q_t": 0.4224342703819275, + "grad_norm": 31.139324188232422, + "learning_rate": 7.504749238082414e-08, + "logits/chosen": -1.0832974910736084, + "logits/rejected": -1.045976996421814, + "logps/chosen": -691.8319702148438, + "logps/ref_chosen": -71.12867736816406, + "logps/ref_rejected": -78.3425521850586, + "logps/rejected": -976.2713623046875, + "loss": 1.138, + "margin_dpo/margin_mean": 277.2255859375, + "margin_dpo/margin_std": 403.221435546875, + "step": 527 + }, + { + "KL/chosen_KL_mean": -624.593505859375, + "KL/mean": -788.0255126953125, + "KL/rejected_KL_mean": -951.45751953125, + "KL/std": 467.02947998046875, + "epoch": 0.775330396475771, + "fcm_dpo/beta": 0.0012017192784696817, + "fcm_dpo/delta": 0.007483818102627993, + "fcm_dpo/margin": 326.864013671875, + "fcm_dpo/q_t": 0.41412389278411865, + "grad_norm": 40.499786376953125, + "learning_rate": 7.413308141366254e-08, + "logits/chosen": -0.9295456409454346, + "logits/rejected": -0.9090088605880737, + "logps/chosen": -692.6829833984375, + "logps/ref_chosen": -68.0894546508789, + "logps/ref_rejected": -93.91006469726562, + "logps/rejected": -1045.3675537109375, + "loss": 1.1349, + "margin_dpo/margin_mean": 326.864013671875, + "margin_dpo/margin_std": 558.260986328125, + "step": 528 + }, + { + "KL/chosen_KL_mean": -722.912109375, + "KL/mean": -839.7539672851562, + "KL/rejected_KL_mean": -956.5958251953125, + "KL/std": 438.76708984375, + "epoch": 0.7767988252569751, + "fcm_dpo/beta": 0.0012046921765431762, + "fcm_dpo/delta": 0.005615768022835255, + "fcm_dpo/margin": 233.68368530273438, + "fcm_dpo/q_t": 0.43421024084091187, + "grad_norm": 66.41038513183594, + "learning_rate": 7.322330470336313e-08, + "logits/chosen": -0.9495022296905518, + "logits/rejected": -0.9608061909675598, + "logps/chosen": -778.487060546875, + "logps/ref_chosen": -55.57495880126953, + "logps/ref_rejected": -89.20909118652344, + "logps/rejected": -1045.804931640625, + "loss": 1.2262, + "margin_dpo/margin_mean": 233.68368530273438, + "margin_dpo/margin_std": 538.73583984375, + "step": 529 + }, + { + "KL/chosen_KL_mean": -588.6814575195312, + "KL/mean": -773.9688720703125, + "KL/rejected_KL_mean": -959.25634765625, + "KL/std": 510.5151672363281, + "epoch": 0.7782672540381792, + "fcm_dpo/beta": 0.001198010751977563, + "fcm_dpo/delta": -0.04596946761012077, + "fcm_dpo/margin": 370.57501220703125, + "fcm_dpo/q_t": 0.40217673778533936, + "grad_norm": 55.660064697265625, + "learning_rate": 7.231818622338822e-08, + "logits/chosen": -0.8760310411453247, + "logits/rejected": -0.8702448606491089, + "logps/chosen": -636.2828369140625, + "logps/ref_chosen": -47.601417541503906, + "logps/ref_rejected": -87.2845230102539, + "logps/rejected": -1046.5408935546875, + "loss": 1.1169, + "margin_dpo/margin_mean": 370.574951171875, + "margin_dpo/margin_std": 630.6913452148438, + "step": 530 + }, + { + "KL/chosen_KL_mean": -657.83935546875, + "KL/mean": -809.4407958984375, + "KL/rejected_KL_mean": -961.042236328125, + "KL/std": 517.9159545898438, + "epoch": 0.7797356828193832, + "fcm_dpo/beta": 0.0012029436184093356, + "fcm_dpo/delta": 0.03640556335449219, + "fcm_dpo/margin": 303.2028503417969, + "fcm_dpo/q_t": 0.4182535409927368, + "grad_norm": 38.57360076904297, + "learning_rate": 7.141774982445147e-08, + "logits/chosen": -0.9681419134140015, + "logits/rejected": -0.9443149566650391, + "logps/chosen": -713.08544921875, + "logps/ref_chosen": -55.246063232421875, + "logps/ref_rejected": -70.60598754882812, + "logps/rejected": -1031.648193359375, + "loss": 1.1404, + "margin_dpo/margin_mean": 303.2028503417969, + "margin_dpo/margin_std": 503.85516357421875, + "step": 531 + }, + { + "KL/chosen_KL_mean": -635.9500732421875, + "KL/mean": -814.24462890625, + "KL/rejected_KL_mean": -992.5391845703125, + "KL/std": 492.651123046875, + "epoch": 0.7812041116005873, + "fcm_dpo/beta": 0.0011894925264641643, + "fcm_dpo/delta": -0.026376843452453613, + "fcm_dpo/margin": 356.58917236328125, + "fcm_dpo/q_t": 0.405168741941452, + "grad_norm": 54.94654846191406, + "learning_rate": 7.052201923388953e-08, + "logits/chosen": -0.8919358849525452, + "logits/rejected": -0.8668221235275269, + "logps/chosen": -706.236083984375, + "logps/ref_chosen": -70.28601837158203, + "logps/ref_rejected": -86.5913314819336, + "logps/rejected": -1079.1304931640625, + "loss": 1.12, + "margin_dpo/margin_mean": 356.58917236328125, + "margin_dpo/margin_std": 579.14501953125, + "step": 532 + }, + { + "KL/chosen_KL_mean": -578.204345703125, + "KL/mean": -705.219970703125, + "KL/rejected_KL_mean": -832.2355346679688, + "KL/std": 433.0811767578125, + "epoch": 0.7826725403817915, + "fcm_dpo/beta": 0.0012182076461613178, + "fcm_dpo/delta": 0.09259242564439774, + "fcm_dpo/margin": 254.03118896484375, + "fcm_dpo/q_t": 0.4302961230278015, + "grad_norm": 44.284828186035156, + "learning_rate": 6.963101805503646e-08, + "logits/chosen": -0.8935759663581848, + "logits/rejected": -0.8628044128417969, + "logps/chosen": -643.0594482421875, + "logps/ref_chosen": -64.8551025390625, + "logps/ref_rejected": -76.58805847167969, + "logps/rejected": -908.8236083984375, + "loss": 1.1921, + "margin_dpo/margin_mean": 254.03115844726562, + "margin_dpo/margin_std": 512.8229370117188, + "step": 533 + }, + { + "KL/chosen_KL_mean": -593.3036499023438, + "KL/mean": -759.54248046875, + "KL/rejected_KL_mean": -925.7813110351562, + "KL/std": 448.52166748046875, + "epoch": 0.7841409691629956, + "fcm_dpo/beta": 0.0012101430911570787, + "fcm_dpo/delta": -0.0032868273556232452, + "fcm_dpo/margin": 332.4776611328125, + "fcm_dpo/q_t": 0.4076574742794037, + "grad_norm": 33.67475509643555, + "learning_rate": 6.874476976660184e-08, + "logits/chosen": -0.8921518325805664, + "logits/rejected": -0.8880842328071594, + "logps/chosen": -653.4230346679688, + "logps/ref_chosen": -60.119388580322266, + "logps/ref_rejected": -78.54347229003906, + "logps/rejected": -1004.3247680664062, + "loss": 1.0979, + "margin_dpo/margin_mean": 332.4776611328125, + "margin_dpo/margin_std": 454.71746826171875, + "step": 534 + }, + { + "KL/chosen_KL_mean": -496.10723876953125, + "KL/mean": -687.46923828125, + "KL/rejected_KL_mean": -878.831298828125, + "KL/std": 442.2324523925781, + "epoch": 0.7856093979441997, + "fcm_dpo/beta": 0.0012131070252507925, + "fcm_dpo/delta": -0.06780680269002914, + "fcm_dpo/margin": 382.72406005859375, + "fcm_dpo/q_t": 0.3943653106689453, + "grad_norm": 31.497711181640625, + "learning_rate": 6.786329772205246e-08, + "logits/chosen": -0.8254178166389465, + "logits/rejected": -0.8272514343261719, + "logps/chosen": -550.4375, + "logps/ref_chosen": -54.330238342285156, + "logps/ref_rejected": -96.30763244628906, + "logps/rejected": -975.138916015625, + "loss": 1.0524, + "margin_dpo/margin_mean": 382.72406005859375, + "margin_dpo/margin_std": 464.69305419921875, + "step": 535 + }, + { + "KL/chosen_KL_mean": -460.1872863769531, + "KL/mean": -684.6822509765625, + "KL/rejected_KL_mean": -909.1771240234375, + "KL/std": 509.9305114746094, + "epoch": 0.7870778267254038, + "fcm_dpo/beta": 0.001176601741462946, + "fcm_dpo/delta": -0.13552269339561462, + "fcm_dpo/margin": 448.9898681640625, + "fcm_dpo/q_t": 0.3865072429180145, + "grad_norm": 41.1716194152832, + "learning_rate": 6.698662514899638e-08, + "logits/chosen": -0.8181933164596558, + "logits/rejected": -0.8456603288650513, + "logps/chosen": -507.267822265625, + "logps/ref_chosen": -47.08053207397461, + "logps/ref_rejected": -89.09783935546875, + "logps/rejected": -998.2750244140625, + "loss": 1.0291, + "margin_dpo/margin_mean": 448.9898376464844, + "margin_dpo/margin_std": 610.5261840820312, + "step": 536 + }, + { + "KL/chosen_KL_mean": -449.18890380859375, + "KL/mean": -613.4636840820312, + "KL/rejected_KL_mean": -777.7384033203125, + "KL/std": 419.79339599609375, + "epoch": 0.788546255506608, + "fcm_dpo/beta": 0.0011767192045226693, + "fcm_dpo/delta": 0.013456817716360092, + "fcm_dpo/margin": 328.549560546875, + "fcm_dpo/q_t": 0.4111158847808838, + "grad_norm": 32.44279861450195, + "learning_rate": 6.611477514857114e-08, + "logits/chosen": -0.8254159688949585, + "logits/rejected": -0.7971071004867554, + "logps/chosen": -506.9363708496094, + "logps/ref_chosen": -57.747467041015625, + "logps/ref_rejected": -70.43838500976562, + "logps/rejected": -848.1768188476562, + "loss": 1.1176, + "margin_dpo/margin_mean": 328.5495910644531, + "margin_dpo/margin_std": 493.1300048828125, + "step": 537 + }, + { + "KL/chosen_KL_mean": -591.84228515625, + "KL/mean": -767.7392578125, + "KL/rejected_KL_mean": -943.63623046875, + "KL/std": 430.4447021484375, + "epoch": 0.7900146842878121, + "fcm_dpo/beta": 0.0011678216978907585, + "fcm_dpo/delta": -0.011409275233745575, + "fcm_dpo/margin": 351.79400634765625, + "fcm_dpo/q_t": 0.4062436521053314, + "grad_norm": 33.99517059326172, + "learning_rate": 6.524777069483525e-08, + "logits/chosen": -0.8461936712265015, + "logits/rejected": -0.826158881187439, + "logps/chosen": -658.2581787109375, + "logps/ref_chosen": -66.41594696044922, + "logps/ref_rejected": -84.22808837890625, + "logps/rejected": -1027.8642578125, + "loss": 1.0812, + "margin_dpo/margin_mean": 351.7939758300781, + "margin_dpo/margin_std": 454.0805358886719, + "step": 538 + }, + { + "KL/chosen_KL_mean": -514.8009643554688, + "KL/mean": -671.9696655273438, + "KL/rejected_KL_mean": -829.1383666992188, + "KL/std": 363.4195861816406, + "epoch": 0.7914831130690162, + "fcm_dpo/beta": 0.0011770533164963126, + "fcm_dpo/delta": 0.031044667586684227, + "fcm_dpo/margin": 314.33740234375, + "fcm_dpo/q_t": 0.41412806510925293, + "grad_norm": 38.721561431884766, + "learning_rate": 6.438563463416221e-08, + "logits/chosen": -0.9151267409324646, + "logits/rejected": -0.8990967273712158, + "logps/chosen": -573.2938232421875, + "logps/ref_chosen": -58.492855072021484, + "logps/ref_rejected": -91.85395050048828, + "logps/rejected": -920.9923095703125, + "loss": 1.1041, + "margin_dpo/margin_mean": 314.33740234375, + "margin_dpo/margin_std": 406.0032958984375, + "step": 539 + }, + { + "KL/chosen_KL_mean": -513.6804809570312, + "KL/mean": -730.3675537109375, + "KL/rejected_KL_mean": -947.0548095703125, + "KL/std": 456.95489501953125, + "epoch": 0.7929515418502202, + "fcm_dpo/beta": 0.0011629726504907012, + "fcm_dpo/delta": -0.10943492501974106, + "fcm_dpo/margin": 433.374267578125, + "fcm_dpo/q_t": 0.3882465958595276, + "grad_norm": 41.09721374511719, + "learning_rate": 6.352838968463919e-08, + "logits/chosen": -0.8337998390197754, + "logits/rejected": -0.8539774417877197, + "logps/chosen": -577.1629638671875, + "logps/ref_chosen": -63.482513427734375, + "logps/ref_rejected": -116.42999267578125, + "logps/rejected": -1063.4847412109375, + "loss": 1.0355, + "margin_dpo/margin_mean": 433.374267578125, + "margin_dpo/margin_std": 532.08251953125, + "step": 540 + }, + { + "KL/chosen_KL_mean": -621.1068725585938, + "KL/mean": -744.4969482421875, + "KL/rejected_KL_mean": -867.8870849609375, + "KL/std": 408.0279846191406, + "epoch": 0.7944199706314243, + "fcm_dpo/beta": 0.0011470350436866283, + "fcm_dpo/delta": -0.01891069859266281, + "fcm_dpo/margin": 246.7801513671875, + "fcm_dpo/q_t": 0.43513962626457214, + "grad_norm": 53.4821662902832, + "learning_rate": 6.267605843546767e-08, + "logits/chosen": -0.9593532085418701, + "logits/rejected": -0.9463214874267578, + "logps/chosen": -699.38720703125, + "logps/ref_chosen": -78.28036499023438, + "logps/ref_rejected": -103.273681640625, + "logps/rejected": -971.1607666015625, + "loss": 1.206, + "margin_dpo/margin_mean": 246.7801513671875, + "margin_dpo/margin_std": 501.5079040527344, + "step": 541 + }, + { + "KL/chosen_KL_mean": -533.010498046875, + "KL/mean": -752.8260498046875, + "KL/rejected_KL_mean": -972.6415405273438, + "KL/std": 486.2320556640625, + "epoch": 0.7958883994126285, + "fcm_dpo/beta": 0.0011208573123440146, + "fcm_dpo/delta": -0.09979057312011719, + "fcm_dpo/margin": 439.63116455078125, + "fcm_dpo/q_t": 0.38958051800727844, + "grad_norm": 61.64540100097656, + "learning_rate": 6.182866334636888e-08, + "logits/chosen": -0.9522601962089539, + "logits/rejected": -0.9825873374938965, + "logps/chosen": -590.4954223632812, + "logps/ref_chosen": -57.48497009277344, + "logps/ref_rejected": -96.47506713867188, + "logps/rejected": -1069.11669921875, + "loss": 1.0499, + "margin_dpo/margin_mean": 439.631103515625, + "margin_dpo/margin_std": 572.5598754882812, + "step": 542 + }, + { + "KL/chosen_KL_mean": -616.3714599609375, + "KL/mean": -763.2518310546875, + "KL/rejected_KL_mean": -910.132080078125, + "KL/std": 572.7112426757812, + "epoch": 0.7973568281938326, + "fcm_dpo/beta": 0.001132916659116745, + "fcm_dpo/delta": 0.06954119354486465, + "fcm_dpo/margin": 293.7606506347656, + "fcm_dpo/q_t": 0.4342951774597168, + "grad_norm": 34.3123893737793, + "learning_rate": 6.098622674699147e-08, + "logits/chosen": -0.8914676904678345, + "logits/rejected": -0.9207860231399536, + "logps/chosen": -676.989013671875, + "logps/ref_chosen": -60.61750793457031, + "logps/ref_rejected": -105.59896850585938, + "logps/rejected": -1015.7310791015625, + "loss": 1.207, + "margin_dpo/margin_mean": 293.7606506347656, + "margin_dpo/margin_std": 677.75341796875, + "step": 543 + }, + { + "KL/chosen_KL_mean": -620.8755493164062, + "KL/mean": -799.795654296875, + "KL/rejected_KL_mean": -978.7156372070312, + "KL/std": 466.3764343261719, + "epoch": 0.7988252569750367, + "fcm_dpo/beta": 0.0011378147173672915, + "fcm_dpo/delta": -0.007483053486794233, + "fcm_dpo/margin": 357.8401184082031, + "fcm_dpo/q_t": 0.40781134366989136, + "grad_norm": 31.13963508605957, + "learning_rate": 6.01487708363232e-08, + "logits/chosen": -0.8915605545043945, + "logits/rejected": -0.9122974872589111, + "logps/chosen": -680.517822265625, + "logps/ref_chosen": -59.642303466796875, + "logps/ref_rejected": -100.95469665527344, + "logps/rejected": -1079.67041015625, + "loss": 1.1041, + "margin_dpo/margin_mean": 357.8401184082031, + "margin_dpo/margin_std": 532.9642333984375, + "step": 544 + }, + { + "KL/chosen_KL_mean": -593.229736328125, + "KL/mean": -805.3840942382812, + "KL/rejected_KL_mean": -1017.53857421875, + "KL/std": 472.28790283203125, + "epoch": 0.8002936857562408, + "fcm_dpo/beta": 0.0011245384812355042, + "fcm_dpo/delta": -0.08103551715612411, + "fcm_dpo/margin": 424.308837890625, + "fcm_dpo/q_t": 0.39294663071632385, + "grad_norm": 26.347267150878906, + "learning_rate": 5.9316317682106294e-08, + "logits/chosen": -0.8452168703079224, + "logits/rejected": -0.8719925284385681, + "logps/chosen": -660.8782958984375, + "logps/ref_chosen": -67.64859771728516, + "logps/ref_rejected": -95.90800476074219, + "logps/rejected": -1113.446533203125, + "loss": 1.0526, + "margin_dpo/margin_mean": 424.308837890625, + "margin_dpo/margin_std": 552.90673828125, + "step": 545 + }, + { + "KL/chosen_KL_mean": -566.79052734375, + "KL/mean": -708.225830078125, + "KL/rejected_KL_mean": -849.6610717773438, + "KL/std": 417.33697509765625, + "epoch": 0.801762114537445, + "fcm_dpo/beta": 0.0011334663722664118, + "fcm_dpo/delta": 0.08203422278165817, + "fcm_dpo/margin": 282.87054443359375, + "fcm_dpo/q_t": 0.4246765971183777, + "grad_norm": 36.055816650390625, + "learning_rate": 5.848888922025552e-08, + "logits/chosen": -0.8745754957199097, + "logits/rejected": -0.8525873422622681, + "logps/chosen": -617.5347900390625, + "logps/ref_chosen": -50.744232177734375, + "logps/ref_rejected": -81.86622619628906, + "logps/rejected": -931.52734375, + "loss": 1.1546, + "margin_dpo/margin_mean": 282.87054443359375, + "margin_dpo/margin_std": 451.3182373046875, + "step": 546 + }, + { + "KL/chosen_KL_mean": -571.3265380859375, + "KL/mean": -752.8795166015625, + "KL/rejected_KL_mean": -934.4325561523438, + "KL/std": 474.61773681640625, + "epoch": 0.8032305433186491, + "fcm_dpo/beta": 0.0011374622117727995, + "fcm_dpo/delta": -0.013586894609034061, + "fcm_dpo/margin": 363.1061096191406, + "fcm_dpo/q_t": 0.4062024652957916, + "grad_norm": 41.295894622802734, + "learning_rate": 5.7666507254280265e-08, + "logits/chosen": -0.863783597946167, + "logits/rejected": -0.8649648427963257, + "logps/chosen": -645.0142211914062, + "logps/ref_chosen": -73.6877212524414, + "logps/ref_rejected": -90.76136779785156, + "logps/rejected": -1025.19384765625, + "loss": 1.0906, + "margin_dpo/margin_mean": 363.1061096191406, + "margin_dpo/margin_std": 503.5409240722656, + "step": 547 + }, + { + "KL/chosen_KL_mean": -572.3947143554688, + "KL/mean": -745.8720703125, + "KL/rejected_KL_mean": -919.3494873046875, + "KL/std": 484.0094909667969, + "epoch": 0.8046989720998532, + "fcm_dpo/beta": 0.0011355069000273943, + "fcm_dpo/delta": 0.006264576222747564, + "fcm_dpo/margin": 346.9548034667969, + "fcm_dpo/q_t": 0.41354212164878845, + "grad_norm": 29.72757339477539, + "learning_rate": 5.684919345471029e-08, + "logits/chosen": -0.9228535294532776, + "logits/rejected": -0.922650933265686, + "logps/chosen": -637.6410522460938, + "logps/ref_chosen": -65.24634552001953, + "logps/ref_rejected": -94.11807250976562, + "logps/rejected": -1013.467529296875, + "loss": 1.1081, + "margin_dpo/margin_mean": 346.9548034667969, + "margin_dpo/margin_std": 528.34912109375, + "step": 548 + }, + { + "KL/chosen_KL_mean": -588.3477783203125, + "KL/mean": -716.7921142578125, + "KL/rejected_KL_mean": -845.236572265625, + "KL/std": 395.0334777832031, + "epoch": 0.8061674008810573, + "fcm_dpo/beta": 0.0011380038922652602, + "fcm_dpo/delta": 0.00906070601195097, + "fcm_dpo/margin": 256.88873291015625, + "fcm_dpo/q_t": 0.4329938292503357, + "grad_norm": 34.956844329833984, + "learning_rate": 5.603696935852426e-08, + "logits/chosen": -0.9303746223449707, + "logits/rejected": -0.9102168083190918, + "logps/chosen": -637.5601806640625, + "logps/ref_chosen": -49.21235656738281, + "logps/ref_rejected": -73.91031646728516, + "logps/rejected": -919.1468505859375, + "loss": 1.1825, + "margin_dpo/margin_mean": 256.88873291015625, + "margin_dpo/margin_std": 461.2471008300781, + "step": 549 + }, + { + "KL/chosen_KL_mean": -585.5970458984375, + "KL/mean": -743.171630859375, + "KL/rejected_KL_mean": -900.746337890625, + "KL/std": 431.36700439453125, + "epoch": 0.8076358296622613, + "fcm_dpo/beta": 0.001144929206930101, + "fcm_dpo/delta": 0.04066295921802521, + "fcm_dpo/margin": 315.1492919921875, + "fcm_dpo/q_t": 0.41719305515289307, + "grad_norm": 33.49467468261719, + "learning_rate": 5.5229856368582376e-08, + "logits/chosen": -0.8681415319442749, + "logits/rejected": -0.8837727308273315, + "logps/chosen": -642.4039306640625, + "logps/ref_chosen": -56.80695343017578, + "logps/ref_rejected": -95.12580871582031, + "logps/rejected": -995.8721313476562, + "loss": 1.1245, + "margin_dpo/margin_mean": 315.1493225097656, + "margin_dpo/margin_std": 469.3822021484375, + "step": 550 + }, + { + "KL/chosen_KL_mean": -528.0404663085938, + "KL/mean": -771.3831787109375, + "KL/rejected_KL_mean": -1014.7258911132812, + "KL/std": 475.05487060546875, + "epoch": 0.8091042584434655, + "fcm_dpo/beta": 0.0011208320502191782, + "fcm_dpo/delta": -0.15413454174995422, + "fcm_dpo/margin": 486.6854248046875, + "fcm_dpo/q_t": 0.37404024600982666, + "grad_norm": 60.243988037109375, + "learning_rate": 5.4427875753062734e-08, + "logits/chosen": -0.8686560392379761, + "logits/rejected": -0.9161897301673889, + "logps/chosen": -587.1467895507812, + "logps/ref_chosen": -59.10633087158203, + "logps/ref_rejected": -111.67280578613281, + "logps/rejected": -1126.398681640625, + "loss": 0.9776, + "margin_dpo/margin_mean": 486.6854248046875, + "margin_dpo/margin_std": 475.30499267578125, + "step": 551 + }, + { + "KL/chosen_KL_mean": -493.5096435546875, + "KL/mean": -766.2874145507812, + "KL/rejected_KL_mean": -1039.065185546875, + "KL/std": 549.2742919921875, + "epoch": 0.8105726872246696, + "fcm_dpo/beta": 0.0010729696368798614, + "fcm_dpo/delta": -0.1995118260383606, + "fcm_dpo/margin": 545.5556030273438, + "fcm_dpo/q_t": 0.37025463581085205, + "grad_norm": 43.864139556884766, + "learning_rate": 5.363104864490034e-08, + "logits/chosen": -0.917883574962616, + "logits/rejected": -0.9444681406021118, + "logps/chosen": -555.8642578125, + "logps/ref_chosen": -62.35459899902344, + "logps/ref_rejected": -104.56210327148438, + "logps/rejected": -1143.6273193359375, + "loss": 0.9757, + "margin_dpo/margin_mean": 545.5556030273438, + "margin_dpo/margin_std": 603.31982421875, + "step": 552 + }, + { + "KL/chosen_KL_mean": -577.623046875, + "KL/mean": -726.396484375, + "KL/rejected_KL_mean": -875.169921875, + "KL/std": 449.4180603027344, + "epoch": 0.8120411160058737, + "fcm_dpo/beta": 0.0010809717932716012, + "fcm_dpo/delta": 0.0810445249080658, + "fcm_dpo/margin": 297.5469055175781, + "fcm_dpo/q_t": 0.4275718331336975, + "grad_norm": 31.865938186645508, + "learning_rate": 5.2839396041230415e-08, + "logits/chosen": -0.9152529835700989, + "logits/rejected": -0.9069106578826904, + "logps/chosen": -645.8818359375, + "logps/ref_chosen": -68.25881958007812, + "logps/ref_rejected": -98.0971450805664, + "logps/rejected": -973.26708984375, + "loss": 1.1534, + "margin_dpo/margin_mean": 297.54693603515625, + "margin_dpo/margin_std": 481.7157897949219, + "step": 553 + }, + { + "KL/chosen_KL_mean": -595.7544555664062, + "KL/mean": -803.332275390625, + "KL/rejected_KL_mean": -1010.9100341796875, + "KL/std": 496.8535461425781, + "epoch": 0.8135095447870778, + "fcm_dpo/beta": 0.001084424089640379, + "fcm_dpo/delta": -0.05283275246620178, + "fcm_dpo/margin": 415.1555480957031, + "fcm_dpo/q_t": 0.4000872075557709, + "grad_norm": 33.93936538696289, + "learning_rate": 5.205293880283551e-08, + "logits/chosen": -0.8999141454696655, + "logits/rejected": -0.862054705619812, + "logps/chosen": -663.7021484375, + "logps/ref_chosen": -67.94767761230469, + "logps/ref_rejected": -89.78272247314453, + "logps/rejected": -1100.6927490234375, + "loss": 1.0927, + "margin_dpo/margin_mean": 415.1555480957031, + "margin_dpo/margin_std": 624.768798828125, + "step": 554 + }, + { + "KL/chosen_KL_mean": -615.0294189453125, + "KL/mean": -845.592041015625, + "KL/rejected_KL_mean": -1076.154541015625, + "KL/std": 520.6309814453125, + "epoch": 0.8149779735682819, + "fcm_dpo/beta": 0.0010615733917802572, + "fcm_dpo/delta": -0.09408356249332428, + "fcm_dpo/margin": 461.125244140625, + "fcm_dpo/q_t": 0.3936702311038971, + "grad_norm": 35.487144470214844, + "learning_rate": 5.127169765359515e-08, + "logits/chosen": -0.9481945037841797, + "logits/rejected": -0.9943474531173706, + "logps/chosen": -668.35986328125, + "logps/ref_chosen": -53.33049011230469, + "logps/ref_rejected": -108.47937774658203, + "logps/rejected": -1184.634033203125, + "loss": 1.0645, + "margin_dpo/margin_mean": 461.125244140625, + "margin_dpo/margin_std": 665.6109008789062, + "step": 555 + }, + { + "KL/chosen_KL_mean": -569.9747314453125, + "KL/mean": -710.6820678710938, + "KL/rejected_KL_mean": -851.389404296875, + "KL/std": 405.1685791015625, + "epoch": 0.8164464023494861, + "fcm_dpo/beta": 0.0010712645016610622, + "fcm_dpo/delta": 0.10165860503911972, + "fcm_dpo/margin": 281.4146728515625, + "fcm_dpo/q_t": 0.43035784363746643, + "grad_norm": 31.20359230041504, + "learning_rate": 5.049569317994012e-08, + "logits/chosen": -0.9110164642333984, + "logits/rejected": -0.8961154222488403, + "logps/chosen": -628.6192016601562, + "logps/ref_chosen": -58.64447021484375, + "logps/ref_rejected": -101.34040832519531, + "logps/rejected": -952.7298583984375, + "loss": 1.1538, + "margin_dpo/margin_mean": 281.4146728515625, + "margin_dpo/margin_std": 414.3664245605469, + "step": 556 + }, + { + "KL/chosen_KL_mean": -631.4099731445312, + "KL/mean": -833.5421142578125, + "KL/rejected_KL_mean": -1035.67431640625, + "KL/std": 523.793701171875, + "epoch": 0.8179148311306902, + "fcm_dpo/beta": 0.0010692158248275518, + "fcm_dpo/delta": -0.033952295780181885, + "fcm_dpo/margin": 404.26434326171875, + "fcm_dpo/q_t": 0.40289121866226196, + "grad_norm": 51.41856384277344, + "learning_rate": 4.9724945830310144e-08, + "logits/chosen": -0.9860169887542725, + "logits/rejected": -1.0132906436920166, + "logps/chosen": -699.2506103515625, + "logps/ref_chosen": -67.84066009521484, + "logps/ref_rejected": -109.93965911865234, + "logps/rejected": -1145.614013671875, + "loss": 1.0935, + "margin_dpo/margin_mean": 404.2643127441406, + "margin_dpo/margin_std": 592.6957397460938, + "step": 557 + }, + { + "KL/chosen_KL_mean": -567.9088745117188, + "KL/mean": -840.1057739257812, + "KL/rejected_KL_mean": -1112.302490234375, + "KL/std": 511.2911376953125, + "epoch": 0.8193832599118943, + "fcm_dpo/beta": 0.0010408093221485615, + "fcm_dpo/delta": -0.17697550356388092, + "fcm_dpo/margin": 544.393798828125, + "fcm_dpo/q_t": 0.3689645528793335, + "grad_norm": 24.727500915527344, + "learning_rate": 4.8959475914614554e-08, + "logits/chosen": -1.0073204040527344, + "logits/rejected": -1.016093373298645, + "logps/chosen": -630.277099609375, + "logps/ref_chosen": -62.36824035644531, + "logps/ref_rejected": -102.16102600097656, + "logps/rejected": -1214.463623046875, + "loss": 0.9804, + "margin_dpo/margin_mean": 544.393798828125, + "margin_dpo/margin_std": 575.1395874023438, + "step": 558 + }, + { + "KL/chosen_KL_mean": -625.84619140625, + "KL/mean": -851.332275390625, + "KL/rejected_KL_mean": -1076.818359375, + "KL/std": 508.06561279296875, + "epoch": 0.8208516886930984, + "fcm_dpo/beta": 0.0010242098942399025, + "fcm_dpo/delta": -0.06484264880418777, + "fcm_dpo/margin": 450.97210693359375, + "fcm_dpo/q_t": 0.3939628005027771, + "grad_norm": 27.729312896728516, + "learning_rate": 4.8199303603697614e-08, + "logits/chosen": -1.085421085357666, + "logits/rejected": -1.071927547454834, + "logps/chosen": -686.5985717773438, + "logps/ref_chosen": -60.752323150634766, + "logps/ref_rejected": -93.44229125976562, + "logps/rejected": -1170.2607421875, + "loss": 1.0467, + "margin_dpo/margin_mean": 450.9721374511719, + "margin_dpo/margin_std": 538.5919799804688, + "step": 559 + }, + { + "KL/chosen_KL_mean": -600.4629516601562, + "KL/mean": -748.181884765625, + "KL/rejected_KL_mean": -895.9007568359375, + "KL/std": 419.6651306152344, + "epoch": 0.8223201174743024, + "fcm_dpo/beta": 0.0010198511881753802, + "fcm_dpo/delta": 0.00034468769445084035, + "fcm_dpo/margin": 295.4377746582031, + "fcm_dpo/q_t": 0.42941996455192566, + "grad_norm": 32.959224700927734, + "learning_rate": 4.7444448928806615e-08, + "logits/chosen": -0.8413786888122559, + "logits/rejected": -0.8068987131118774, + "logps/chosen": -658.5667724609375, + "logps/ref_chosen": -58.10382080078125, + "logps/ref_rejected": -79.99122619628906, + "logps/rejected": -975.8919677734375, + "loss": 1.165, + "margin_dpo/margin_mean": 295.4377746582031, + "margin_dpo/margin_std": 473.3067321777344, + "step": 560 + }, + { + "KL/chosen_KL_mean": -676.5592651367188, + "KL/mean": -825.4444580078125, + "KL/rejected_KL_mean": -974.32958984375, + "KL/std": 466.03192138671875, + "epoch": 0.8237885462555066, + "fcm_dpo/beta": 0.001038446556776762, + "fcm_dpo/delta": 0.09292855858802795, + "fcm_dpo/margin": 297.7703857421875, + "fcm_dpo/q_t": 0.4279605746269226, + "grad_norm": 33.256561279296875, + "learning_rate": 4.669493178106432e-08, + "logits/chosen": -0.957642674446106, + "logits/rejected": -0.9741103649139404, + "logps/chosen": -727.47216796875, + "logps/ref_chosen": -50.912879943847656, + "logps/ref_rejected": -99.06856536865234, + "logps/rejected": -1073.398193359375, + "loss": 1.1824, + "margin_dpo/margin_mean": 297.7703857421875, + "margin_dpo/margin_std": 566.9799194335938, + "step": 561 + }, + { + "KL/chosen_KL_mean": -637.7071533203125, + "KL/mean": -828.40576171875, + "KL/rejected_KL_mean": -1019.1043701171875, + "KL/std": 509.77227783203125, + "epoch": 0.8252569750367107, + "fcm_dpo/beta": 0.0010351063683629036, + "fcm_dpo/delta": 0.005036838352680206, + "fcm_dpo/margin": 381.3973388671875, + "fcm_dpo/q_t": 0.4100903272628784, + "grad_norm": 27.82461929321289, + "learning_rate": 4.5950771910944596e-08, + "logits/chosen": -0.9611387848854065, + "logits/rejected": -0.9594268798828125, + "logps/chosen": -697.1715087890625, + "logps/ref_chosen": -59.46440124511719, + "logps/ref_rejected": -96.54266357421875, + "logps/rejected": -1115.64697265625, + "loss": 1.1057, + "margin_dpo/margin_mean": 381.3973083496094, + "margin_dpo/margin_std": 549.9033203125, + "step": 562 + }, + { + "KL/chosen_KL_mean": -682.5115966796875, + "KL/mean": -834.73291015625, + "KL/rejected_KL_mean": -986.954345703125, + "KL/std": 534.4413452148438, + "epoch": 0.8267254038179148, + "fcm_dpo/beta": 0.0010367175564169884, + "fcm_dpo/delta": -0.031953129917383194, + "fcm_dpo/margin": 304.4427490234375, + "fcm_dpo/q_t": 0.4237147569656372, + "grad_norm": 37.00297164916992, + "learning_rate": 4.521198892775202e-08, + "logits/chosen": -0.9149300456047058, + "logits/rejected": -0.9164028167724609, + "logps/chosen": -743.1197509765625, + "logps/ref_chosen": -60.60819625854492, + "logps/ref_rejected": -94.56770324707031, + "logps/rejected": -1081.52197265625, + "loss": 1.2024, + "margin_dpo/margin_mean": 304.4427490234375, + "margin_dpo/margin_std": 631.2129516601562, + "step": 563 + }, + { + "KL/chosen_KL_mean": -653.9114990234375, + "KL/mean": -828.191650390625, + "KL/rejected_KL_mean": -1002.471923828125, + "KL/std": 475.3956298828125, + "epoch": 0.8281938325991189, + "fcm_dpo/beta": 0.0010415834840387106, + "fcm_dpo/delta": 0.03815501928329468, + "fcm_dpo/margin": 348.56048583984375, + "fcm_dpo/q_t": 0.41654127836227417, + "grad_norm": 37.090126037597656, + "learning_rate": 4.447860229910544e-08, + "logits/chosen": -1.0703301429748535, + "logits/rejected": -1.0528302192687988, + "logps/chosen": -728.1798095703125, + "logps/ref_chosen": -74.26837921142578, + "logps/ref_rejected": -93.23818969726562, + "logps/rejected": -1095.7100830078125, + "loss": 1.1179, + "margin_dpo/margin_mean": 348.56048583984375, + "margin_dpo/margin_std": 479.8955078125, + "step": 564 + }, + { + "KL/chosen_KL_mean": -655.407470703125, + "KL/mean": -850.3612060546875, + "KL/rejected_KL_mean": -1045.3148193359375, + "KL/std": 540.4480590820312, + "epoch": 0.8296622613803231, + "fcm_dpo/beta": 0.0010392372496426105, + "fcm_dpo/delta": -0.005461537279188633, + "fcm_dpo/margin": 389.9073486328125, + "fcm_dpo/q_t": 0.4108823537826538, + "grad_norm": 30.26292610168457, + "learning_rate": 4.375063135042445e-08, + "logits/chosen": -0.9375953674316406, + "logits/rejected": -0.9292128086090088, + "logps/chosen": -724.4274291992188, + "logps/ref_chosen": -69.0199203491211, + "logps/ref_rejected": -85.7789306640625, + "logps/rejected": -1131.09375, + "loss": 1.1223, + "margin_dpo/margin_mean": 389.9073486328125, + "margin_dpo/margin_std": 643.8388671875, + "step": 565 + }, + { + "KL/chosen_KL_mean": -634.423095703125, + "KL/mean": -859.91552734375, + "KL/rejected_KL_mean": -1085.407958984375, + "KL/std": 575.044189453125, + "epoch": 0.8311306901615272, + "fcm_dpo/beta": 0.0010359040461480618, + "fcm_dpo/delta": -0.0708489641547203, + "fcm_dpo/margin": 450.98480224609375, + "fcm_dpo/q_t": 0.3965566158294678, + "grad_norm": 29.393285751342773, + "learning_rate": 4.3028095264420525e-08, + "logits/chosen": -0.9996108412742615, + "logits/rejected": -1.016195297241211, + "logps/chosen": -700.9684448242188, + "logps/ref_chosen": -66.5453109741211, + "logps/ref_rejected": -103.86932373046875, + "logps/rejected": -1189.2772216796875, + "loss": 1.094, + "margin_dpo/margin_mean": 450.98480224609375, + "margin_dpo/margin_std": 689.2914428710938, + "step": 566 + }, + { + "KL/chosen_KL_mean": -630.6632690429688, + "KL/mean": -796.9432373046875, + "KL/rejected_KL_mean": -963.22314453125, + "KL/std": 397.6266174316406, + "epoch": 0.8325991189427313, + "fcm_dpo/beta": 0.0010342567693442106, + "fcm_dpo/delta": 0.05805526301264763, + "fcm_dpo/margin": 332.55987548828125, + "fcm_dpo/q_t": 0.41991090774536133, + "grad_norm": 36.57158660888672, + "learning_rate": 4.231101308059165e-08, + "logits/chosen": -1.0474796295166016, + "logits/rejected": -1.0378050804138184, + "logps/chosen": -683.5215454101562, + "logps/ref_chosen": -52.85829544067383, + "logps/ref_rejected": -85.37095642089844, + "logps/rejected": -1048.5941162109375, + "loss": 1.1274, + "margin_dpo/margin_mean": 332.55987548828125, + "margin_dpo/margin_std": 469.06707763671875, + "step": 567 + }, + { + "KL/chosen_KL_mean": -598.9827880859375, + "KL/mean": -826.0681762695312, + "KL/rejected_KL_mean": -1053.153564453125, + "KL/std": 466.5391845703125, + "epoch": 0.8340675477239354, + "fcm_dpo/beta": 0.0010257186368107796, + "fcm_dpo/delta": -0.06917618960142136, + "fcm_dpo/margin": 454.1707763671875, + "fcm_dpo/q_t": 0.39098745584487915, + "grad_norm": 33.18759536743164, + "learning_rate": 4.1599403694720145e-08, + "logits/chosen": -0.9316244125366211, + "logits/rejected": -0.962453305721283, + "logps/chosen": -644.1751708984375, + "logps/ref_chosen": -45.1923828125, + "logps/ref_rejected": -89.09236907958984, + "logps/rejected": -1142.2459716796875, + "loss": 1.0315, + "margin_dpo/margin_mean": 454.1707763671875, + "margin_dpo/margin_std": 485.6314697265625, + "step": 568 + }, + { + "KL/chosen_KL_mean": -648.8056640625, + "KL/mean": -841.3399658203125, + "KL/rejected_KL_mean": -1033.874267578125, + "KL/std": 573.2427978515625, + "epoch": 0.8355359765051396, + "fcm_dpo/beta": 0.0010291270446032286, + "fcm_dpo/delta": 0.003299180418252945, + "fcm_dpo/margin": 385.0684814453125, + "fcm_dpo/q_t": 0.41084566712379456, + "grad_norm": 40.002628326416016, + "learning_rate": 4.089328585837512e-08, + "logits/chosen": -0.9903547763824463, + "logits/rejected": -0.9887925982475281, + "logps/chosen": -712.5262451171875, + "logps/ref_chosen": -63.72056198120117, + "logps/ref_rejected": -79.10325622558594, + "logps/rejected": -1112.9775390625, + "loss": 1.1275, + "margin_dpo/margin_mean": 385.0685119628906, + "margin_dpo/margin_std": 624.4798583984375, + "step": 569 + }, + { + "KL/chosen_KL_mean": -634.85205078125, + "KL/mean": -803.91943359375, + "KL/rejected_KL_mean": -972.98681640625, + "KL/std": 460.9179992675781, + "epoch": 0.8370044052863436, + "fcm_dpo/beta": 0.0010310852667316794, + "fcm_dpo/delta": 0.053242240101099014, + "fcm_dpo/margin": 338.1347351074219, + "fcm_dpo/q_t": 0.41983291506767273, + "grad_norm": 34.861881256103516, + "learning_rate": 4.019267817841834e-08, + "logits/chosen": -1.04459547996521, + "logits/rejected": -1.023921251296997, + "logps/chosen": -696.466552734375, + "logps/ref_chosen": -61.61454391479492, + "logps/ref_rejected": -82.14186096191406, + "logps/rejected": -1055.128662109375, + "loss": 1.1299, + "margin_dpo/margin_mean": 338.134765625, + "margin_dpo/margin_std": 491.9954833984375, + "step": 570 + }, + { + "KL/chosen_KL_mean": -630.3326416015625, + "KL/mean": -840.3262939453125, + "KL/rejected_KL_mean": -1050.320068359375, + "KL/std": 493.2159423828125, + "epoch": 0.8384728340675477, + "fcm_dpo/beta": 0.0010264207376167178, + "fcm_dpo/delta": -0.03282318636775017, + "fcm_dpo/margin": 419.9873046875, + "fcm_dpo/q_t": 0.4029013514518738, + "grad_norm": 31.51184844970703, + "learning_rate": 3.9497599116513705e-08, + "logits/chosen": -0.9481862187385559, + "logits/rejected": -0.9578366279602051, + "logps/chosen": -683.38671875, + "logps/ref_chosen": -53.05406188964844, + "logps/ref_rejected": -91.33682250976562, + "logps/rejected": -1141.6568603515625, + "loss": 1.0896, + "margin_dpo/margin_mean": 419.9873352050781, + "margin_dpo/margin_std": 602.700439453125, + "step": 571 + }, + { + "KL/chosen_KL_mean": -692.636962890625, + "KL/mean": -900.9115600585938, + "KL/rejected_KL_mean": -1109.18603515625, + "KL/std": 563.3121337890625, + "epoch": 0.8399412628487518, + "fcm_dpo/beta": 0.0010215662186965346, + "fcm_dpo/delta": -0.026851139962673187, + "fcm_dpo/margin": 416.5491943359375, + "fcm_dpo/q_t": 0.40701764822006226, + "grad_norm": 28.058713912963867, + "learning_rate": 3.880806698864086e-08, + "logits/chosen": -0.9600076675415039, + "logits/rejected": -0.9866325855255127, + "logps/chosen": -741.0962524414062, + "logps/ref_chosen": -48.45928955078125, + "logps/ref_rejected": -83.55703735351562, + "logps/rejected": -1192.7431640625, + "loss": 1.1143, + "margin_dpo/margin_mean": 416.5491943359375, + "margin_dpo/margin_std": 682.91455078125, + "step": 572 + }, + { + "KL/chosen_KL_mean": -665.21142578125, + "KL/mean": -851.5628662109375, + "KL/rejected_KL_mean": -1037.914306640625, + "KL/std": 504.3909606933594, + "epoch": 0.8414096916299559, + "fcm_dpo/beta": 0.0010271357605233788, + "fcm_dpo/delta": 0.01777409017086029, + "fcm_dpo/margin": 372.70294189453125, + "fcm_dpo/q_t": 0.4132155776023865, + "grad_norm": 24.622867584228516, + "learning_rate": 3.812409996461275e-08, + "logits/chosen": -1.040392518043518, + "logits/rejected": -1.0433319807052612, + "logps/chosen": -716.833984375, + "logps/ref_chosen": -51.62262725830078, + "logps/ref_rejected": -85.32499694824219, + "logps/rejected": -1123.2392578125, + "loss": 1.1042, + "margin_dpo/margin_mean": 372.70294189453125, + "margin_dpo/margin_std": 519.7007446289062, + "step": 573 + }, + { + "KL/chosen_KL_mean": -621.5488891601562, + "KL/mean": -825.0750732421875, + "KL/rejected_KL_mean": -1028.601318359375, + "KL/std": 480.80010986328125, + "epoch": 0.8428781204111601, + "fcm_dpo/beta": 0.0010248222388327122, + "fcm_dpo/delta": -0.017912685871124268, + "fcm_dpo/margin": 407.05230712890625, + "fcm_dpo/q_t": 0.40481486916542053, + "grad_norm": 31.015390396118164, + "learning_rate": 3.74457160675965e-08, + "logits/chosen": -1.0330562591552734, + "logits/rejected": -1.0494093894958496, + "logps/chosen": -672.5933837890625, + "logps/ref_chosen": -51.04446029663086, + "logps/ref_rejected": -92.80640411376953, + "logps/rejected": -1121.40771484375, + "loss": 1.0857, + "margin_dpo/margin_mean": 407.05230712890625, + "margin_dpo/margin_std": 548.0139770507812, + "step": 574 + }, + { + "KL/chosen_KL_mean": -679.0296020507812, + "KL/mean": -858.723876953125, + "KL/rejected_KL_mean": -1038.418212890625, + "KL/std": 490.94537353515625, + "epoch": 0.8443465491923642, + "fcm_dpo/beta": 0.0010078808991238475, + "fcm_dpo/delta": -0.0724976509809494, + "fcm_dpo/margin": 359.38861083984375, + "fcm_dpo/q_t": 0.41705384850502014, + "grad_norm": 37.037967681884766, + "learning_rate": 3.677293317363864e-08, + "logits/chosen": -0.8891516923904419, + "logits/rejected": -0.8854223489761353, + "logps/chosen": -750.8197021484375, + "logps/ref_chosen": -71.7901382446289, + "logps/ref_rejected": -95.38619995117188, + "logps/rejected": -1133.804443359375, + "loss": 1.1541, + "margin_dpo/margin_mean": 359.38861083984375, + "margin_dpo/margin_std": 618.3295288085938, + "step": 575 + }, + { + "KL/chosen_KL_mean": -629.9134521484375, + "KL/mean": -782.214111328125, + "KL/rejected_KL_mean": -934.5147705078125, + "KL/std": 449.57354736328125, + "epoch": 0.8458149779735683, + "fcm_dpo/beta": 0.0010182232363149524, + "fcm_dpo/delta": 0.09279034286737442, + "fcm_dpo/margin": 304.60137939453125, + "fcm_dpo/q_t": 0.4291686415672302, + "grad_norm": 26.034320831298828, + "learning_rate": 3.6105769011194224e-08, + "logits/chosen": -0.9969866275787354, + "logits/rejected": -1.025536298751831, + "logps/chosen": -684.1763916015625, + "logps/ref_chosen": -54.262962341308594, + "logps/ref_rejected": -100.75428009033203, + "logps/rejected": -1035.26904296875, + "loss": 1.1671, + "margin_dpo/margin_mean": 304.60137939453125, + "margin_dpo/margin_std": 517.416748046875, + "step": 576 + }, + { + "KL/chosen_KL_mean": -572.9135131835938, + "KL/mean": -760.6309814453125, + "KL/rejected_KL_mean": -948.348388671875, + "KL/std": 488.9225158691406, + "epoch": 0.8472834067547724, + "fcm_dpo/beta": 0.001027634833008051, + "fcm_dpo/delta": 0.014742329716682434, + "fcm_dpo/margin": 375.43487548828125, + "fcm_dpo/q_t": 0.4120475649833679, + "grad_norm": 29.45041275024414, + "learning_rate": 3.5444241160659304e-08, + "logits/chosen": -0.9848508834838867, + "logits/rejected": -0.9585464000701904, + "logps/chosen": -634.8232421875, + "logps/ref_chosen": -61.909706115722656, + "logps/ref_rejected": -84.07069396972656, + "logps/rejected": -1032.419189453125, + "loss": 1.1134, + "margin_dpo/margin_mean": 375.43487548828125, + "margin_dpo/margin_std": 524.5450439453125, + "step": 577 + }, + { + "KL/chosen_KL_mean": -563.70556640625, + "KL/mean": -769.3616943359375, + "KL/rejected_KL_mean": -975.017822265625, + "KL/std": 475.5242919921875, + "epoch": 0.8487518355359766, + "fcm_dpo/beta": 0.001021248521283269, + "fcm_dpo/delta": -0.021578624844551086, + "fcm_dpo/margin": 411.31219482421875, + "fcm_dpo/q_t": 0.40228772163391113, + "grad_norm": 34.02935791015625, + "learning_rate": 3.478836705390808e-08, + "logits/chosen": -0.8643592596054077, + "logits/rejected": -0.8935543894767761, + "logps/chosen": -612.96923828125, + "logps/ref_chosen": -49.26368713378906, + "logps/ref_rejected": -83.4362564086914, + "logps/rejected": -1058.4541015625, + "loss": 1.0665, + "margin_dpo/margin_mean": 411.31219482421875, + "margin_dpo/margin_std": 471.1173095703125, + "step": 578 + }, + { + "KL/chosen_KL_mean": -619.7525634765625, + "KL/mean": -754.7780151367188, + "KL/rejected_KL_mean": -889.803466796875, + "KL/std": 469.8000793457031, + "epoch": 0.8502202643171806, + "fcm_dpo/beta": 0.0010419844184070826, + "fcm_dpo/delta": 0.12221585214138031, + "fcm_dpo/margin": 270.0508728027344, + "fcm_dpo/q_t": 0.43572354316711426, + "grad_norm": 47.86115646362305, + "learning_rate": 3.41381639738331e-08, + "logits/chosen": -0.9247668981552124, + "logits/rejected": -0.918264627456665, + "logps/chosen": -678.638427734375, + "logps/ref_chosen": -58.88581848144531, + "logps/ref_rejected": -94.78762817382812, + "logps/rejected": -984.591064453125, + "loss": 1.194, + "margin_dpo/margin_mean": 270.0508728027344, + "margin_dpo/margin_std": 516.8199462890625, + "step": 579 + }, + { + "KL/chosen_KL_mean": -498.0091857910156, + "KL/mean": -721.5133056640625, + "KL/rejected_KL_mean": -945.0174560546875, + "KL/std": 533.5467529296875, + "epoch": 0.8516886930983847, + "fcm_dpo/beta": 0.0010376223362982273, + "fcm_dpo/delta": -0.06723435968160629, + "fcm_dpo/margin": 447.0083312988281, + "fcm_dpo/q_t": 0.3978680372238159, + "grad_norm": 30.88446617126465, + "learning_rate": 3.349364905389032e-08, + "logits/chosen": -0.8265971541404724, + "logits/rejected": -0.8474031686782837, + "logps/chosen": -546.7160034179688, + "logps/ref_chosen": -48.70683670043945, + "logps/ref_rejected": -81.7583999633789, + "logps/rejected": -1026.77587890625, + "loss": 1.0653, + "margin_dpo/margin_mean": 447.0083312988281, + "margin_dpo/margin_std": 623.1065673828125, + "step": 580 + }, + { + "KL/chosen_KL_mean": -619.2047729492188, + "KL/mean": -788.866943359375, + "KL/rejected_KL_mean": -958.529052734375, + "KL/std": 473.9400634765625, + "epoch": 0.8531571218795888, + "fcm_dpo/beta": 0.001045089797116816, + "fcm_dpo/delta": 0.046971119940280914, + "fcm_dpo/margin": 339.32427978515625, + "fcm_dpo/q_t": 0.4190484881401062, + "grad_norm": 28.153030395507812, + "learning_rate": 3.285483927764726e-08, + "logits/chosen": -1.0129151344299316, + "logits/rejected": -1.0199846029281616, + "logps/chosen": -681.4271240234375, + "logps/ref_chosen": -62.22235107421875, + "logps/ref_rejected": -91.73568725585938, + "logps/rejected": -1050.2647705078125, + "loss": 1.133, + "margin_dpo/margin_mean": 339.3243103027344, + "margin_dpo/margin_std": 528.5376586914062, + "step": 581 + }, + { + "KL/chosen_KL_mean": -569.8297119140625, + "KL/mean": -768.3109130859375, + "KL/rejected_KL_mean": -966.7920532226562, + "KL/std": 444.11328125, + "epoch": 0.8546255506607929, + "fcm_dpo/beta": 0.001052438747137785, + "fcm_dpo/delta": -0.019892334938049316, + "fcm_dpo/margin": 396.9622802734375, + "fcm_dpo/q_t": 0.40300631523132324, + "grad_norm": 30.396282196044922, + "learning_rate": 3.222175147833556e-08, + "logits/chosen": -0.9671785831451416, + "logits/rejected": -0.9903292059898376, + "logps/chosen": -628.058349609375, + "logps/ref_chosen": -58.228660583496094, + "logps/ref_rejected": -110.06959533691406, + "logps/rejected": -1076.861572265625, + "loss": 1.0821, + "margin_dpo/margin_mean": 396.96234130859375, + "margin_dpo/margin_std": 493.5679931640625, + "step": 582 + }, + { + "KL/chosen_KL_mean": -644.336181640625, + "KL/mean": -767.6728515625, + "KL/rejected_KL_mean": -891.0096435546875, + "KL/std": 475.7613830566406, + "epoch": 0.856093979441997, + "fcm_dpo/beta": 0.0010401608888059855, + "fcm_dpo/delta": -0.01038757897913456, + "fcm_dpo/margin": 246.67337036132812, + "fcm_dpo/q_t": 0.4429190754890442, + "grad_norm": 31.7283878326416, + "learning_rate": 3.159440233840763e-08, + "logits/chosen": -0.9231404066085815, + "logits/rejected": -0.9159576892852783, + "logps/chosen": -701.1990966796875, + "logps/ref_chosen": -56.86286163330078, + "logps/ref_rejected": -88.4039306640625, + "logps/rejected": -979.41357421875, + "loss": 1.2385, + "margin_dpo/margin_mean": 246.6733856201172, + "margin_dpo/margin_std": 584.5450439453125, + "step": 583 + }, + { + "KL/chosen_KL_mean": -548.4625244140625, + "KL/mean": -786.724853515625, + "KL/rejected_KL_mean": -1024.9871826171875, + "KL/std": 508.10198974609375, + "epoch": 0.8575624082232012, + "fcm_dpo/beta": 0.001025655074045062, + "fcm_dpo/delta": -0.0933271199464798, + "fcm_dpo/margin": 476.524658203125, + "fcm_dpo/q_t": 0.3882429897785187, + "grad_norm": 27.133150100708008, + "learning_rate": 3.0972808389096635e-08, + "logits/chosen": -0.9409053921699524, + "logits/rejected": -0.9479919672012329, + "logps/chosen": -605.3631591796875, + "logps/ref_chosen": -56.90068054199219, + "logps/ref_rejected": -97.63606262207031, + "logps/rejected": -1122.623291015625, + "loss": 1.0207, + "margin_dpo/margin_mean": 476.524658203125, + "margin_dpo/margin_std": 514.8095703125, + "step": 584 + }, + { + "KL/chosen_KL_mean": -656.8305053710938, + "KL/mean": -868.0594482421875, + "KL/rejected_KL_mean": -1079.288330078125, + "KL/std": 551.96435546875, + "epoch": 0.8590308370044053, + "fcm_dpo/beta": 0.001014050329104066, + "fcm_dpo/delta": -0.029842915013432503, + "fcm_dpo/margin": 422.4579162597656, + "fcm_dpo/q_t": 0.40475040674209595, + "grad_norm": 28.67152976989746, + "learning_rate": 3.035698600998121e-08, + "logits/chosen": -0.9976698756217957, + "logits/rejected": -1.021203875541687, + "logps/chosen": -717.804443359375, + "logps/ref_chosen": -60.973968505859375, + "logps/ref_rejected": -84.16952514648438, + "logps/rejected": -1163.4578857421875, + "loss": 1.1049, + "margin_dpo/margin_mean": 422.4578857421875, + "margin_dpo/margin_std": 662.7896728515625, + "step": 585 + }, + { + "KL/chosen_KL_mean": -663.6984252929688, + "KL/mean": -804.322021484375, + "KL/rejected_KL_mean": -944.9456176757812, + "KL/std": 479.632568359375, + "epoch": 0.8604992657856094, + "fcm_dpo/beta": 0.0010314470855519176, + "fcm_dpo/delta": 0.11326177418231964, + "fcm_dpo/margin": 281.2471618652344, + "fcm_dpo/q_t": 0.433902382850647, + "grad_norm": 29.24315071105957, + "learning_rate": 2.974695142855388e-08, + "logits/chosen": -0.9781264066696167, + "logits/rejected": -0.9948530197143555, + "logps/chosen": -720.5540161132812, + "logps/ref_chosen": -56.85559844970703, + "logps/ref_rejected": -91.80261993408203, + "logps/rejected": -1036.748291015625, + "loss": 1.1906, + "margin_dpo/margin_mean": 281.2471618652344, + "margin_dpo/margin_std": 540.845458984375, + "step": 586 + }, + { + "KL/chosen_KL_mean": -457.7816162109375, + "KL/mean": -663.854736328125, + "KL/rejected_KL_mean": -869.9278564453125, + "KL/std": 509.3709411621094, + "epoch": 0.8619676945668135, + "fcm_dpo/beta": 0.0010362120810896158, + "fcm_dpo/delta": -0.02832759916782379, + "fcm_dpo/margin": 412.146240234375, + "fcm_dpo/q_t": 0.40200504660606384, + "grad_norm": 31.405168533325195, + "learning_rate": 2.9142720719793122e-08, + "logits/chosen": -1.012029767036438, + "logits/rejected": -1.0366127490997314, + "logps/chosen": -502.47320556640625, + "logps/ref_chosen": -44.69159698486328, + "logps/ref_rejected": -82.62385559082031, + "logps/rejected": -952.5516967773438, + "loss": 1.0723, + "margin_dpo/margin_mean": 412.146240234375, + "margin_dpo/margin_std": 520.8572998046875, + "step": 587 + }, + { + "KL/chosen_KL_mean": -634.4208984375, + "KL/mean": -803.1209716796875, + "KL/rejected_KL_mean": -971.821044921875, + "KL/std": 458.4549560546875, + "epoch": 0.8634361233480177, + "fcm_dpo/beta": 0.001035462599247694, + "fcm_dpo/delta": 0.05226360261440277, + "fcm_dpo/margin": 337.400146484375, + "fcm_dpo/q_t": 0.4189639091491699, + "grad_norm": 30.63210678100586, + "learning_rate": 2.8544309805740018e-08, + "logits/chosen": -0.9633903503417969, + "logits/rejected": -0.9861800670623779, + "logps/chosen": -684.7158203125, + "logps/ref_chosen": -50.29494857788086, + "logps/ref_rejected": -107.36988067626953, + "logps/rejected": -1079.19091796875, + "loss": 1.1261, + "margin_dpo/margin_mean": 337.4001770019531, + "margin_dpo/margin_std": 472.3351745605469, + "step": 588 + }, + { + "KL/chosen_KL_mean": -631.3834838867188, + "KL/mean": -850.8163452148438, + "KL/rejected_KL_mean": -1070.249267578125, + "KL/std": 498.56488037109375, + "epoch": 0.8649045521292217, + "fcm_dpo/beta": 0.001036192523315549, + "fcm_dpo/delta": -0.057322654873132706, + "fcm_dpo/margin": 438.8658142089844, + "fcm_dpo/q_t": 0.39515334367752075, + "grad_norm": 27.6535587310791, + "learning_rate": 2.7951734455078786e-08, + "logits/chosen": -0.9325675964355469, + "logits/rejected": -0.9372642636299133, + "logps/chosen": -691.3133544921875, + "logps/ref_chosen": -59.929908752441406, + "logps/ref_rejected": -111.65534973144531, + "logps/rejected": -1181.904541015625, + "loss": 1.0514, + "margin_dpo/margin_mean": 438.8658142089844, + "margin_dpo/margin_std": 524.8668823242188, + "step": 589 + }, + { + "KL/chosen_KL_mean": -563.392333984375, + "KL/mean": -777.1893310546875, + "KL/rejected_KL_mean": -990.9863891601562, + "KL/std": 485.23968505859375, + "epoch": 0.8663729809104258, + "fcm_dpo/beta": 0.0010251689236611128, + "fcm_dpo/delta": -0.04011045768857002, + "fcm_dpo/margin": 427.59405517578125, + "fcm_dpo/q_t": 0.40004873275756836, + "grad_norm": 29.574190139770508, + "learning_rate": 2.736501028272095e-08, + "logits/chosen": -0.9315773248672485, + "logits/rejected": -0.9519675970077515, + "logps/chosen": -619.2021484375, + "logps/ref_chosen": -55.80979537963867, + "logps/ref_rejected": -106.06282043457031, + "logps/rejected": -1097.0491943359375, + "loss": 1.0636, + "margin_dpo/margin_mean": 427.59405517578125, + "margin_dpo/margin_std": 530.9572143554688, + "step": 590 + }, + { + "KL/chosen_KL_mean": -604.170654296875, + "KL/mean": -798.3487548828125, + "KL/rejected_KL_mean": -992.52685546875, + "KL/std": 467.26275634765625, + "epoch": 0.8678414096916299, + "fcm_dpo/beta": 0.0010215968359261751, + "fcm_dpo/delta": 0.0033291950821876526, + "fcm_dpo/margin": 388.35614013671875, + "fcm_dpo/q_t": 0.40842732787132263, + "grad_norm": 27.988250732421875, + "learning_rate": 2.678415274939408e-08, + "logits/chosen": -0.9853817224502563, + "logits/rejected": -0.9650702476501465, + "logps/chosen": -660.4113159179688, + "logps/ref_chosen": -56.24061965942383, + "logps/ref_rejected": -83.78629302978516, + "logps/rejected": -1076.3131103515625, + "loss": 1.0985, + "margin_dpo/margin_mean": 388.35614013671875, + "margin_dpo/margin_std": 533.7264404296875, + "step": 591 + }, + { + "KL/chosen_KL_mean": -623.6812133789062, + "KL/mean": -797.793212890625, + "KL/rejected_KL_mean": -971.9052734375, + "KL/std": 479.56719970703125, + "epoch": 0.869309838472834, + "fcm_dpo/beta": 0.0010291507933288813, + "fcm_dpo/delta": 0.04319499433040619, + "fcm_dpo/margin": 348.22406005859375, + "fcm_dpo/q_t": 0.4191494584083557, + "grad_norm": 25.876665115356445, + "learning_rate": 2.6209177161234442e-08, + "logits/chosen": -0.9463398456573486, + "logits/rejected": -0.9439194798469543, + "logps/chosen": -671.6214599609375, + "logps/ref_chosen": -47.94025421142578, + "logps/ref_rejected": -75.73287963867188, + "logps/rejected": -1047.63818359375, + "loss": 1.1678, + "margin_dpo/margin_mean": 348.22406005859375, + "margin_dpo/margin_std": 647.0997314453125, + "step": 592 + }, + { + "KL/chosen_KL_mean": -612.51904296875, + "KL/mean": -746.0147705078125, + "KL/rejected_KL_mean": -879.5105590820312, + "KL/std": 498.0760498046875, + "epoch": 0.8707782672540382, + "fcm_dpo/beta": 0.0010393188567832112, + "fcm_dpo/delta": 0.03283300623297691, + "fcm_dpo/margin": 266.99151611328125, + "fcm_dpo/q_t": 0.43733033537864685, + "grad_norm": 34.479095458984375, + "learning_rate": 2.564009866938349e-08, + "logits/chosen": -0.8614813089370728, + "logits/rejected": -0.849102258682251, + "logps/chosen": -661.2098388671875, + "logps/ref_chosen": -48.690757751464844, + "logps/ref_rejected": -60.90800094604492, + "logps/rejected": -940.4185791015625, + "loss": 1.2072, + "margin_dpo/margin_mean": 266.99151611328125, + "margin_dpo/margin_std": 553.5676879882812, + "step": 593 + }, + { + "KL/chosen_KL_mean": -567.38037109375, + "KL/mean": -739.0123291015625, + "KL/rejected_KL_mean": -910.6441650390625, + "KL/std": 496.5671691894531, + "epoch": 0.8722466960352423, + "fcm_dpo/beta": 0.0010341550223529339, + "fcm_dpo/delta": -0.04993312805891037, + "fcm_dpo/margin": 343.2637939453125, + "fcm_dpo/q_t": 0.4182147681713104, + "grad_norm": 31.0626220703125, + "learning_rate": 2.5076932269588708e-08, + "logits/chosen": -0.9724768400192261, + "logits/rejected": -0.9574602246284485, + "logps/chosen": -622.3153076171875, + "logps/ref_chosen": -54.93488693237305, + "logps/ref_rejected": -86.09967803955078, + "logps/rejected": -996.743896484375, + "loss": 1.1371, + "margin_dpo/margin_mean": 343.2638244628906, + "margin_dpo/margin_std": 524.0340576171875, + "step": 594 + }, + { + "KL/chosen_KL_mean": -574.682861328125, + "KL/mean": -762.4730224609375, + "KL/rejected_KL_mean": -950.2633056640625, + "KL/std": 482.28497314453125, + "epoch": 0.8737151248164464, + "fcm_dpo/beta": 0.0010293896775692701, + "fcm_dpo/delta": 0.013885049149394035, + "fcm_dpo/margin": 375.58038330078125, + "fcm_dpo/q_t": 0.41420266032218933, + "grad_norm": 31.87458610534668, + "learning_rate": 2.451969280180849e-08, + "logits/chosen": -0.928321361541748, + "logits/rejected": -0.9355098009109497, + "logps/chosen": -624.103271484375, + "logps/ref_chosen": -49.4204216003418, + "logps/ref_rejected": -80.62731170654297, + "logps/rejected": -1030.890625, + "loss": 1.1078, + "margin_dpo/margin_mean": 375.58038330078125, + "margin_dpo/margin_std": 548.4652099609375, + "step": 595 + }, + { + "KL/chosen_KL_mean": -637.4048461914062, + "KL/mean": -785.274658203125, + "KL/rejected_KL_mean": -933.1444091796875, + "KL/std": 496.126708984375, + "epoch": 0.8751835535976505, + "fcm_dpo/beta": 0.0010479073971509933, + "fcm_dpo/delta": 0.09277448803186417, + "fcm_dpo/margin": 295.7396240234375, + "fcm_dpo/q_t": 0.431826651096344, + "grad_norm": 49.97145080566406, + "learning_rate": 2.396839494982103e-08, + "logits/chosen": -0.942278265953064, + "logits/rejected": -0.9016916155815125, + "logps/chosen": -697.196533203125, + "logps/ref_chosen": -59.791683197021484, + "logps/ref_rejected": -80.09111785888672, + "logps/rejected": -1013.235595703125, + "loss": 1.1853, + "margin_dpo/margin_mean": 295.7396240234375, + "margin_dpo/margin_std": 582.1591796875, + "step": 596 + }, + { + "KL/chosen_KL_mean": -593.11328125, + "KL/mean": -829.9117431640625, + "KL/rejected_KL_mean": -1066.7103271484375, + "KL/std": 540.2069091796875, + "epoch": 0.8766519823788547, + "fcm_dpo/beta": 0.0010269451886415482, + "fcm_dpo/delta": -0.09347677230834961, + "fcm_dpo/margin": 473.59698486328125, + "fcm_dpo/q_t": 0.39144447445869446, + "grad_norm": 33.3725471496582, + "learning_rate": 2.3423053240837514e-08, + "logits/chosen": -0.8926633596420288, + "logits/rejected": -0.9352051019668579, + "logps/chosen": -650.3740844726562, + "logps/ref_chosen": -57.26078796386719, + "logps/ref_rejected": -100.6937255859375, + "logps/rejected": -1167.404052734375, + "loss": 1.0513, + "margin_dpo/margin_mean": 473.59698486328125, + "margin_dpo/margin_std": 604.17333984375, + "step": 597 + }, + { + "KL/chosen_KL_mean": -608.8829345703125, + "KL/mean": -787.3731689453125, + "KL/rejected_KL_mean": -965.8634033203125, + "KL/std": 469.7054138183594, + "epoch": 0.8781204111600588, + "fcm_dpo/beta": 0.0010294051608070731, + "fcm_dpo/delta": 0.03294781595468521, + "fcm_dpo/margin": 356.9803771972656, + "fcm_dpo/q_t": 0.4142112135887146, + "grad_norm": 26.78837013244629, + "learning_rate": 2.2883682045119062e-08, + "logits/chosen": -0.9758745431900024, + "logits/rejected": -0.9783375859260559, + "logps/chosen": -661.4014892578125, + "logps/ref_chosen": -52.51850509643555, + "logps/ref_rejected": -89.44385528564453, + "logps/rejected": -1055.3072509765625, + "loss": 1.1243, + "margin_dpo/margin_mean": 356.98040771484375, + "margin_dpo/margin_std": 516.56591796875, + "step": 598 + }, + { + "KL/chosen_KL_mean": -612.8923950195312, + "KL/mean": -774.1793823242188, + "KL/rejected_KL_mean": -935.4663696289062, + "KL/std": 445.364501953125, + "epoch": 0.8795888399412628, + "fcm_dpo/beta": 0.0010303169256076217, + "fcm_dpo/delta": -0.039691608399152756, + "fcm_dpo/margin": 322.57403564453125, + "fcm_dpo/q_t": 0.4193369150161743, + "grad_norm": 28.11864471435547, + "learning_rate": 2.2350295575598367e-08, + "logits/chosen": -0.9431838989257812, + "logits/rejected": -0.9493868350982666, + "logps/chosen": -662.695068359375, + "logps/ref_chosen": -49.802677154541016, + "logps/ref_rejected": -82.978515625, + "logps/rejected": -1018.4449462890625, + "loss": 1.129, + "margin_dpo/margin_mean": 322.57403564453125, + "margin_dpo/margin_std": 408.63031005859375, + "step": 599 + }, + { + "KL/chosen_KL_mean": -657.6517333984375, + "KL/mean": -805.3268432617188, + "KL/rejected_KL_mean": -953.001953125, + "KL/std": 460.2043151855469, + "epoch": 0.8810572687224669, + "fcm_dpo/beta": 0.001043910626322031, + "fcm_dpo/delta": 0.09467366337776184, + "fcm_dpo/margin": 295.3502197265625, + "fcm_dpo/q_t": 0.4305538535118103, + "grad_norm": 35.54128646850586, + "learning_rate": 2.1822907887504932e-08, + "logits/chosen": -1.0306495428085327, + "logits/rejected": -1.0190434455871582, + "logps/chosen": -724.086669921875, + "logps/ref_chosen": -66.43487548828125, + "logps/ref_rejected": -85.45649719238281, + "logps/rejected": -1038.45849609375, + "loss": 1.1772, + "margin_dpo/margin_mean": 295.3502197265625, + "margin_dpo/margin_std": 545.6597290039062, + "step": 600 + }, + { + "KL/chosen_KL_mean": -669.33251953125, + "KL/mean": -859.396240234375, + "KL/rejected_KL_mean": -1049.4599609375, + "KL/std": 509.2434997558594, + "epoch": 0.882525697503671, + "fcm_dpo/beta": 0.0010490333661437035, + "fcm_dpo/delta": 0.0012606056407094002, + "fcm_dpo/margin": 380.12744140625, + "fcm_dpo/q_t": 0.40675991773605347, + "grad_norm": 29.669403076171875, + "learning_rate": 2.1301532877994742e-08, + "logits/chosen": -0.9606672525405884, + "logits/rejected": -0.9689534306526184, + "logps/chosen": -728.4661254882812, + "logps/ref_chosen": -59.13361358642578, + "logps/ref_rejected": -94.69093322753906, + "logps/rejected": -1144.15087890625, + "loss": 1.088, + "margin_dpo/margin_mean": 380.12744140625, + "margin_dpo/margin_std": 485.5259704589844, + "step": 601 + }, + { + "KL/chosen_KL_mean": -479.5604248046875, + "KL/mean": -705.6307373046875, + "KL/rejected_KL_mean": -931.701171875, + "KL/std": 458.986328125, + "epoch": 0.8839941262848752, + "fcm_dpo/beta": 0.001045595621690154, + "fcm_dpo/delta": -0.07681306451559067, + "fcm_dpo/margin": 452.1407470703125, + "fcm_dpo/q_t": 0.39179527759552, + "grad_norm": 48.49148178100586, + "learning_rate": 2.0786184285784298e-08, + "logits/chosen": -0.9890528917312622, + "logits/rejected": -1.0126900672912598, + "logps/chosen": -528.1539306640625, + "logps/ref_chosen": -48.59352111816406, + "logps/ref_rejected": -87.6685562133789, + "logps/rejected": -1019.3697509765625, + "loss": 1.0357, + "margin_dpo/margin_mean": 452.1407470703125, + "margin_dpo/margin_std": 503.97027587890625, + "step": 602 + }, + { + "KL/chosen_KL_mean": -562.6729125976562, + "KL/mean": -779.9182739257812, + "KL/rejected_KL_mean": -997.16357421875, + "KL/std": 511.3848876953125, + "epoch": 0.8854625550660793, + "fcm_dpo/beta": 0.0010269982740283012, + "fcm_dpo/delta": -0.04837334156036377, + "fcm_dpo/margin": 434.49066162109375, + "fcm_dpo/q_t": 0.40095192193984985, + "grad_norm": 47.26500701904297, + "learning_rate": 2.0276875690788204e-08, + "logits/chosen": -0.9793489575386047, + "logits/rejected": -0.9629038572311401, + "logps/chosen": -633.0875244140625, + "logps/ref_chosen": -70.41461944580078, + "logps/ref_rejected": -100.32559967041016, + "logps/rejected": -1097.4892578125, + "loss": 1.0773, + "margin_dpo/margin_mean": 434.49066162109375, + "margin_dpo/margin_std": 614.062744140625, + "step": 603 + }, + { + "KL/chosen_KL_mean": -594.250244140625, + "KL/mean": -817.974853515625, + "KL/rejected_KL_mean": -1041.699462890625, + "KL/std": 504.0755615234375, + "epoch": 0.8869309838472834, + "fcm_dpo/beta": 0.0010149029549211264, + "fcm_dpo/delta": -0.056748565286397934, + "fcm_dpo/margin": 447.44915771484375, + "fcm_dpo/q_t": 0.39813223481178284, + "grad_norm": 33.27207565307617, + "learning_rate": 1.977362051376158e-08, + "logits/chosen": -0.9312797784805298, + "logits/rejected": -0.957409143447876, + "logps/chosen": -640.7083129882812, + "logps/ref_chosen": -46.45808029174805, + "logps/ref_rejected": -91.8544921875, + "logps/rejected": -1133.553955078125, + "loss": 1.075, + "margin_dpo/margin_mean": 447.44915771484375, + "margin_dpo/margin_std": 625.2024536132812, + "step": 604 + }, + { + "KL/chosen_KL_mean": -613.604248046875, + "KL/mean": -800.700927734375, + "KL/rejected_KL_mean": -987.7977294921875, + "KL/std": 475.6051025390625, + "epoch": 0.8883994126284875, + "fcm_dpo/beta": 0.0010130970040336251, + "fcm_dpo/delta": 0.021622149273753166, + "fcm_dpo/margin": 374.1934509277344, + "fcm_dpo/q_t": 0.4142388701438904, + "grad_norm": 35.06957244873047, + "learning_rate": 1.9276432015946446e-08, + "logits/chosen": -0.926941990852356, + "logits/rejected": -0.9346826672554016, + "logps/chosen": -679.8536376953125, + "logps/ref_chosen": -66.24933624267578, + "logps/ref_rejected": -102.30496978759766, + "logps/rejected": -1090.1026611328125, + "loss": 1.1146, + "margin_dpo/margin_mean": 374.19342041015625, + "margin_dpo/margin_std": 554.1260986328125, + "step": 605 + }, + { + "KL/chosen_KL_mean": -615.2284545898438, + "KL/mean": -821.0089111328125, + "KL/rejected_KL_mean": -1026.7894287109375, + "KL/std": 506.0755615234375, + "epoch": 0.8898678414096917, + "fcm_dpo/beta": 0.001017784932628274, + "fcm_dpo/delta": -0.01989796943962574, + "fcm_dpo/margin": 411.5608825683594, + "fcm_dpo/q_t": 0.4054357707500458, + "grad_norm": 25.421344757080078, + "learning_rate": 1.8785323298722093e-08, + "logits/chosen": -0.9341834783554077, + "logits/rejected": -0.9450877904891968, + "logps/chosen": -670.047607421875, + "logps/ref_chosen": -54.819122314453125, + "logps/ref_rejected": -98.37146759033203, + "logps/rejected": -1125.160888671875, + "loss": 1.0817, + "margin_dpo/margin_mean": 411.5608825683594, + "margin_dpo/margin_std": 548.4053955078125, + "step": 606 + }, + { + "KL/chosen_KL_mean": -652.0457763671875, + "KL/mean": -803.5496826171875, + "KL/rejected_KL_mean": -955.053466796875, + "KL/std": 466.5636291503906, + "epoch": 0.8913362701908958, + "fcm_dpo/beta": 0.0010304426541551948, + "fcm_dpo/delta": 0.09002204239368439, + "fcm_dpo/margin": 303.0076904296875, + "fcm_dpo/q_t": 0.4281555414199829, + "grad_norm": 29.748842239379883, + "learning_rate": 1.8300307303259904e-08, + "logits/chosen": -0.9252548217773438, + "logits/rejected": -0.9004162549972534, + "logps/chosen": -710.1298828125, + "logps/ref_chosen": -58.08403778076172, + "logps/ref_rejected": -79.777099609375, + "logps/rejected": -1034.83056640625, + "loss": 1.1683, + "margin_dpo/margin_mean": 303.0076904296875, + "margin_dpo/margin_std": 525.4556274414062, + "step": 607 + }, + { + "KL/chosen_KL_mean": -589.3780517578125, + "KL/mean": -781.2340698242188, + "KL/rejected_KL_mean": -973.090087890625, + "KL/std": 448.39385986328125, + "epoch": 0.8928046989720999, + "fcm_dpo/beta": 0.001034360844641924, + "fcm_dpo/delta": 0.0031163040548563004, + "fcm_dpo/margin": 383.7120056152344, + "fcm_dpo/q_t": 0.4076157808303833, + "grad_norm": 38.95619583129883, + "learning_rate": 1.7821396810182437e-08, + "logits/chosen": -0.9842853546142578, + "logits/rejected": -0.9917802810668945, + "logps/chosen": -646.8289184570312, + "logps/ref_chosen": -57.450836181640625, + "logps/ref_rejected": -94.77339172363281, + "logps/rejected": -1067.863525390625, + "loss": 1.0818, + "margin_dpo/margin_mean": 383.71197509765625, + "margin_dpo/margin_std": 460.55084228515625, + "step": 608 + }, + { + "KL/chosen_KL_mean": -609.8814697265625, + "KL/mean": -836.5782470703125, + "KL/rejected_KL_mean": -1063.2750244140625, + "KL/std": 583.9754638671875, + "epoch": 0.8942731277533039, + "fcm_dpo/beta": 0.0010232683271169662, + "fcm_dpo/delta": -0.06700144708156586, + "fcm_dpo/margin": 453.39361572265625, + "fcm_dpo/q_t": 0.3996211588382721, + "grad_norm": 29.039478302001953, + "learning_rate": 1.7348604439226617e-08, + "logits/chosen": -1.0189104080200195, + "logits/rejected": -1.0311899185180664, + "logps/chosen": -668.6868896484375, + "logps/ref_chosen": -58.805355072021484, + "logps/ref_rejected": -88.81600952148438, + "logps/rejected": -1152.091064453125, + "loss": 1.0751, + "margin_dpo/margin_mean": 453.39361572265625, + "margin_dpo/margin_std": 680.756103515625, + "step": 609 + }, + { + "KL/chosen_KL_mean": -615.740966796875, + "KL/mean": -764.8184814453125, + "KL/rejected_KL_mean": -913.8959350585938, + "KL/std": 459.981201171875, + "epoch": 0.895741556534508, + "fcm_dpo/beta": 0.0010335487313568592, + "fcm_dpo/delta": 0.09469583630561829, + "fcm_dpo/margin": 298.15496826171875, + "fcm_dpo/q_t": 0.42828184366226196, + "grad_norm": 47.39312744140625, + "learning_rate": 1.6881942648911074e-08, + "logits/chosen": -0.9286304712295532, + "logits/rejected": -0.8889775276184082, + "logps/chosen": -681.43603515625, + "logps/ref_chosen": -65.69503784179688, + "logps/ref_rejected": -83.40538787841797, + "logps/rejected": -997.3013305664062, + "loss": 1.1705, + "margin_dpo/margin_mean": 298.15496826171875, + "margin_dpo/margin_std": 523.1708984375, + "step": 610 + }, + { + "KL/chosen_KL_mean": -617.3861694335938, + "KL/mean": -879.7110595703125, + "KL/rejected_KL_mean": -1142.035888671875, + "KL/std": 586.503173828125, + "epoch": 0.8972099853157122, + "fcm_dpo/beta": 0.001017481554299593, + "fcm_dpo/delta": -0.14123646914958954, + "fcm_dpo/margin": 524.6498413085938, + "fcm_dpo/q_t": 0.38256320357322693, + "grad_norm": 33.80018615722656, + "learning_rate": 1.6421423736208e-08, + "logits/chosen": -0.9945396184921265, + "logits/rejected": -1.0387227535247803, + "logps/chosen": -669.985595703125, + "logps/ref_chosen": -52.59946823120117, + "logps/ref_rejected": -86.33099365234375, + "logps/rejected": -1228.366943359375, + "loss": 1.026, + "margin_dpo/margin_mean": 524.6498413085938, + "margin_dpo/margin_std": 674.8773193359375, + "step": 611 + }, + { + "KL/chosen_KL_mean": -683.014892578125, + "KL/mean": -888.0888671875, + "KL/rejected_KL_mean": -1093.162841796875, + "KL/std": 499.14178466796875, + "epoch": 0.8986784140969163, + "fcm_dpo/beta": 0.001010039821267128, + "fcm_dpo/delta": -0.015054378658533096, + "fcm_dpo/margin": 410.1479797363281, + "fcm_dpo/q_t": 0.40539655089378357, + "grad_norm": 24.813846588134766, + "learning_rate": 1.5967059836219042e-08, + "logits/chosen": -1.0098485946655273, + "logits/rejected": -1.000733733177185, + "logps/chosen": -742.338623046875, + "logps/ref_chosen": -59.32372283935547, + "logps/ref_rejected": -88.31239318847656, + "logps/rejected": -1181.475341796875, + "loss": 1.0831, + "margin_dpo/margin_mean": 410.1479797363281, + "margin_dpo/margin_std": 539.570556640625, + "step": 612 + }, + { + "KL/chosen_KL_mean": -591.58203125, + "KL/mean": -833.6363525390625, + "KL/rejected_KL_mean": -1075.690673828125, + "KL/std": 527.3922119140625, + "epoch": 0.9001468428781204, + "fcm_dpo/beta": 0.0009931058157235384, + "fcm_dpo/delta": -0.08487021923065186, + "fcm_dpo/margin": 484.1087646484375, + "fcm_dpo/q_t": 0.38921403884887695, + "grad_norm": 35.546573638916016, + "learning_rate": 1.551886292185553e-08, + "logits/chosen": -1.0186982154846191, + "logits/rejected": -1.0692498683929443, + "logps/chosen": -651.31201171875, + "logps/ref_chosen": -59.72996520996094, + "logps/ref_rejected": -105.10752868652344, + "logps/rejected": -1180.79833984375, + "loss": 1.0269, + "margin_dpo/margin_mean": 484.1087646484375, + "margin_dpo/margin_std": 536.681884765625, + "step": 613 + }, + { + "KL/chosen_KL_mean": -657.602294921875, + "KL/mean": -874.024169921875, + "KL/rejected_KL_mean": -1090.4461669921875, + "KL/std": 522.123046875, + "epoch": 0.9016152716593245, + "fcm_dpo/beta": 0.0009876348776742816, + "fcm_dpo/delta": -0.028807081282138824, + "fcm_dpo/margin": 432.84381103515625, + "fcm_dpo/q_t": 0.4031580984592438, + "grad_norm": 34.93425750732422, + "learning_rate": 1.507684480352292e-08, + "logits/chosen": -0.9571743011474609, + "logits/rejected": -1.0247644186019897, + "logps/chosen": -710.541259765625, + "logps/ref_chosen": -52.93898010253906, + "logps/ref_rejected": -104.67938232421875, + "logps/rejected": -1195.12548828125, + "loss": 1.0948, + "margin_dpo/margin_mean": 432.84381103515625, + "margin_dpo/margin_std": 632.5867919921875, + "step": 614 + }, + { + "KL/chosen_KL_mean": -625.9893798828125, + "KL/mean": -813.37548828125, + "KL/rejected_KL_mean": -1000.7615966796875, + "KL/std": 563.71142578125, + "epoch": 0.9030837004405287, + "fcm_dpo/beta": 0.000991692766547203, + "fcm_dpo/delta": 0.02896309643983841, + "fcm_dpo/margin": 374.772216796875, + "fcm_dpo/q_t": 0.41585174202919006, + "grad_norm": 25.01183319091797, + "learning_rate": 1.4641017128809801e-08, + "logits/chosen": -0.9686431884765625, + "logits/rejected": -0.9801833033561707, + "logps/chosen": -691.8067016601562, + "logps/ref_chosen": -65.81727600097656, + "logps/ref_rejected": -95.17749786376953, + "logps/rejected": -1095.939208984375, + "loss": 1.1292, + "margin_dpo/margin_mean": 374.772216796875, + "margin_dpo/margin_std": 597.46826171875, + "step": 615 + }, + { + "KL/chosen_KL_mean": -744.90234375, + "KL/mean": -902.1414184570312, + "KL/rejected_KL_mean": -1059.3804931640625, + "KL/std": 491.1352233886719, + "epoch": 0.9045521292217328, + "fcm_dpo/beta": 0.0010039603803306818, + "fcm_dpo/delta": 0.08676433563232422, + "fcm_dpo/margin": 314.47821044921875, + "fcm_dpo/q_t": 0.42835602164268494, + "grad_norm": 39.332359313964844, + "learning_rate": 1.4211391382180637e-08, + "logits/chosen": -1.029462218284607, + "logits/rejected": -1.003951907157898, + "logps/chosen": -810.03515625, + "logps/ref_chosen": -65.13285827636719, + "logps/ref_rejected": -74.70050048828125, + "logps/rejected": -1134.0810546875, + "loss": 1.1674, + "margin_dpo/margin_mean": 314.478271484375, + "margin_dpo/margin_std": 547.7131958007812, + "step": 616 + }, + { + "KL/chosen_KL_mean": -681.607666015625, + "KL/mean": -797.3641967773438, + "KL/rejected_KL_mean": -913.1207275390625, + "KL/std": 436.149169921875, + "epoch": 0.9060205580029369, + "fcm_dpo/beta": 0.001032671658322215, + "fcm_dpo/delta": 0.16463825106620789, + "fcm_dpo/margin": 231.5129852294922, + "fcm_dpo/q_t": 0.44537049531936646, + "grad_norm": 54.0463981628418, + "learning_rate": 1.378797888467345e-08, + "logits/chosen": -0.9412636756896973, + "logits/rejected": -0.8970128297805786, + "logps/chosen": -744.61328125, + "logps/ref_chosen": -63.005550384521484, + "logps/ref_rejected": -64.234130859375, + "logps/rejected": -977.3547973632812, + "loss": 1.2256, + "margin_dpo/margin_mean": 231.51300048828125, + "margin_dpo/margin_std": 500.28045654296875, + "step": 617 + }, + { + "KL/chosen_KL_mean": -718.72119140625, + "KL/mean": -963.6566772460938, + "KL/rejected_KL_mean": -1208.59228515625, + "KL/std": 597.5335693359375, + "epoch": 0.9074889867841409, + "fcm_dpo/beta": 0.0010278007248416543, + "fcm_dpo/delta": -0.1088884249329567, + "fcm_dpo/margin": 489.87103271484375, + "fcm_dpo/q_t": 0.39031103253364563, + "grad_norm": 36.161380767822266, + "learning_rate": 1.3370790793601371e-08, + "logits/chosen": -0.9905341267585754, + "logits/rejected": -1.0219985246658325, + "logps/chosen": -785.822509765625, + "logps/ref_chosen": -67.10134887695312, + "logps/ref_rejected": -92.15340423583984, + "logps/rejected": -1300.74560546875, + "loss": 1.0778, + "margin_dpo/margin_mean": 489.87103271484375, + "margin_dpo/margin_std": 753.9830322265625, + "step": 618 + }, + { + "KL/chosen_KL_mean": -694.73486328125, + "KL/mean": -886.2698974609375, + "KL/rejected_KL_mean": -1077.804931640625, + "KL/std": 564.9559326171875, + "epoch": 0.908957415565345, + "fcm_dpo/beta": 0.0010191791225224733, + "fcm_dpo/delta": 0.00997202843427658, + "fcm_dpo/margin": 383.0700378417969, + "fcm_dpo/q_t": 0.4178283214569092, + "grad_norm": 38.189334869384766, + "learning_rate": 1.2959838102258535e-08, + "logits/chosen": -0.9747885465621948, + "logits/rejected": -0.9790507555007935, + "logps/chosen": -750.713134765625, + "logps/ref_chosen": -55.978233337402344, + "logps/ref_rejected": -93.1854019165039, + "logps/rejected": -1170.990234375, + "loss": 1.1572, + "margin_dpo/margin_mean": 383.070068359375, + "margin_dpo/margin_std": 730.9571533203125, + "step": 619 + }, + { + "KL/chosen_KL_mean": -647.696044921875, + "KL/mean": -829.687255859375, + "KL/rejected_KL_mean": -1011.6784057617188, + "KL/std": 487.70135498046875, + "epoch": 0.9104258443465492, + "fcm_dpo/beta": 0.0010204799473285675, + "fcm_dpo/delta": 0.029505692422389984, + "fcm_dpo/margin": 363.9824523925781, + "fcm_dpo/q_t": 0.41603296995162964, + "grad_norm": 30.40621566772461, + "learning_rate": 1.2555131639630567e-08, + "logits/chosen": -1.0277998447418213, + "logits/rejected": -1.0267902612686157, + "logps/chosen": -707.4935302734375, + "logps/ref_chosen": -59.79750061035156, + "logps/ref_rejected": -78.41075134277344, + "logps/rejected": -1090.089111328125, + "loss": 1.1238, + "margin_dpo/margin_mean": 363.982421875, + "margin_dpo/margin_std": 553.198974609375, + "step": 620 + }, + { + "KL/chosen_KL_mean": -630.0430908203125, + "KL/mean": -906.111083984375, + "KL/rejected_KL_mean": -1182.178955078125, + "KL/std": 607.4686279296875, + "epoch": 0.9118942731277533, + "fcm_dpo/beta": 0.001008342718705535, + "fcm_dpo/delta": -0.16588960587978363, + "fcm_dpo/margin": 552.135986328125, + "fcm_dpo/q_t": 0.37482962012290955, + "grad_norm": 46.220027923583984, + "learning_rate": 1.2156682070109086e-08, + "logits/chosen": -1.044553518295288, + "logits/rejected": -1.0921435356140137, + "logps/chosen": -683.976806640625, + "logps/ref_chosen": -53.93375778198242, + "logps/ref_rejected": -88.36951446533203, + "logps/rejected": -1270.548583984375, + "loss": 1.0065, + "margin_dpo/margin_mean": 552.135986328125, + "margin_dpo/margin_std": 646.8200073242188, + "step": 621 + }, + { + "KL/chosen_KL_mean": -638.1873779296875, + "KL/mean": -831.6390380859375, + "KL/rejected_KL_mean": -1025.090576171875, + "KL/std": 474.5973815917969, + "epoch": 0.9133627019089574, + "fcm_dpo/beta": 0.0009906619088724256, + "fcm_dpo/delta": 0.01709701120853424, + "fcm_dpo/margin": 386.9031677246094, + "fcm_dpo/q_t": 0.4140721559524536, + "grad_norm": 29.612194061279297, + "learning_rate": 1.1764499893210878e-08, + "logits/chosen": -0.9218890070915222, + "logits/rejected": -0.8999383449554443, + "logps/chosen": -698.4732666015625, + "logps/ref_chosen": -60.28582000732422, + "logps/ref_rejected": -85.51873779296875, + "logps/rejected": -1110.609375, + "loss": 1.1149, + "margin_dpo/margin_mean": 386.90313720703125, + "margin_dpo/margin_std": 587.5882568359375, + "step": 622 + }, + { + "KL/chosen_KL_mean": -709.4207153320312, + "KL/mean": -861.94287109375, + "KL/rejected_KL_mean": -1014.465087890625, + "KL/std": 493.4505920410156, + "epoch": 0.9148311306901615, + "fcm_dpo/beta": 0.0010092295706272125, + "fcm_dpo/delta": 0.09511934220790863, + "fcm_dpo/margin": 305.0443420410156, + "fcm_dpo/q_t": 0.431568443775177, + "grad_norm": 41.82392883300781, + "learning_rate": 1.1378595443300998e-08, + "logits/chosen": -1.078977346420288, + "logits/rejected": -1.0663626194000244, + "logps/chosen": -773.57763671875, + "logps/ref_chosen": -64.1569595336914, + "logps/ref_rejected": -85.08304595947266, + "logps/rejected": -1099.548095703125, + "loss": 1.1834, + "margin_dpo/margin_mean": 305.0443115234375, + "margin_dpo/margin_std": 590.980224609375, + "step": 623 + }, + { + "KL/chosen_KL_mean": -691.134033203125, + "KL/mean": -919.0125122070312, + "KL/rejected_KL_mean": -1146.890869140625, + "KL/std": 508.752685546875, + "epoch": 0.9162995594713657, + "fcm_dpo/beta": 0.0010058375773951411, + "fcm_dpo/delta": -0.06120828539133072, + "fcm_dpo/margin": 455.75689697265625, + "fcm_dpo/q_t": 0.39310479164123535, + "grad_norm": 32.26824951171875, + "learning_rate": 1.0998978889320582e-08, + "logits/chosen": -1.066502332687378, + "logits/rejected": -1.0555529594421387, + "logps/chosen": -763.052734375, + "logps/ref_chosen": -71.91862487792969, + "logps/ref_rejected": -97.13203430175781, + "logps/rejected": -1244.02294921875, + "loss": 1.0517, + "margin_dpo/margin_mean": 455.75689697265625, + "margin_dpo/margin_std": 550.7293701171875, + "step": 624 + }, + { + "KL/chosen_KL_mean": -651.8470458984375, + "KL/mean": -884.4716796875, + "KL/rejected_KL_mean": -1117.0963134765625, + "KL/std": 530.2442626953125, + "epoch": 0.9177679882525698, + "fcm_dpo/beta": 0.0009930902160704136, + "fcm_dpo/delta": -0.06502003967761993, + "fcm_dpo/margin": 465.24932861328125, + "fcm_dpo/q_t": 0.394126296043396, + "grad_norm": 47.13850402832031, + "learning_rate": 1.0625660234518913e-08, + "logits/chosen": -0.9659937024116516, + "logits/rejected": -0.9809095859527588, + "logps/chosen": -710.1890869140625, + "logps/ref_chosen": -58.342071533203125, + "logps/ref_rejected": -86.09038543701172, + "logps/rejected": -1203.186767578125, + "loss": 1.0347, + "margin_dpo/margin_mean": 465.24932861328125, + "margin_dpo/margin_std": 516.6107788085938, + "step": 625 + }, + { + "KL/chosen_KL_mean": -775.7965087890625, + "KL/mean": -922.9110107421875, + "KL/rejected_KL_mean": -1070.0255126953125, + "KL/std": 596.8397216796875, + "epoch": 0.9192364170337739, + "fcm_dpo/beta": 0.0010072626173496246, + "fcm_dpo/delta": 0.10651648044586182, + "fcm_dpo/margin": 294.22900390625, + "fcm_dpo/q_t": 0.4323081970214844, + "grad_norm": 27.3458251953125, + "learning_rate": 1.0258649316189721e-08, + "logits/chosen": -0.9275539517402649, + "logits/rejected": -0.9108865261077881, + "logps/chosen": -850.9091796875, + "logps/ref_chosen": -75.11260986328125, + "logps/ref_rejected": -99.188720703125, + "logps/rejected": -1169.2142333984375, + "loss": 1.1941, + "margin_dpo/margin_mean": 294.22900390625, + "margin_dpo/margin_std": 575.285400390625, + "step": 626 + }, + { + "KL/chosen_KL_mean": -557.2504272460938, + "KL/mean": -842.9886474609375, + "KL/rejected_KL_mean": -1128.726806640625, + "KL/std": 656.2432250976562, + "epoch": 0.920704845814978, + "fcm_dpo/beta": 0.0009910902008414268, + "fcm_dpo/delta": -0.176089346408844, + "fcm_dpo/margin": 571.476318359375, + "fcm_dpo/q_t": 0.3810996115207672, + "grad_norm": 26.04566192626953, + "learning_rate": 9.897955805412e-09, + "logits/chosen": -0.8594233989715576, + "logits/rejected": -0.9377764463424683, + "logps/chosen": -604.9935913085938, + "logps/ref_chosen": -47.74314880371094, + "logps/ref_rejected": -106.75448608398438, + "logps/rejected": -1235.481201171875, + "loss": 1.0171, + "margin_dpo/margin_mean": 571.476318359375, + "margin_dpo/margin_std": 751.080810546875, + "step": 627 + }, + { + "KL/chosen_KL_mean": -722.8740844726562, + "KL/mean": -924.2254638671875, + "KL/rejected_KL_mean": -1125.5770263671875, + "KL/std": 539.3365478515625, + "epoch": 0.922173274596182, + "fcm_dpo/beta": 0.0009758264059200883, + "fcm_dpo/delta": 0.007305025588721037, + "fcm_dpo/margin": 402.702880859375, + "fcm_dpo/q_t": 0.41142743825912476, + "grad_norm": 27.978281021118164, + "learning_rate": 9.543589206795238e-09, + "logits/chosen": -1.021456003189087, + "logits/rejected": -1.0348306894302368, + "logps/chosen": -783.0570068359375, + "logps/ref_chosen": -60.182945251464844, + "logps/ref_rejected": -101.55467224121094, + "logps/rejected": -1227.131591796875, + "loss": 1.113, + "margin_dpo/margin_mean": 402.702880859375, + "margin_dpo/margin_std": 607.0352783203125, + "step": 628 + }, + { + "KL/chosen_KL_mean": -697.99755859375, + "KL/mean": -897.359375, + "KL/rejected_KL_mean": -1096.72119140625, + "KL/std": 534.4760131835938, + "epoch": 0.9236417033773862, + "fcm_dpo/beta": 0.0009785511065274477, + "fcm_dpo/delta": 0.010228663682937622, + "fcm_dpo/margin": 398.7236328125, + "fcm_dpo/q_t": 0.4093359112739563, + "grad_norm": 31.393468856811523, + "learning_rate": 9.19555885822887e-09, + "logits/chosen": -1.0401864051818848, + "logits/rejected": -1.0507943630218506, + "logps/chosen": -762.2110595703125, + "logps/ref_chosen": -64.21354675292969, + "logps/ref_rejected": -91.65367126464844, + "logps/rejected": -1188.374755859375, + "loss": 1.0973, + "margin_dpo/margin_mean": 398.7236328125, + "margin_dpo/margin_std": 530.4059448242188, + "step": 629 + }, + { + "KL/chosen_KL_mean": -634.3704833984375, + "KL/mean": -745.990478515625, + "KL/rejected_KL_mean": -857.6103515625, + "KL/std": 511.8554382324219, + "epoch": 0.9251101321585903, + "fcm_dpo/beta": 0.000984629150480032, + "fcm_dpo/delta": 0.05507725104689598, + "fcm_dpo/margin": 223.2398681640625, + "fcm_dpo/q_t": 0.4526089131832123, + "grad_norm": 44.8847541809082, + "learning_rate": 8.85387393063622e-09, + "logits/chosen": -0.9998958706855774, + "logits/rejected": -0.9623087644577026, + "logps/chosen": -693.6614990234375, + "logps/ref_chosen": -59.29100036621094, + "logps/ref_rejected": -83.59829711914062, + "logps/rejected": -941.2086791992188, + "loss": 1.2595, + "margin_dpo/margin_mean": 223.2398681640625, + "margin_dpo/margin_std": 596.5005493164062, + "step": 630 + }, + { + "KL/chosen_KL_mean": -742.057861328125, + "KL/mean": -920.331298828125, + "KL/rejected_KL_mean": -1098.604736328125, + "KL/std": 551.6812133789062, + "epoch": 0.9265785609397944, + "fcm_dpo/beta": 0.0009971531108021736, + "fcm_dpo/delta": 0.04609350860118866, + "fcm_dpo/margin": 356.5467529296875, + "fcm_dpo/q_t": 0.4193703532218933, + "grad_norm": 27.521265029907227, + "learning_rate": 8.518543427732949e-09, + "logits/chosen": -1.0666249990463257, + "logits/rejected": -1.0676807165145874, + "logps/chosen": -801.511474609375, + "logps/ref_chosen": -59.45360565185547, + "logps/ref_rejected": -80.95156860351562, + "logps/rejected": -1179.5562744140625, + "loss": 1.1545, + "margin_dpo/margin_mean": 356.5467529296875, + "margin_dpo/margin_std": 633.4856567382812, + "step": 631 + }, + { + "KL/chosen_KL_mean": -661.1845092773438, + "KL/mean": -845.2674560546875, + "KL/rejected_KL_mean": -1029.350341796875, + "KL/std": 465.203369140625, + "epoch": 0.9280469897209985, + "fcm_dpo/beta": 0.0009988134261220694, + "fcm_dpo/delta": 0.03309358283877373, + "fcm_dpo/margin": 368.1659240722656, + "fcm_dpo/q_t": 0.4161521792411804, + "grad_norm": 31.457181930541992, + "learning_rate": 8.189576185789637e-09, + "logits/chosen": -1.0178093910217285, + "logits/rejected": -1.0131831169128418, + "logps/chosen": -722.5360717773438, + "logps/ref_chosen": -61.35155487060547, + "logps/ref_rejected": -86.16017150878906, + "logps/rejected": -1115.510498046875, + "loss": 1.1364, + "margin_dpo/margin_mean": 368.16595458984375, + "margin_dpo/margin_std": 587.306884765625, + "step": 632 + }, + { + "KL/chosen_KL_mean": -722.002685546875, + "KL/mean": -844.985107421875, + "KL/rejected_KL_mean": -967.967529296875, + "KL/std": 474.93060302734375, + "epoch": 0.9295154185022027, + "fcm_dpo/beta": 0.0010103812674060464, + "fcm_dpo/delta": 0.04477893188595772, + "fcm_dpo/margin": 245.96484375, + "fcm_dpo/q_t": 0.44230562448501587, + "grad_norm": 33.3397102355957, + "learning_rate": 7.866980873399015e-09, + "logits/chosen": -1.0611484050750732, + "logits/rejected": -1.0687685012817383, + "logps/chosen": -779.2808837890625, + "logps/ref_chosen": -57.27816390991211, + "logps/ref_rejected": -91.58395385742188, + "logps/rejected": -1059.551513671875, + "loss": 1.2266, + "margin_dpo/margin_mean": 245.96484375, + "margin_dpo/margin_std": 546.748046875, + "step": 633 + }, + { + "KL/chosen_KL_mean": -794.5701904296875, + "KL/mean": -933.633544921875, + "KL/rejected_KL_mean": -1072.697021484375, + "KL/std": 556.5794067382812, + "epoch": 0.9309838472834068, + "fcm_dpo/beta": 0.0010201697004958987, + "fcm_dpo/delta": 0.025866778567433357, + "fcm_dpo/margin": 278.1268615722656, + "fcm_dpo/q_t": 0.43637216091156006, + "grad_norm": 29.091527938842773, + "learning_rate": 7.550765991247654e-09, + "logits/chosen": -0.9369779825210571, + "logits/rejected": -0.932574987411499, + "logps/chosen": -861.1891479492188, + "logps/ref_chosen": -66.61896514892578, + "logps/ref_rejected": -107.12564849853516, + "logps/rejected": -1179.82275390625, + "loss": 1.2001, + "margin_dpo/margin_mean": 278.1268310546875, + "margin_dpo/margin_std": 560.8475952148438, + "step": 634 + }, + { + "KL/chosen_KL_mean": -698.1052856445312, + "KL/mean": -869.6240844726562, + "KL/rejected_KL_mean": -1041.142822265625, + "KL/std": 603.190185546875, + "epoch": 0.9324522760646109, + "fcm_dpo/beta": 0.0010265845339745283, + "fcm_dpo/delta": 0.04961933195590973, + "fcm_dpo/margin": 343.0375671386719, + "fcm_dpo/q_t": 0.421801894903183, + "grad_norm": 36.472694396972656, + "learning_rate": 7.240939871891699e-09, + "logits/chosen": -1.003667950630188, + "logits/rejected": -0.9820040464401245, + "logps/chosen": -772.060791015625, + "logps/ref_chosen": -73.95551300048828, + "logps/ref_rejected": -82.50045776367188, + "logps/rejected": -1123.643310546875, + "loss": 1.1452, + "margin_dpo/margin_mean": 343.0375671386719, + "margin_dpo/margin_std": 582.0855712890625, + "step": 635 + }, + { + "KL/chosen_KL_mean": -670.5150146484375, + "KL/mean": -865.4879760742188, + "KL/rejected_KL_mean": -1060.4609375, + "KL/std": 573.9064331054688, + "epoch": 0.933920704845815, + "fcm_dpo/beta": 0.0010364481713622808, + "fcm_dpo/delta": -0.0052045732736587524, + "fcm_dpo/margin": 389.94598388671875, + "fcm_dpo/q_t": 0.4110804796218872, + "grad_norm": 28.409608840942383, + "learning_rate": 6.937510679537628e-09, + "logits/chosen": -0.9490704536437988, + "logits/rejected": -0.9512150287628174, + "logps/chosen": -730.1439208984375, + "logps/ref_chosen": -59.628910064697266, + "logps/ref_rejected": -81.97883605957031, + "logps/rejected": -1142.4398193359375, + "loss": 1.1069, + "margin_dpo/margin_mean": 389.9460144042969, + "margin_dpo/margin_std": 601.882568359375, + "step": 636 + }, + { + "KL/chosen_KL_mean": -652.6944580078125, + "KL/mean": -868.58056640625, + "KL/rejected_KL_mean": -1084.4666748046875, + "KL/std": 552.986572265625, + "epoch": 0.9353891336270191, + "fcm_dpo/beta": 0.0010184976272284985, + "fcm_dpo/delta": -0.04213680326938629, + "fcm_dpo/margin": 431.7720947265625, + "fcm_dpo/q_t": 0.4004812240600586, + "grad_norm": 30.13751792907715, + "learning_rate": 6.640486409826785e-09, + "logits/chosen": -1.028292179107666, + "logits/rejected": -1.0698986053466797, + "logps/chosen": -702.34716796875, + "logps/ref_chosen": -49.652687072753906, + "logps/ref_rejected": -98.40513610839844, + "logps/rejected": -1182.871826171875, + "loss": 1.0734, + "margin_dpo/margin_mean": 431.7720947265625, + "margin_dpo/margin_std": 576.40869140625, + "step": 637 + }, + { + "KL/chosen_KL_mean": -654.815185546875, + "KL/mean": -831.240234375, + "KL/rejected_KL_mean": -1007.665283203125, + "KL/std": 540.849609375, + "epoch": 0.9368575624082232, + "fcm_dpo/beta": 0.001035545952618122, + "fcm_dpo/delta": 0.03378577530384064, + "fcm_dpo/margin": 352.8500671386719, + "fcm_dpo/q_t": 0.41145235300064087, + "grad_norm": 40.42203140258789, + "learning_rate": 6.349874889624962e-09, + "logits/chosen": -0.9185539484024048, + "logits/rejected": -0.8953431248664856, + "logps/chosen": -712.9718017578125, + "logps/ref_chosen": -58.156639099121094, + "logps/ref_rejected": -79.3014907836914, + "logps/rejected": -1086.966796875, + "loss": 1.1535, + "margin_dpo/margin_mean": 352.8500671386719, + "margin_dpo/margin_std": 612.448974609375, + "step": 638 + }, + { + "KL/chosen_KL_mean": -840.7874755859375, + "KL/mean": -925.3656005859375, + "KL/rejected_KL_mean": -1009.9437866210938, + "KL/std": 470.619140625, + "epoch": 0.9383259911894273, + "fcm_dpo/beta": 0.0010275545064359903, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 169.1563262939453, + "fcm_dpo/q_t": 0.46009236574172974, + "grad_norm": 89.78936004638672, + "learning_rate": 6.065683776815933e-09, + "logits/chosen": -0.9101927876472473, + "logits/rejected": -0.8411852121353149, + "logps/chosen": -913.1106567382812, + "logps/ref_chosen": -72.32319641113281, + "logps/ref_rejected": -74.2749252319336, + "logps/rejected": -1084.21875, + "loss": 1.3033, + "margin_dpo/margin_mean": 169.15634155273438, + "margin_dpo/margin_std": 571.2896728515625, + "step": 639 + }, + { + "KL/chosen_KL_mean": -661.3975830078125, + "KL/mean": -938.8929443359375, + "KL/rejected_KL_mean": -1216.3883056640625, + "KL/std": 608.6758422851562, + "epoch": 0.9397944199706314, + "fcm_dpo/beta": 0.0010085678659379482, + "fcm_dpo/delta": -0.16897350549697876, + "fcm_dpo/margin": 554.99072265625, + "fcm_dpo/q_t": 0.379363477230072, + "grad_norm": 40.64341354370117, + "learning_rate": 5.7879205600998296e-09, + "logits/chosen": -0.9130121469497681, + "logits/rejected": -0.9402400255203247, + "logps/chosen": -717.5319213867188, + "logps/ref_chosen": -56.13436508178711, + "logps/ref_rejected": -108.60014343261719, + "logps/rejected": -1324.9884033203125, + "loss": 1.0178, + "margin_dpo/margin_mean": 554.99072265625, + "margin_dpo/margin_std": 715.995361328125, + "step": 640 + }, + { + "KL/chosen_KL_mean": -758.479736328125, + "KL/mean": -919.178466796875, + "KL/rejected_KL_mean": -1079.877197265625, + "KL/std": 485.34954833984375, + "epoch": 0.9412628487518355, + "fcm_dpo/beta": 0.0010051288409158587, + "fcm_dpo/delta": 0.07953417301177979, + "fcm_dpo/margin": 321.3974609375, + "fcm_dpo/q_t": 0.42806270718574524, + "grad_norm": 30.03923797607422, + "learning_rate": 5.516592558795746e-09, + "logits/chosen": -0.9936619997024536, + "logits/rejected": -0.9894883036613464, + "logps/chosen": -823.4765625, + "logps/ref_chosen": -64.99689483642578, + "logps/ref_rejected": -86.99232482910156, + "logps/rejected": -1166.8695068359375, + "loss": 1.1793, + "margin_dpo/margin_mean": 321.3974609375, + "margin_dpo/margin_std": 621.0615234375, + "step": 641 + }, + { + "KL/chosen_KL_mean": -754.40771484375, + "KL/mean": -960.8053588867188, + "KL/rejected_KL_mean": -1167.2030029296875, + "KL/std": 651.447021484375, + "epoch": 0.9427312775330396, + "fcm_dpo/beta": 0.0010101549560204148, + "fcm_dpo/delta": -0.017909951508045197, + "fcm_dpo/margin": 412.79522705078125, + "fcm_dpo/q_t": 0.4139317274093628, + "grad_norm": 40.49140167236328, + "learning_rate": 5.251706922648868e-09, + "logits/chosen": -0.9240103960037231, + "logits/rejected": -0.9492435455322266, + "logps/chosen": -820.0969848632812, + "logps/ref_chosen": -65.68924713134766, + "logps/ref_rejected": -110.24205017089844, + "logps/rejected": -1277.445068359375, + "loss": 1.1415, + "margin_dpo/margin_mean": 412.7952575683594, + "margin_dpo/margin_std": 782.796142578125, + "step": 642 + }, + { + "KL/chosen_KL_mean": -682.908935546875, + "KL/mean": -835.4664306640625, + "KL/rejected_KL_mean": -988.0238037109375, + "KL/std": 504.5992431640625, + "epoch": 0.9441997063142438, + "fcm_dpo/beta": 0.0010031081037595868, + "fcm_dpo/delta": -0.013042459264397621, + "fcm_dpo/margin": 305.1148986816406, + "fcm_dpo/q_t": 0.42987653613090515, + "grad_norm": 39.44825744628906, + "learning_rate": 4.993270631642038e-09, + "logits/chosen": -1.0819464921951294, + "logits/rejected": -1.0728490352630615, + "logps/chosen": -734.8589477539062, + "logps/ref_chosen": -51.94999694824219, + "logps/ref_rejected": -87.46833801269531, + "logps/rejected": -1075.4921875, + "loss": 1.1687, + "margin_dpo/margin_mean": 305.1148986816406, + "margin_dpo/margin_std": 513.481689453125, + "step": 643 + }, + { + "KL/chosen_KL_mean": -659.0570068359375, + "KL/mean": -824.5692138671875, + "KL/rejected_KL_mean": -990.0814208984375, + "KL/std": 571.162841796875, + "epoch": 0.9456681350954479, + "fcm_dpo/beta": 0.0010127369314432144, + "fcm_dpo/delta": 0.06703174114227295, + "fcm_dpo/margin": 331.0244140625, + "fcm_dpo/q_t": 0.4254780113697052, + "grad_norm": 36.88336181640625, + "learning_rate": 4.741290495811873e-09, + "logits/chosen": -0.9800692796707153, + "logits/rejected": -0.9834997653961182, + "logps/chosen": -718.07470703125, + "logps/ref_chosen": -59.017662048339844, + "logps/ref_rejected": -87.13668823242188, + "logps/rejected": -1077.2181396484375, + "loss": 1.1787, + "margin_dpo/margin_mean": 331.0244140625, + "margin_dpo/margin_std": 643.7864990234375, + "step": 644 + }, + { + "KL/chosen_KL_mean": -694.98095703125, + "KL/mean": -790.7579345703125, + "KL/rejected_KL_mean": -886.534912109375, + "KL/std": 483.12469482421875, + "epoch": 0.947136563876652, + "fcm_dpo/beta": 0.0010233320062980056, + "fcm_dpo/delta": 0.032772209495306015, + "fcm_dpo/margin": 191.55389404296875, + "fcm_dpo/q_t": 0.4574551284313202, + "grad_norm": 85.91677856445312, + "learning_rate": 4.495773155069299e-09, + "logits/chosen": -0.9518178105354309, + "logits/rejected": -0.940590500831604, + "logps/chosen": -750.8569946289062, + "logps/ref_chosen": -55.87602233886719, + "logps/ref_rejected": -97.78080749511719, + "logps/rejected": -984.315673828125, + "loss": 1.3102, + "margin_dpo/margin_mean": 191.55389404296875, + "margin_dpo/margin_std": 646.7799072265625, + "step": 645 + }, + { + "KL/chosen_KL_mean": -637.5367431640625, + "KL/mean": -772.5591430664062, + "KL/rejected_KL_mean": -907.58154296875, + "KL/std": 423.0977783203125, + "epoch": 0.9486049926578561, + "fcm_dpo/beta": 0.0010392502881586552, + "fcm_dpo/delta": 0.12298852950334549, + "fcm_dpo/margin": 270.04473876953125, + "fcm_dpo/q_t": 0.4354844391345978, + "grad_norm": 37.95448684692383, + "learning_rate": 4.256725079024553e-09, + "logits/chosen": -0.9876176118850708, + "logits/rejected": -0.9597277641296387, + "logps/chosen": -698.8125610351562, + "logps/ref_chosen": -61.275787353515625, + "logps/ref_rejected": -77.50580596923828, + "logps/rejected": -985.0872802734375, + "loss": 1.1861, + "margin_dpo/margin_mean": 270.04473876953125, + "margin_dpo/margin_std": 478.6196594238281, + "step": 646 + }, + { + "KL/chosen_KL_mean": -578.6500854492188, + "KL/mean": -764.76220703125, + "KL/rejected_KL_mean": -950.8743896484375, + "KL/std": 502.62335205078125, + "epoch": 0.9500734214390602, + "fcm_dpo/beta": 0.001051081228069961, + "fcm_dpo/delta": 0.009098398499190807, + "fcm_dpo/margin": 372.2242736816406, + "fcm_dpo/q_t": 0.40922337770462036, + "grad_norm": 39.47010040283203, + "learning_rate": 4.024152566816791e-09, + "logits/chosen": -0.8613879680633545, + "logits/rejected": -0.8856191039085388, + "logps/chosen": -633.5025024414062, + "logps/ref_chosen": -54.8524169921875, + "logps/ref_rejected": -93.5194091796875, + "logps/rejected": -1044.393798828125, + "loss": 1.0951, + "margin_dpo/margin_mean": 372.22430419921875, + "margin_dpo/margin_std": 483.755615234375, + "step": 647 + }, + { + "KL/chosen_KL_mean": -618.8386840820312, + "KL/mean": -882.6348266601562, + "KL/rejected_KL_mean": -1146.430908203125, + "KL/std": 597.5460815429688, + "epoch": 0.9515418502202643, + "fcm_dpo/beta": 0.0010257565882056952, + "fcm_dpo/delta": -0.14960268139839172, + "fcm_dpo/margin": 527.59228515625, + "fcm_dpo/q_t": 0.38258910179138184, + "grad_norm": 26.944942474365234, + "learning_rate": 3.798061746947995e-09, + "logits/chosen": -1.024482011795044, + "logits/rejected": -1.0812674760818481, + "logps/chosen": -673.0101318359375, + "logps/ref_chosen": -54.17146682739258, + "logps/ref_rejected": -98.7127914428711, + "logps/rejected": -1245.143798828125, + "loss": 1.0229, + "margin_dpo/margin_mean": 527.59228515625, + "margin_dpo/margin_std": 698.5792846679688, + "step": 648 + }, + { + "KL/chosen_KL_mean": -641.0887451171875, + "KL/mean": -776.3487548828125, + "KL/rejected_KL_mean": -911.6088256835938, + "KL/std": 461.4584045410156, + "epoch": 0.9530102790014684, + "fcm_dpo/beta": 0.00104125018697232, + "fcm_dpo/delta": 0.12126278877258301, + "fcm_dpo/margin": 270.52008056640625, + "fcm_dpo/q_t": 0.43895599246025085, + "grad_norm": 37.985782623291016, + "learning_rate": 3.5784585771215235e-09, + "logits/chosen": -1.062050223350525, + "logits/rejected": -1.049740195274353, + "logps/chosen": -703.569091796875, + "logps/ref_chosen": -62.480350494384766, + "logps/ref_rejected": -80.07717895507812, + "logps/rejected": -991.68603515625, + "loss": 1.2132, + "margin_dpo/margin_mean": 270.5200500488281, + "margin_dpo/margin_std": 580.6653442382812, + "step": 649 + }, + { + "KL/chosen_KL_mean": -681.760498046875, + "KL/mean": -890.84912109375, + "KL/rejected_KL_mean": -1099.9375, + "KL/std": 568.1502685546875, + "epoch": 0.9544787077826725, + "fcm_dpo/beta": 0.0010373436380177736, + "fcm_dpo/delta": -0.035463616251945496, + "fcm_dpo/margin": 418.1771545410156, + "fcm_dpo/q_t": 0.40345823764801025, + "grad_norm": 34.3553352355957, + "learning_rate": 3.3653488440851253e-09, + "logits/chosen": -0.9683902859687805, + "logits/rejected": -0.9844076037406921, + "logps/chosen": -737.853271484375, + "logps/ref_chosen": -56.09281921386719, + "logps/ref_rejected": -98.26483917236328, + "logps/rejected": -1198.202392578125, + "loss": 1.1016, + "margin_dpo/margin_mean": 418.1771545410156, + "margin_dpo/margin_std": 649.6616821289062, + "step": 650 + }, + { + "KL/chosen_KL_mean": -489.3945007324219, + "KL/mean": -743.3966674804688, + "KL/rejected_KL_mean": -997.3988037109375, + "KL/std": 551.2980346679688, + "epoch": 0.9559471365638766, + "fcm_dpo/beta": 0.0010176938958466053, + "fcm_dpo/delta": -0.12348881363868713, + "fcm_dpo/margin": 508.0043640136719, + "fcm_dpo/q_t": 0.38280242681503296, + "grad_norm": 31.118337631225586, + "learning_rate": 3.158738163478475e-09, + "logits/chosen": -1.0014972686767578, + "logits/rejected": -1.0536954402923584, + "logps/chosen": -532.8199462890625, + "logps/ref_chosen": -43.42544937133789, + "logps/ref_rejected": -99.95791625976562, + "logps/rejected": -1097.356689453125, + "loss": 1.0078, + "margin_dpo/margin_mean": 508.0043640136719, + "margin_dpo/margin_std": 563.4285888671875, + "step": 651 + }, + { + "KL/chosen_KL_mean": -616.5572509765625, + "KL/mean": -798.780029296875, + "KL/rejected_KL_mean": -981.0028076171875, + "KL/std": 537.9312744140625, + "epoch": 0.9574155653450808, + "fcm_dpo/beta": 0.001018517417833209, + "fcm_dpo/delta": 0.029787715524435043, + "fcm_dpo/margin": 364.4455871582031, + "fcm_dpo/q_t": 0.41681456565856934, + "grad_norm": 38.289588928222656, + "learning_rate": 2.9586319796851555e-09, + "logits/chosen": -1.037517786026001, + "logits/rejected": -1.0513508319854736, + "logps/chosen": -679.134033203125, + "logps/ref_chosen": -62.57680892944336, + "logps/ref_rejected": -111.76779174804688, + "logps/rejected": -1092.7706298828125, + "loss": 1.1381, + "margin_dpo/margin_mean": 364.44561767578125, + "margin_dpo/margin_std": 614.3720703125, + "step": 652 + }, + { + "KL/chosen_KL_mean": -721.114013671875, + "KL/mean": -896.546630859375, + "KL/rejected_KL_mean": -1071.979248046875, + "KL/std": 555.6015625, + "epoch": 0.9588839941262849, + "fcm_dpo/beta": 0.0010254649678245187, + "fcm_dpo/delta": 0.04164200276136398, + "fcm_dpo/margin": 350.8651123046875, + "fcm_dpo/q_t": 0.4201071858406067, + "grad_norm": 32.48366165161133, + "learning_rate": 2.7650355656892166e-09, + "logits/chosen": -1.0504939556121826, + "logits/rejected": -1.0696086883544922, + "logps/chosen": -782.2269897460938, + "logps/ref_chosen": -61.11295700073242, + "logps/ref_rejected": -103.24960327148438, + "logps/rejected": -1175.228759765625, + "loss": 1.1448, + "margin_dpo/margin_mean": 350.8651123046875, + "margin_dpo/margin_std": 597.3900146484375, + "step": 653 + }, + { + "KL/chosen_KL_mean": -651.263427734375, + "KL/mean": -819.9118041992188, + "KL/rejected_KL_mean": -988.5601196289062, + "KL/std": 463.3123779296875, + "epoch": 0.960352422907489, + "fcm_dpo/beta": 0.0010335429105907679, + "fcm_dpo/delta": 0.053280387073755264, + "fcm_dpo/margin": 337.2967224121094, + "fcm_dpo/q_t": 0.4215894043445587, + "grad_norm": 29.535350799560547, + "learning_rate": 2.577954022936174e-09, + "logits/chosen": -1.0323097705841064, + "logits/rejected": -1.0458433628082275, + "logps/chosen": -712.9915771484375, + "logps/ref_chosen": -61.7281379699707, + "logps/ref_rejected": -98.7738037109375, + "logps/rejected": -1087.333984375, + "loss": 1.1394, + "margin_dpo/margin_mean": 337.2967224121094, + "margin_dpo/margin_std": 535.9071044921875, + "step": 654 + }, + { + "KL/chosen_KL_mean": -600.1435546875, + "KL/mean": -785.5983276367188, + "KL/rejected_KL_mean": -971.0531005859375, + "KL/std": 495.2772216796875, + "epoch": 0.9618208516886931, + "fcm_dpo/beta": 0.0010393604170531034, + "fcm_dpo/delta": 0.01507401093840599, + "fcm_dpo/margin": 370.9095458984375, + "fcm_dpo/q_t": 0.4142192006111145, + "grad_norm": 32.12811279296875, + "learning_rate": 2.397392281198729e-09, + "logits/chosen": -0.987531304359436, + "logits/rejected": -1.030656337738037, + "logps/chosen": -649.7203369140625, + "logps/ref_chosen": -49.576812744140625, + "logps/ref_rejected": -98.29183197021484, + "logps/rejected": -1069.344970703125, + "loss": 1.1228, + "margin_dpo/margin_mean": 370.9095458984375, + "margin_dpo/margin_std": 588.18408203125, + "step": 655 + }, + { + "KL/chosen_KL_mean": -635.6533203125, + "KL/mean": -928.177734375, + "KL/rejected_KL_mean": -1220.7020263671875, + "KL/std": 598.5955810546875, + "epoch": 0.9632892804698973, + "fcm_dpo/beta": 0.0010131911840289831, + "fcm_dpo/delta": -0.20445646345615387, + "fcm_dpo/margin": 585.0487060546875, + "fcm_dpo/q_t": 0.3669106960296631, + "grad_norm": 47.409523010253906, + "learning_rate": 2.223355098446622e-09, + "logits/chosen": -0.9014885425567627, + "logits/rejected": -0.9727605581283569, + "logps/chosen": -688.2027587890625, + "logps/ref_chosen": -52.54943084716797, + "logps/ref_rejected": -113.67464447021484, + "logps/rejected": -1334.376708984375, + "loss": 0.9602, + "margin_dpo/margin_mean": 585.0487060546875, + "margin_dpo/margin_std": 599.3031005859375, + "step": 656 + }, + { + "KL/chosen_KL_mean": -615.9948120117188, + "KL/mean": -864.385498046875, + "KL/rejected_KL_mean": -1112.776123046875, + "KL/std": 595.528564453125, + "epoch": 0.9647577092511013, + "fcm_dpo/beta": 0.0009820859413594007, + "fcm_dpo/delta": -0.09292930364608765, + "fcm_dpo/margin": 496.7813720703125, + "fcm_dpo/q_t": 0.39006322622299194, + "grad_norm": 31.6498966217041, + "learning_rate": 2.055847060721566e-09, + "logits/chosen": -1.0363855361938477, + "logits/rejected": -1.0796374082565308, + "logps/chosen": -662.6953125, + "logps/ref_chosen": -46.700538635253906, + "logps/ref_rejected": -97.91487121582031, + "logps/rejected": -1210.6910400390625, + "loss": 1.0397, + "margin_dpo/margin_mean": 496.7813720703125, + "margin_dpo/margin_std": 621.57470703125, + "step": 657 + }, + { + "KL/chosen_KL_mean": -664.8219604492188, + "KL/mean": -847.6753540039062, + "KL/rejected_KL_mean": -1030.5286865234375, + "KL/std": 466.9921875, + "epoch": 0.9662261380323054, + "fcm_dpo/beta": 0.00098237837664783, + "fcm_dpo/delta": 0.04202239215373993, + "fcm_dpo/margin": 365.70672607421875, + "fcm_dpo/q_t": 0.4168873727321625, + "grad_norm": 35.535884857177734, + "learning_rate": 1.8948725820160662e-09, + "logits/chosen": -0.9736270904541016, + "logits/rejected": -0.9889022707939148, + "logps/chosen": -725.7801513671875, + "logps/ref_chosen": -60.95820999145508, + "logps/ref_rejected": -95.93949127197266, + "logps/rejected": -1126.46826171875, + "loss": 1.124, + "margin_dpo/margin_mean": 365.70672607421875, + "margin_dpo/margin_std": 524.6923217773438, + "step": 658 + }, + { + "KL/chosen_KL_mean": -604.6826782226562, + "KL/mean": -799.8530883789062, + "KL/rejected_KL_mean": -995.0235595703125, + "KL/std": 473.0037841796875, + "epoch": 0.9676945668135095, + "fcm_dpo/beta": 0.0009883574675768614, + "fcm_dpo/delta": 0.014654016122221947, + "fcm_dpo/margin": 390.34075927734375, + "fcm_dpo/q_t": 0.41146624088287354, + "grad_norm": 25.766517639160156, + "learning_rate": 1.7404359041573723e-09, + "logits/chosen": -0.9188249111175537, + "logits/rejected": -0.8752338886260986, + "logps/chosen": -681.4256591796875, + "logps/ref_chosen": -76.74298095703125, + "logps/ref_rejected": -87.4709701538086, + "logps/rejected": -1082.4945068359375, + "loss": 1.0978, + "margin_dpo/margin_mean": 390.3408203125, + "margin_dpo/margin_std": 514.568359375, + "step": 659 + }, + { + "KL/chosen_KL_mean": -646.2470703125, + "KL/mean": -875.0294189453125, + "KL/rejected_KL_mean": -1103.811767578125, + "KL/std": 534.911865234375, + "epoch": 0.9691629955947136, + "fcm_dpo/beta": 0.000986184342764318, + "fcm_dpo/delta": -0.0536465048789978, + "fcm_dpo/margin": 457.5646057128906, + "fcm_dpo/q_t": 0.39679035544395447, + "grad_norm": 49.228233337402344, + "learning_rate": 1.592541096695571e-09, + "logits/chosen": -0.9931929111480713, + "logits/rejected": -0.995282769203186, + "logps/chosen": -705.294921875, + "logps/ref_chosen": -59.04788589477539, + "logps/ref_rejected": -75.96005249023438, + "logps/rejected": -1179.771728515625, + "loss": 1.0585, + "margin_dpo/margin_mean": 457.5645751953125, + "margin_dpo/margin_std": 571.7623291015625, + "step": 660 + }, + { + "KL/chosen_KL_mean": -567.4749145507812, + "KL/mean": -781.959716796875, + "KL/rejected_KL_mean": -996.4445190429688, + "KL/std": 581.6163330078125, + "epoch": 0.9706314243759178, + "fcm_dpo/beta": 0.0009824851294979453, + "fcm_dpo/delta": -0.022726453840732574, + "fcm_dpo/margin": 428.9695739746094, + "fcm_dpo/q_t": 0.40450412034988403, + "grad_norm": 37.587955474853516, + "learning_rate": 1.4511920567963908e-09, + "logits/chosen": -1.0141196250915527, + "logits/rejected": -1.0162596702575684, + "logps/chosen": -618.14892578125, + "logps/ref_chosen": -50.673973083496094, + "logps/ref_rejected": -86.00569152832031, + "logps/rejected": -1082.4501953125, + "loss": 1.0753, + "margin_dpo/margin_mean": 428.96954345703125, + "margin_dpo/margin_std": 554.69189453125, + "step": 661 + }, + { + "KL/chosen_KL_mean": -687.85302734375, + "KL/mean": -843.4173583984375, + "KL/rejected_KL_mean": -998.9815673828125, + "KL/std": 508.81829833984375, + "epoch": 0.9720998531571219, + "fcm_dpo/beta": 0.0009899393189698458, + "fcm_dpo/delta": 0.09499240666627884, + "fcm_dpo/margin": 311.1284484863281, + "fcm_dpo/q_t": 0.4304784834384918, + "grad_norm": 34.92011260986328, + "learning_rate": 1.3163925091384532e-09, + "logits/chosen": -0.9697600603103638, + "logits/rejected": -0.9555808901786804, + "logps/chosen": -757.1141357421875, + "logps/ref_chosen": -69.26106262207031, + "logps/ref_rejected": -89.05593872070312, + "logps/rejected": -1088.0374755859375, + "loss": 1.1943, + "margin_dpo/margin_mean": 311.1284484863281, + "margin_dpo/margin_std": 634.947998046875, + "step": 662 + }, + { + "KL/chosen_KL_mean": -626.7081298828125, + "KL/mean": -834.0655517578125, + "KL/rejected_KL_mean": -1041.4229736328125, + "KL/std": 581.6314697265625, + "epoch": 0.973568281938326, + "fcm_dpo/beta": 0.000994151458144188, + "fcm_dpo/delta": -0.01281630527228117, + "fcm_dpo/margin": 414.7147521972656, + "fcm_dpo/q_t": 0.40818944573402405, + "grad_norm": 28.98455047607422, + "learning_rate": 1.1881460058152382e-09, + "logits/chosen": -1.0159096717834473, + "logits/rejected": -1.0367473363876343, + "logps/chosen": -691.5870361328125, + "logps/ref_chosen": -64.87890625, + "logps/ref_rejected": -113.92536926269531, + "logps/rejected": -1155.348388671875, + "loss": 1.1103, + "margin_dpo/margin_mean": 414.71478271484375, + "margin_dpo/margin_std": 667.8819580078125, + "step": 663 + }, + { + "KL/chosen_KL_mean": -645.205322265625, + "KL/mean": -881.735107421875, + "KL/rejected_KL_mean": -1118.2647705078125, + "KL/std": 575.0771484375, + "epoch": 0.9750367107195301, + "fcm_dpo/beta": 0.000979449599981308, + "fcm_dpo/delta": -0.06696485728025436, + "fcm_dpo/margin": 473.0594482421875, + "fcm_dpo/q_t": 0.3954671621322632, + "grad_norm": 26.692996978759766, + "learning_rate": 1.066455926241383e-09, + "logits/chosen": -0.9881083965301514, + "logits/rejected": -1.0177645683288574, + "logps/chosen": -706.0938110351562, + "logps/ref_chosen": -60.88847351074219, + "logps/ref_rejected": -105.521728515625, + "logps/rejected": -1223.7864990234375, + "loss": 1.0597, + "margin_dpo/margin_mean": 473.0594177246094, + "margin_dpo/margin_std": 619.4970703125, + "step": 664 + }, + { + "KL/chosen_KL_mean": -595.4884033203125, + "KL/mean": -785.0396728515625, + "KL/rejected_KL_mean": -974.5908813476562, + "KL/std": 466.5865478515625, + "epoch": 0.9765051395007343, + "fcm_dpo/beta": 0.0009838908445090055, + "fcm_dpo/delta": 0.028051599860191345, + "fcm_dpo/margin": 379.10247802734375, + "fcm_dpo/q_t": 0.41350919008255005, + "grad_norm": 33.21379089355469, + "learning_rate": 9.513254770636137e-10, + "logits/chosen": -1.076425313949585, + "logits/rejected": -1.0883920192718506, + "logps/chosen": -656.0525512695312, + "logps/ref_chosen": -60.56413269042969, + "logps/ref_rejected": -84.80882263183594, + "logps/rejected": -1059.399658203125, + "loss": 1.0975, + "margin_dpo/margin_mean": 379.10247802734375, + "margin_dpo/margin_std": 466.126220703125, + "step": 665 + }, + { + "KL/chosen_KL_mean": -640.5621337890625, + "KL/mean": -844.2626953125, + "KL/rejected_KL_mean": -1047.96337890625, + "KL/std": 506.4635314941406, + "epoch": 0.9779735682819384, + "fcm_dpo/beta": 0.00098421610891819, + "fcm_dpo/delta": -0.001031767576932907, + "fcm_dpo/margin": 407.4012451171875, + "fcm_dpo/q_t": 0.4083176255226135, + "grad_norm": 28.340333938598633, + "learning_rate": 8.427576920763956e-10, + "logits/chosen": -0.9047819375991821, + "logits/rejected": -0.9091357588768005, + "logps/chosen": -704.9820556640625, + "logps/ref_chosen": -64.41996002197266, + "logps/ref_rejected": -95.8916244506836, + "logps/rejected": -1143.85498046875, + "loss": 1.0931, + "margin_dpo/margin_mean": 407.4012451171875, + "margin_dpo/margin_std": 544.3861083984375, + "step": 666 + }, + { + "KL/chosen_KL_mean": -708.0230712890625, + "KL/mean": -926.270751953125, + "KL/rejected_KL_mean": -1144.5185546875, + "KL/std": 533.2125244140625, + "epoch": 0.9794419970631424, + "fcm_dpo/beta": 0.0009798547253012657, + "fcm_dpo/delta": -0.028992321342229843, + "fcm_dpo/margin": 436.495361328125, + "fcm_dpo/q_t": 0.40200570225715637, + "grad_norm": 37.009464263916016, + "learning_rate": 7.407554321417764e-10, + "logits/chosen": -0.9339680671691895, + "logits/rejected": -0.918968677520752, + "logps/chosen": -777.3001098632812, + "logps/ref_chosen": -69.27702331542969, + "logps/ref_rejected": -87.83549499511719, + "logps/rejected": -1232.35400390625, + "loss": 1.0786, + "margin_dpo/margin_mean": 436.495361328125, + "margin_dpo/margin_std": 579.8436889648438, + "step": 667 + }, + { + "KL/chosen_KL_mean": -741.93408203125, + "KL/mean": -907.15625, + "KL/rejected_KL_mean": -1072.37841796875, + "KL/std": 566.6580810546875, + "epoch": 0.9809104258443465, + "fcm_dpo/beta": 0.0009952853433787823, + "fcm_dpo/delta": 0.07259482145309448, + "fcm_dpo/margin": 330.4443359375, + "fcm_dpo/q_t": 0.42794138193130493, + "grad_norm": 33.102317810058594, + "learning_rate": 6.453213851142225e-10, + "logits/chosen": -1.0173263549804688, + "logits/rejected": -1.014156460762024, + "logps/chosen": -814.5380859375, + "logps/ref_chosen": -72.60400390625, + "logps/ref_rejected": -103.73905944824219, + "logps/rejected": -1176.117431640625, + "loss": 1.1857, + "margin_dpo/margin_mean": 330.4443359375, + "margin_dpo/margin_std": 662.1550903320312, + "step": 668 + }, + { + "KL/chosen_KL_mean": -590.9251708984375, + "KL/mean": -818.6317749023438, + "KL/rejected_KL_mean": -1046.33837890625, + "KL/std": 550.6939697265625, + "epoch": 0.9823788546255506, + "fcm_dpo/beta": 0.000986847560852766, + "fcm_dpo/delta": -0.051722507923841476, + "fcm_dpo/margin": 455.413330078125, + "fcm_dpo/q_t": 0.3974974751472473, + "grad_norm": 25.456424713134766, + "learning_rate": 5.564580657695939e-10, + "logits/chosen": -0.9681833982467651, + "logits/rejected": -0.9636249542236328, + "logps/chosen": -637.0415649414062, + "logps/ref_chosen": -46.116416931152344, + "logps/ref_rejected": -77.92434692382812, + "logps/rejected": -1124.2626953125, + "loss": 1.0695, + "margin_dpo/margin_mean": 455.41326904296875, + "margin_dpo/margin_std": 612.3760986328125, + "step": 669 + }, + { + "KL/chosen_KL_mean": -551.4520263671875, + "KL/mean": -797.196533203125, + "KL/rejected_KL_mean": -1042.94091796875, + "KL/std": 524.746826171875, + "epoch": 0.9838472834067548, + "fcm_dpo/beta": 0.000975792994722724, + "fcm_dpo/delta": -0.08364107459783554, + "fcm_dpo/margin": 491.48895263671875, + "fcm_dpo/q_t": 0.39131563901901245, + "grad_norm": 32.25609588623047, + "learning_rate": 4.741678157389739e-10, + "logits/chosen": -0.9351658225059509, + "logits/rejected": -0.9491223096847534, + "logps/chosen": -613.7977905273438, + "logps/ref_chosen": -62.34575271606445, + "logps/ref_rejected": -96.9405517578125, + "logps/rejected": -1139.881591796875, + "loss": 1.049, + "margin_dpo/margin_mean": 491.48895263671875, + "margin_dpo/margin_std": 618.3614501953125, + "step": 670 + }, + { + "KL/chosen_KL_mean": -666.8336181640625, + "KL/mean": -867.621337890625, + "KL/rejected_KL_mean": -1068.4091796875, + "KL/std": 490.7064514160156, + "epoch": 0.9853157121879589, + "fcm_dpo/beta": 0.0009718855144456029, + "fcm_dpo/delta": 0.009846452623605728, + "fcm_dpo/margin": 401.5755310058594, + "fcm_dpo/q_t": 0.4098934829235077, + "grad_norm": 28.569211959838867, + "learning_rate": 3.9845280344705245e-10, + "logits/chosen": -0.9902809858322144, + "logits/rejected": -1.0138908624649048, + "logps/chosen": -714.833740234375, + "logps/ref_chosen": -48.00010681152344, + "logps/ref_rejected": -83.81932067871094, + "logps/rejected": -1152.228515625, + "loss": 1.1089, + "margin_dpo/margin_mean": 401.5755310058594, + "margin_dpo/margin_std": 580.24072265625, + "step": 671 + }, + { + "KL/chosen_KL_mean": -731.397216796875, + "KL/mean": -927.7864990234375, + "KL/rejected_KL_mean": -1124.175537109375, + "KL/std": 593.037109375, + "epoch": 0.986784140969163, + "fcm_dpo/beta": 0.0009703624527901411, + "fcm_dpo/delta": 0.019587505608797073, + "fcm_dpo/margin": 392.7784423828125, + "fcm_dpo/q_t": 0.41550886631011963, + "grad_norm": 36.741268157958984, + "learning_rate": 3.293150240547549e-10, + "logits/chosen": -1.065507411956787, + "logits/rejected": -1.0642685890197754, + "logps/chosen": -789.9805297851562, + "logps/ref_chosen": -58.58328628540039, + "logps/ref_rejected": -93.14015197753906, + "logps/rejected": -1217.3157958984375, + "loss": 1.1383, + "margin_dpo/margin_mean": 392.7784118652344, + "margin_dpo/margin_std": 660.264892578125, + "step": 672 + }, + { + "KL/chosen_KL_mean": -674.35888671875, + "KL/mean": -860.500732421875, + "KL/rejected_KL_mean": -1046.642578125, + "KL/std": 500.42138671875, + "epoch": 0.9882525697503671, + "fcm_dpo/beta": 0.0009781282860785723, + "fcm_dpo/delta": 0.03722069412469864, + "fcm_dpo/margin": 372.2838134765625, + "fcm_dpo/q_t": 0.41715848445892334, + "grad_norm": 30.6710262298584, + "learning_rate": 2.6675629940689504e-10, + "logits/chosen": -1.0316221714019775, + "logits/rejected": -1.03529691696167, + "logps/chosen": -721.08203125, + "logps/ref_chosen": -46.72320556640625, + "logps/ref_rejected": -85.29623413085938, + "logps/rejected": -1131.9388427734375, + "loss": 1.1215, + "margin_dpo/margin_mean": 372.2838134765625, + "margin_dpo/margin_std": 559.671142578125, + "step": 673 + }, + { + "KL/chosen_KL_mean": -570.3616943359375, + "KL/mean": -808.6674194335938, + "KL/rejected_KL_mean": -1046.97314453125, + "KL/std": 530.7771606445312, + "epoch": 0.9897209985315712, + "fcm_dpo/beta": 0.0009717537323012948, + "fcm_dpo/delta": -0.06616582721471786, + "fcm_dpo/margin": 476.61138916015625, + "fcm_dpo/q_t": 0.39771580696105957, + "grad_norm": 31.129558563232422, + "learning_rate": 2.1077827798404725e-10, + "logits/chosen": -0.929929792881012, + "logits/rejected": -0.9406229257583618, + "logps/chosen": -615.8072509765625, + "logps/ref_chosen": -45.445526123046875, + "logps/ref_rejected": -70.04593658447266, + "logps/rejected": -1117.01904296875, + "loss": 1.0577, + "margin_dpo/margin_mean": 476.61138916015625, + "margin_dpo/margin_std": 633.5640258789062, + "step": 674 + }, + { + "KL/chosen_KL_mean": -638.90673828125, + "KL/mean": -887.892578125, + "KL/rejected_KL_mean": -1136.87841796875, + "KL/std": 577.7835693359375, + "epoch": 0.9911894273127754, + "fcm_dpo/beta": 0.0009487034403719008, + "fcm_dpo/delta": -0.07788591086864471, + "fcm_dpo/margin": 497.97161865234375, + "fcm_dpo/q_t": 0.3958815634250641, + "grad_norm": 26.884960174560547, + "learning_rate": 1.6138243485910863e-10, + "logits/chosen": -0.9694858193397522, + "logits/rejected": -0.984051525592804, + "logps/chosen": -683.0830688476562, + "logps/ref_chosen": -44.17628479003906, + "logps/ref_rejected": -74.09197998046875, + "logps/rejected": -1210.970458984375, + "loss": 1.0522, + "margin_dpo/margin_mean": 497.97161865234375, + "margin_dpo/margin_std": 613.9585571289062, + "step": 675 + }, + { + "KL/chosen_KL_mean": -707.06591796875, + "KL/mean": -924.9794921875, + "KL/rejected_KL_mean": -1142.89306640625, + "KL/std": 531.7125244140625, + "epoch": 0.9926578560939795, + "fcm_dpo/beta": 0.0009498898871243, + "fcm_dpo/delta": -0.01460132747888565, + "fcm_dpo/margin": 435.82708740234375, + "fcm_dpo/q_t": 0.4043177366256714, + "grad_norm": 30.11288833618164, + "learning_rate": 1.1857007165852472e-10, + "logits/chosen": -0.9417062997817993, + "logits/rejected": -0.9432613849639893, + "logps/chosen": -778.4644775390625, + "logps/ref_chosen": -71.39852905273438, + "logps/ref_rejected": -88.3587646484375, + "logps/rejected": -1231.2518310546875, + "loss": 1.074, + "margin_dpo/margin_mean": 435.8271179199219, + "margin_dpo/margin_std": 524.7921142578125, + "step": 676 + }, + { + "KL/chosen_KL_mean": -676.025634765625, + "KL/mean": -884.4794921875, + "KL/rejected_KL_mean": -1092.933349609375, + "KL/std": 490.99835205078125, + "epoch": 0.9941262848751835, + "fcm_dpo/beta": 0.0009504948975518346, + "fcm_dpo/delta": 0.0038806493394076824, + "fcm_dpo/margin": 416.90771484375, + "fcm_dpo/q_t": 0.4103269577026367, + "grad_norm": 33.28916931152344, + "learning_rate": 8.23423165278725e-11, + "logits/chosen": -1.00029718875885, + "logits/rejected": -0.9821897745132446, + "logps/chosen": -732.5531005859375, + "logps/ref_chosen": -56.527435302734375, + "logps/ref_rejected": -78.22654724121094, + "logps/rejected": -1171.159912109375, + "loss": 1.0959, + "margin_dpo/margin_mean": 416.90771484375, + "margin_dpo/margin_std": 574.4068603515625, + "step": 677 + }, + { + "KL/chosen_KL_mean": -570.7870483398438, + "KL/mean": -824.8927612304688, + "KL/rejected_KL_mean": -1078.99853515625, + "KL/std": 591.4881591796875, + "epoch": 0.9955947136563876, + "fcm_dpo/beta": 0.00094210309907794, + "fcm_dpo/delta": -0.08271745592355728, + "fcm_dpo/margin": 508.2114562988281, + "fcm_dpo/q_t": 0.39141643047332764, + "grad_norm": 30.350744247436523, + "learning_rate": 5.270012410216185e-11, + "logits/chosen": -0.9413450956344604, + "logits/rejected": -0.971659243106842, + "logps/chosen": -616.9215087890625, + "logps/ref_chosen": -46.13447570800781, + "logps/ref_rejected": -80.60462951660156, + "logps/rejected": -1159.6031494140625, + "loss": 1.0481, + "margin_dpo/margin_mean": 508.21142578125, + "margin_dpo/margin_std": 649.2615966796875, + "step": 678 + }, + { + "KL/chosen_KL_mean": -671.6044921875, + "KL/mean": -847.1055908203125, + "KL/rejected_KL_mean": -1022.606689453125, + "KL/std": 466.68133544921875, + "epoch": 0.9970631424375918, + "fcm_dpo/beta": 0.0009439511341042817, + "fcm_dpo/delta": 0.07106737792491913, + "fcm_dpo/margin": 351.002197265625, + "fcm_dpo/q_t": 0.42417770624160767, + "grad_norm": 31.743593215942383, + "learning_rate": 2.9644275480772416e-11, + "logits/chosen": -0.9747291803359985, + "logits/rejected": -0.9602512717247009, + "logps/chosen": -721.8994140625, + "logps/ref_chosen": -50.294921875, + "logps/ref_rejected": -76.59813690185547, + "logps/rejected": -1099.204833984375, + "loss": 1.1447, + "margin_dpo/margin_mean": 351.002197265625, + "margin_dpo/margin_std": 545.5799560546875, + "step": 679 + }, + { + "KL/chosen_KL_mean": -670.5235595703125, + "KL/mean": -899.030517578125, + "KL/rejected_KL_mean": -1127.53759765625, + "KL/std": 594.6282958984375, + "epoch": 0.9985315712187959, + "fcm_dpo/beta": 0.0009394378867000341, + "fcm_dpo/delta": -0.031213950365781784, + "fcm_dpo/margin": 457.013916015625, + "fcm_dpo/q_t": 0.4019904136657715, + "grad_norm": 32.0710334777832, + "learning_rate": 1.31753782067201e-11, + "logits/chosen": -0.9574640989303589, + "logits/rejected": -0.9788249731063843, + "logps/chosen": -747.4393310546875, + "logps/ref_chosen": -76.91569519042969, + "logps/ref_rejected": -112.384765625, + "logps/rejected": -1239.92236328125, + "loss": 1.097, + "margin_dpo/margin_mean": 457.013916015625, + "margin_dpo/margin_std": 679.540283203125, + "step": 680 + }, + { + "KL/chosen_KL_mean": -644.1123046875, + "KL/mean": -826.009765625, + "KL/rejected_KL_mean": -1007.9073486328125, + "KL/std": 493.124755859375, + "epoch": 1.0, + "fcm_dpo/beta": 0.0009560231701470912, + "fcm_dpo/delta": 0.05282256752252579, + "fcm_dpo/margin": 363.79510498046875, + "fcm_dpo/q_t": 0.42025691270828247, + "grad_norm": 24.496997833251953, + "learning_rate": 3.2938662507808745e-12, + "logits/chosen": -1.0301257371902466, + "logits/rejected": -1.0402805805206299, + "logps/chosen": -705.069580078125, + "logps/ref_chosen": -60.957279205322266, + "logps/ref_rejected": -88.55797576904297, + "logps/rejected": -1096.46533203125, + "loss": 1.1386, + "margin_dpo/margin_mean": 363.79510498046875, + "margin_dpo/margin_std": 546.2330932617188, + "step": 681 + }, + { + "epoch": 1.0, + "step": 681, + "total_flos": 0.0, + "train_loss": 1.126299982641587, + "train_runtime": 1736.7793, + "train_samples_per_second": 25.103, + "train_steps_per_second": 0.392 + } + ], + "logging_steps": 1, + "max_steps": 681, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 50, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +}