From 0b9a2051559b9aa700989f14ed4b83c662900f8a Mon Sep 17 00:00:00 2001 From: ModelHub XC Date: Tue, 26 May 2026 12:34:28 +0800 Subject: [PATCH] =?UTF-8?q?=E5=88=9D=E5=A7=8B=E5=8C=96=E9=A1=B9=E7=9B=AE?= =?UTF-8?q?=EF=BC=8C=E7=94=B1ModelHub=20XC=E7=A4=BE=E5=8C=BA=E6=8F=90?= =?UTF-8?q?=E4=BE=9B=E6=A8=A1=E5=9E=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Model: W-61/llama3-hh-helpful-qt045-b0p05-20260429-085449 Source: Original Platform --- .gitattributes | 36 + README.md | 62 + all_results.json | 9 + config.json | 29 + generation_config.json | 9 + margin_logs/margins.jsonl | 681 ++ margin_logs/step_0000001.npy | 3 + margin_logs/step_0000002.npy | 3 + margin_logs/step_0000003.npy | 3 + margin_logs/step_0000004.npy | 3 + margin_logs/step_0000005.npy | 3 + margin_logs/step_0000006.npy | 3 + margin_logs/step_0000007.npy | 3 + margin_logs/step_0000008.npy | 3 + margin_logs/step_0000009.npy | 3 + margin_logs/step_0000010.npy | 3 + margin_logs/step_0000011.npy | 3 + margin_logs/step_0000012.npy | 3 + margin_logs/step_0000013.npy | 3 + margin_logs/step_0000014.npy | 3 + margin_logs/step_0000015.npy | 3 + margin_logs/step_0000016.npy | 3 + margin_logs/step_0000017.npy | 3 + margin_logs/step_0000018.npy | 3 + margin_logs/step_0000019.npy | 3 + margin_logs/step_0000020.npy | 3 + margin_logs/step_0000021.npy | 3 + margin_logs/step_0000022.npy | 3 + margin_logs/step_0000023.npy | 3 + margin_logs/step_0000024.npy | 3 + margin_logs/step_0000025.npy | 3 + margin_logs/step_0000026.npy | 3 + margin_logs/step_0000027.npy | 3 + margin_logs/step_0000028.npy | 3 + margin_logs/step_0000029.npy | 3 + margin_logs/step_0000030.npy | 3 + margin_logs/step_0000031.npy | 3 + margin_logs/step_0000032.npy | 3 + margin_logs/step_0000033.npy | 3 + margin_logs/step_0000034.npy | 3 + margin_logs/step_0000035.npy | 3 + margin_logs/step_0000036.npy | 3 + margin_logs/step_0000037.npy | 3 + margin_logs/step_0000038.npy | 3 + margin_logs/step_0000039.npy | 3 + margin_logs/step_0000040.npy | 3 + margin_logs/step_0000041.npy | 3 + margin_logs/step_0000042.npy | 3 + margin_logs/step_0000043.npy | 3 + margin_logs/step_0000044.npy | 3 + margin_logs/step_0000045.npy | 3 + margin_logs/step_0000046.npy | 3 + margin_logs/step_0000047.npy | 3 + margin_logs/step_0000048.npy | 3 + margin_logs/step_0000049.npy | 3 + margin_logs/step_0000050.npy | 3 + margin_logs/step_0000051.npy | 3 + margin_logs/step_0000052.npy | 3 + margin_logs/step_0000053.npy | 3 + margin_logs/step_0000054.npy | 3 + margin_logs/step_0000055.npy | 3 + margin_logs/step_0000056.npy | 3 + margin_logs/step_0000057.npy | 3 + margin_logs/step_0000058.npy | 3 + margin_logs/step_0000059.npy | 3 + margin_logs/step_0000060.npy | 3 + margin_logs/step_0000061.npy | 3 + margin_logs/step_0000062.npy | 3 + margin_logs/step_0000063.npy | 3 + margin_logs/step_0000064.npy | 3 + margin_logs/step_0000065.npy | 3 + margin_logs/step_0000066.npy | 3 + margin_logs/step_0000067.npy | 3 + margin_logs/step_0000068.npy | 3 + margin_logs/step_0000069.npy | 3 + margin_logs/step_0000070.npy | 3 + margin_logs/step_0000071.npy | 3 + margin_logs/step_0000072.npy | 3 + margin_logs/step_0000073.npy | 3 + margin_logs/step_0000074.npy | 3 + margin_logs/step_0000075.npy | 3 + margin_logs/step_0000076.npy | 3 + margin_logs/step_0000077.npy | 3 + margin_logs/step_0000078.npy | 3 + margin_logs/step_0000079.npy | 3 + margin_logs/step_0000080.npy | 3 + margin_logs/step_0000081.npy | 3 + margin_logs/step_0000082.npy | 3 + margin_logs/step_0000083.npy | 3 + margin_logs/step_0000084.npy | 3 + margin_logs/step_0000085.npy | 3 + margin_logs/step_0000086.npy | 3 + margin_logs/step_0000087.npy | 3 + margin_logs/step_0000088.npy | 3 + margin_logs/step_0000089.npy | 3 + margin_logs/step_0000090.npy | 3 + margin_logs/step_0000091.npy | 3 + margin_logs/step_0000092.npy | 3 + margin_logs/step_0000093.npy | 3 + margin_logs/step_0000094.npy | 3 + margin_logs/step_0000095.npy | 3 + margin_logs/step_0000096.npy | 3 + margin_logs/step_0000097.npy | 3 + margin_logs/step_0000098.npy | 3 + margin_logs/step_0000099.npy | 3 + margin_logs/step_0000100.npy | 3 + margin_logs/step_0000101.npy | 3 + margin_logs/step_0000102.npy | 3 + margin_logs/step_0000103.npy | 3 + margin_logs/step_0000104.npy | 3 + margin_logs/step_0000105.npy | 3 + margin_logs/step_0000106.npy | 3 + margin_logs/step_0000107.npy | 3 + margin_logs/step_0000108.npy | 3 + margin_logs/step_0000109.npy | 3 + margin_logs/step_0000110.npy | 3 + margin_logs/step_0000111.npy | 3 + margin_logs/step_0000112.npy | 3 + margin_logs/step_0000113.npy | 3 + margin_logs/step_0000114.npy | 3 + margin_logs/step_0000115.npy | 3 + margin_logs/step_0000116.npy | 3 + margin_logs/step_0000117.npy | 3 + margin_logs/step_0000118.npy | 3 + margin_logs/step_0000119.npy | 3 + margin_logs/step_0000120.npy | 3 + margin_logs/step_0000121.npy | 3 + margin_logs/step_0000122.npy | 3 + margin_logs/step_0000123.npy | 3 + margin_logs/step_0000124.npy | 3 + margin_logs/step_0000125.npy | 3 + margin_logs/step_0000126.npy | 3 + margin_logs/step_0000127.npy | 3 + margin_logs/step_0000128.npy | 3 + margin_logs/step_0000129.npy | 3 + margin_logs/step_0000130.npy | 3 + margin_logs/step_0000131.npy | 3 + margin_logs/step_0000132.npy | 3 + margin_logs/step_0000133.npy | 3 + margin_logs/step_0000134.npy | 3 + margin_logs/step_0000135.npy | 3 + margin_logs/step_0000136.npy | 3 + margin_logs/step_0000137.npy | 3 + margin_logs/step_0000138.npy | 3 + margin_logs/step_0000139.npy | 3 + margin_logs/step_0000140.npy | 3 + margin_logs/step_0000141.npy | 3 + margin_logs/step_0000142.npy | 3 + margin_logs/step_0000143.npy | 3 + margin_logs/step_0000144.npy | 3 + margin_logs/step_0000145.npy | 3 + margin_logs/step_0000146.npy | 3 + margin_logs/step_0000147.npy | 3 + margin_logs/step_0000148.npy | 3 + margin_logs/step_0000149.npy | 3 + margin_logs/step_0000150.npy | 3 + margin_logs/step_0000151.npy | 3 + margin_logs/step_0000152.npy | 3 + margin_logs/step_0000153.npy | 3 + margin_logs/step_0000154.npy | 3 + margin_logs/step_0000155.npy | 3 + margin_logs/step_0000156.npy | 3 + margin_logs/step_0000157.npy | 3 + margin_logs/step_0000158.npy | 3 + margin_logs/step_0000159.npy | 3 + margin_logs/step_0000160.npy | 3 + margin_logs/step_0000161.npy | 3 + margin_logs/step_0000162.npy | 3 + margin_logs/step_0000163.npy | 3 + margin_logs/step_0000164.npy | 3 + margin_logs/step_0000165.npy | 3 + margin_logs/step_0000166.npy | 3 + margin_logs/step_0000167.npy | 3 + margin_logs/step_0000168.npy | 3 + margin_logs/step_0000169.npy | 3 + margin_logs/step_0000170.npy | 3 + margin_logs/step_0000171.npy | 3 + margin_logs/step_0000172.npy | 3 + margin_logs/step_0000173.npy | 3 + margin_logs/step_0000174.npy | 3 + margin_logs/step_0000175.npy | 3 + margin_logs/step_0000176.npy | 3 + margin_logs/step_0000177.npy | 3 + margin_logs/step_0000178.npy | 3 + margin_logs/step_0000179.npy | 3 + margin_logs/step_0000180.npy | 3 + margin_logs/step_0000181.npy | 3 + margin_logs/step_0000182.npy | 3 + margin_logs/step_0000183.npy | 3 + margin_logs/step_0000184.npy | 3 + margin_logs/step_0000185.npy | 3 + margin_logs/step_0000186.npy | 3 + margin_logs/step_0000187.npy | 3 + margin_logs/step_0000188.npy | 3 + margin_logs/step_0000189.npy | 3 + margin_logs/step_0000190.npy | 3 + margin_logs/step_0000191.npy | 3 + margin_logs/step_0000192.npy | 3 + margin_logs/step_0000193.npy | 3 + margin_logs/step_0000194.npy | 3 + margin_logs/step_0000195.npy | 3 + margin_logs/step_0000196.npy | 3 + margin_logs/step_0000197.npy | 3 + margin_logs/step_0000198.npy | 3 + margin_logs/step_0000199.npy | 3 + margin_logs/step_0000200.npy | 3 + margin_logs/step_0000201.npy | 3 + margin_logs/step_0000202.npy | 3 + margin_logs/step_0000203.npy | 3 + margin_logs/step_0000204.npy | 3 + margin_logs/step_0000205.npy | 3 + margin_logs/step_0000206.npy | 3 + margin_logs/step_0000207.npy | 3 + margin_logs/step_0000208.npy | 3 + margin_logs/step_0000209.npy | 3 + margin_logs/step_0000210.npy | 3 + margin_logs/step_0000211.npy | 3 + margin_logs/step_0000212.npy | 3 + margin_logs/step_0000213.npy | 3 + margin_logs/step_0000214.npy | 3 + margin_logs/step_0000215.npy | 3 + margin_logs/step_0000216.npy | 3 + margin_logs/step_0000217.npy | 3 + margin_logs/step_0000218.npy | 3 + margin_logs/step_0000219.npy | 3 + margin_logs/step_0000220.npy | 3 + margin_logs/step_0000221.npy | 3 + margin_logs/step_0000222.npy | 3 + margin_logs/step_0000223.npy | 3 + margin_logs/step_0000224.npy | 3 + margin_logs/step_0000225.npy | 3 + margin_logs/step_0000226.npy | 3 + margin_logs/step_0000227.npy | 3 + margin_logs/step_0000228.npy | 3 + margin_logs/step_0000229.npy | 3 + margin_logs/step_0000230.npy | 3 + margin_logs/step_0000231.npy | 3 + margin_logs/step_0000232.npy | 3 + margin_logs/step_0000233.npy | 3 + margin_logs/step_0000234.npy | 3 + margin_logs/step_0000235.npy | 3 + margin_logs/step_0000236.npy | 3 + margin_logs/step_0000237.npy | 3 + margin_logs/step_0000238.npy | 3 + margin_logs/step_0000239.npy | 3 + margin_logs/step_0000240.npy | 3 + margin_logs/step_0000241.npy | 3 + margin_logs/step_0000242.npy | 3 + margin_logs/step_0000243.npy | 3 + margin_logs/step_0000244.npy | 3 + margin_logs/step_0000245.npy | 3 + margin_logs/step_0000246.npy | 3 + margin_logs/step_0000247.npy | 3 + margin_logs/step_0000248.npy | 3 + margin_logs/step_0000249.npy | 3 + margin_logs/step_0000250.npy | 3 + margin_logs/step_0000251.npy | 3 + margin_logs/step_0000252.npy | 3 + margin_logs/step_0000253.npy | 3 + margin_logs/step_0000254.npy | 3 + margin_logs/step_0000255.npy | 3 + margin_logs/step_0000256.npy | 3 + margin_logs/step_0000257.npy | 3 + margin_logs/step_0000258.npy | 3 + margin_logs/step_0000259.npy | 3 + margin_logs/step_0000260.npy | 3 + margin_logs/step_0000261.npy | 3 + margin_logs/step_0000262.npy | 3 + margin_logs/step_0000263.npy | 3 + margin_logs/step_0000264.npy | 3 + margin_logs/step_0000265.npy | 3 + margin_logs/step_0000266.npy | 3 + margin_logs/step_0000267.npy | 3 + margin_logs/step_0000268.npy | 3 + margin_logs/step_0000269.npy | 3 + margin_logs/step_0000270.npy | 3 + margin_logs/step_0000271.npy | 3 + margin_logs/step_0000272.npy | 3 + margin_logs/step_0000273.npy | 3 + margin_logs/step_0000274.npy | 3 + margin_logs/step_0000275.npy | 3 + margin_logs/step_0000276.npy | 3 + margin_logs/step_0000277.npy | 3 + margin_logs/step_0000278.npy | 3 + margin_logs/step_0000279.npy | 3 + margin_logs/step_0000280.npy | 3 + margin_logs/step_0000281.npy | 3 + margin_logs/step_0000282.npy | 3 + margin_logs/step_0000283.npy | 3 + margin_logs/step_0000284.npy | 3 + margin_logs/step_0000285.npy | 3 + margin_logs/step_0000286.npy | 3 + margin_logs/step_0000287.npy | 3 + margin_logs/step_0000288.npy | 3 + margin_logs/step_0000289.npy | 3 + margin_logs/step_0000290.npy | 3 + margin_logs/step_0000291.npy | 3 + margin_logs/step_0000292.npy | 3 + margin_logs/step_0000293.npy | 3 + margin_logs/step_0000294.npy | 3 + margin_logs/step_0000295.npy | 3 + margin_logs/step_0000296.npy | 3 + margin_logs/step_0000297.npy | 3 + margin_logs/step_0000298.npy | 3 + margin_logs/step_0000299.npy | 3 + margin_logs/step_0000300.npy | 3 + margin_logs/step_0000301.npy | 3 + margin_logs/step_0000302.npy | 3 + margin_logs/step_0000303.npy | 3 + margin_logs/step_0000304.npy | 3 + margin_logs/step_0000305.npy | 3 + margin_logs/step_0000306.npy | 3 + margin_logs/step_0000307.npy | 3 + margin_logs/step_0000308.npy | 3 + margin_logs/step_0000309.npy | 3 + margin_logs/step_0000310.npy | 3 + margin_logs/step_0000311.npy | 3 + margin_logs/step_0000312.npy | 3 + margin_logs/step_0000313.npy | 3 + margin_logs/step_0000314.npy | 3 + margin_logs/step_0000315.npy | 3 + margin_logs/step_0000316.npy | 3 + margin_logs/step_0000317.npy | 3 + margin_logs/step_0000318.npy | 3 + margin_logs/step_0000319.npy | 3 + margin_logs/step_0000320.npy | 3 + margin_logs/step_0000321.npy | 3 + margin_logs/step_0000322.npy | 3 + margin_logs/step_0000323.npy | 3 + margin_logs/step_0000324.npy | 3 + margin_logs/step_0000325.npy | 3 + margin_logs/step_0000326.npy | 3 + margin_logs/step_0000327.npy | 3 + margin_logs/step_0000328.npy | 3 + margin_logs/step_0000329.npy | 3 + margin_logs/step_0000330.npy | 3 + margin_logs/step_0000331.npy | 3 + margin_logs/step_0000332.npy | 3 + margin_logs/step_0000333.npy | 3 + margin_logs/step_0000334.npy | 3 + margin_logs/step_0000335.npy | 3 + margin_logs/step_0000336.npy | 3 + margin_logs/step_0000337.npy | 3 + margin_logs/step_0000338.npy | 3 + margin_logs/step_0000339.npy | 3 + margin_logs/step_0000340.npy | 3 + margin_logs/step_0000341.npy | 3 + margin_logs/step_0000342.npy | 3 + margin_logs/step_0000343.npy | 3 + margin_logs/step_0000344.npy | 3 + margin_logs/step_0000345.npy | 3 + margin_logs/step_0000346.npy | 3 + margin_logs/step_0000347.npy | 3 + margin_logs/step_0000348.npy | 3 + margin_logs/step_0000349.npy | 3 + margin_logs/step_0000350.npy | 3 + margin_logs/step_0000351.npy | 3 + margin_logs/step_0000352.npy | 3 + margin_logs/step_0000353.npy | 3 + margin_logs/step_0000354.npy | 3 + margin_logs/step_0000355.npy | 3 + margin_logs/step_0000356.npy | 3 + margin_logs/step_0000357.npy | 3 + margin_logs/step_0000358.npy | 3 + margin_logs/step_0000359.npy | 3 + margin_logs/step_0000360.npy | 3 + margin_logs/step_0000361.npy | 3 + margin_logs/step_0000362.npy | 3 + margin_logs/step_0000363.npy | 3 + margin_logs/step_0000364.npy | 3 + margin_logs/step_0000365.npy | 3 + margin_logs/step_0000366.npy | 3 + margin_logs/step_0000367.npy | 3 + margin_logs/step_0000368.npy | 3 + margin_logs/step_0000369.npy | 3 + margin_logs/step_0000370.npy | 3 + margin_logs/step_0000371.npy | 3 + margin_logs/step_0000372.npy | 3 + margin_logs/step_0000373.npy | 3 + margin_logs/step_0000374.npy | 3 + margin_logs/step_0000375.npy | 3 + margin_logs/step_0000376.npy | 3 + margin_logs/step_0000377.npy | 3 + margin_logs/step_0000378.npy | 3 + margin_logs/step_0000379.npy | 3 + margin_logs/step_0000380.npy | 3 + margin_logs/step_0000381.npy | 3 + margin_logs/step_0000382.npy | 3 + margin_logs/step_0000383.npy | 3 + margin_logs/step_0000384.npy | 3 + margin_logs/step_0000385.npy | 3 + margin_logs/step_0000386.npy | 3 + margin_logs/step_0000387.npy | 3 + margin_logs/step_0000388.npy | 3 + margin_logs/step_0000389.npy | 3 + margin_logs/step_0000390.npy | 3 + margin_logs/step_0000391.npy | 3 + margin_logs/step_0000392.npy | 3 + margin_logs/step_0000393.npy | 3 + margin_logs/step_0000394.npy | 3 + margin_logs/step_0000395.npy | 3 + margin_logs/step_0000396.npy | 3 + margin_logs/step_0000397.npy | 3 + margin_logs/step_0000398.npy | 3 + margin_logs/step_0000399.npy | 3 + margin_logs/step_0000400.npy | 3 + margin_logs/step_0000401.npy | 3 + margin_logs/step_0000402.npy | 3 + margin_logs/step_0000403.npy | 3 + margin_logs/step_0000404.npy | 3 + margin_logs/step_0000405.npy | 3 + margin_logs/step_0000406.npy | 3 + margin_logs/step_0000407.npy | 3 + margin_logs/step_0000408.npy | 3 + margin_logs/step_0000409.npy | 3 + margin_logs/step_0000410.npy | 3 + margin_logs/step_0000411.npy | 3 + margin_logs/step_0000412.npy | 3 + margin_logs/step_0000413.npy | 3 + margin_logs/step_0000414.npy | 3 + margin_logs/step_0000415.npy | 3 + margin_logs/step_0000416.npy | 3 + margin_logs/step_0000417.npy | 3 + margin_logs/step_0000418.npy | 3 + margin_logs/step_0000419.npy | 3 + margin_logs/step_0000420.npy | 3 + margin_logs/step_0000421.npy | 3 + margin_logs/step_0000422.npy | 3 + margin_logs/step_0000423.npy | 3 + margin_logs/step_0000424.npy | 3 + margin_logs/step_0000425.npy | 3 + margin_logs/step_0000426.npy | 3 + margin_logs/step_0000427.npy | 3 + margin_logs/step_0000428.npy | 3 + margin_logs/step_0000429.npy | 3 + margin_logs/step_0000430.npy | 3 + margin_logs/step_0000431.npy | 3 + margin_logs/step_0000432.npy | 3 + margin_logs/step_0000433.npy | 3 + margin_logs/step_0000434.npy | 3 + margin_logs/step_0000435.npy | 3 + margin_logs/step_0000436.npy | 3 + margin_logs/step_0000437.npy | 3 + margin_logs/step_0000438.npy | 3 + margin_logs/step_0000439.npy | 3 + margin_logs/step_0000440.npy | 3 + margin_logs/step_0000441.npy | 3 + margin_logs/step_0000442.npy | 3 + margin_logs/step_0000443.npy | 3 + margin_logs/step_0000444.npy | 3 + margin_logs/step_0000445.npy | 3 + margin_logs/step_0000446.npy | 3 + margin_logs/step_0000447.npy | 3 + margin_logs/step_0000448.npy | 3 + margin_logs/step_0000449.npy | 3 + margin_logs/step_0000450.npy | 3 + margin_logs/step_0000451.npy | 3 + margin_logs/step_0000452.npy | 3 + margin_logs/step_0000453.npy | 3 + margin_logs/step_0000454.npy | 3 + margin_logs/step_0000455.npy | 3 + margin_logs/step_0000456.npy | 3 + margin_logs/step_0000457.npy | 3 + margin_logs/step_0000458.npy | 3 + margin_logs/step_0000459.npy | 3 + margin_logs/step_0000460.npy | 3 + margin_logs/step_0000461.npy | 3 + margin_logs/step_0000462.npy | 3 + margin_logs/step_0000463.npy | 3 + margin_logs/step_0000464.npy | 3 + margin_logs/step_0000465.npy | 3 + margin_logs/step_0000466.npy | 3 + margin_logs/step_0000467.npy | 3 + margin_logs/step_0000468.npy | 3 + margin_logs/step_0000469.npy | 3 + margin_logs/step_0000470.npy | 3 + margin_logs/step_0000471.npy | 3 + margin_logs/step_0000472.npy | 3 + margin_logs/step_0000473.npy | 3 + margin_logs/step_0000474.npy | 3 + margin_logs/step_0000475.npy | 3 + margin_logs/step_0000476.npy | 3 + margin_logs/step_0000477.npy | 3 + margin_logs/step_0000478.npy | 3 + margin_logs/step_0000479.npy | 3 + margin_logs/step_0000480.npy | 3 + margin_logs/step_0000481.npy | 3 + margin_logs/step_0000482.npy | 3 + margin_logs/step_0000483.npy | 3 + margin_logs/step_0000484.npy | 3 + margin_logs/step_0000485.npy | 3 + margin_logs/step_0000486.npy | 3 + margin_logs/step_0000487.npy | 3 + margin_logs/step_0000488.npy | 3 + margin_logs/step_0000489.npy | 3 + margin_logs/step_0000490.npy | 3 + margin_logs/step_0000491.npy | 3 + margin_logs/step_0000492.npy | 3 + margin_logs/step_0000493.npy | 3 + margin_logs/step_0000494.npy | 3 + margin_logs/step_0000495.npy | 3 + margin_logs/step_0000496.npy | 3 + margin_logs/step_0000497.npy | 3 + margin_logs/step_0000498.npy | 3 + margin_logs/step_0000499.npy | 3 + margin_logs/step_0000500.npy | 3 + margin_logs/step_0000501.npy | 3 + margin_logs/step_0000502.npy | 3 + margin_logs/step_0000503.npy | 3 + margin_logs/step_0000504.npy | 3 + margin_logs/step_0000505.npy | 3 + margin_logs/step_0000506.npy | 3 + margin_logs/step_0000507.npy | 3 + margin_logs/step_0000508.npy | 3 + margin_logs/step_0000509.npy | 3 + margin_logs/step_0000510.npy | 3 + margin_logs/step_0000511.npy | 3 + margin_logs/step_0000512.npy | 3 + margin_logs/step_0000513.npy | 3 + margin_logs/step_0000514.npy | 3 + margin_logs/step_0000515.npy | 3 + margin_logs/step_0000516.npy | 3 + margin_logs/step_0000517.npy | 3 + margin_logs/step_0000518.npy | 3 + margin_logs/step_0000519.npy | 3 + margin_logs/step_0000520.npy | 3 + margin_logs/step_0000521.npy | 3 + margin_logs/step_0000522.npy | 3 + margin_logs/step_0000523.npy | 3 + margin_logs/step_0000524.npy | 3 + margin_logs/step_0000525.npy | 3 + margin_logs/step_0000526.npy | 3 + margin_logs/step_0000527.npy | 3 + margin_logs/step_0000528.npy | 3 + margin_logs/step_0000529.npy | 3 + margin_logs/step_0000530.npy | 3 + margin_logs/step_0000531.npy | 3 + margin_logs/step_0000532.npy | 3 + margin_logs/step_0000533.npy | 3 + margin_logs/step_0000534.npy | 3 + margin_logs/step_0000535.npy | 3 + margin_logs/step_0000536.npy | 3 + margin_logs/step_0000537.npy | 3 + margin_logs/step_0000538.npy | 3 + margin_logs/step_0000539.npy | 3 + margin_logs/step_0000540.npy | 3 + margin_logs/step_0000541.npy | 3 + margin_logs/step_0000542.npy | 3 + margin_logs/step_0000543.npy | 3 + margin_logs/step_0000544.npy | 3 + margin_logs/step_0000545.npy | 3 + margin_logs/step_0000546.npy | 3 + margin_logs/step_0000547.npy | 3 + margin_logs/step_0000548.npy | 3 + margin_logs/step_0000549.npy | 3 + margin_logs/step_0000550.npy | 3 + margin_logs/step_0000551.npy | 3 + margin_logs/step_0000552.npy | 3 + margin_logs/step_0000553.npy | 3 + margin_logs/step_0000554.npy | 3 + margin_logs/step_0000555.npy | 3 + margin_logs/step_0000556.npy | 3 + margin_logs/step_0000557.npy | 3 + margin_logs/step_0000558.npy | 3 + margin_logs/step_0000559.npy | 3 + margin_logs/step_0000560.npy | 3 + margin_logs/step_0000561.npy | 3 + margin_logs/step_0000562.npy | 3 + margin_logs/step_0000563.npy | 3 + margin_logs/step_0000564.npy | 3 + margin_logs/step_0000565.npy | 3 + margin_logs/step_0000566.npy | 3 + margin_logs/step_0000567.npy | 3 + margin_logs/step_0000568.npy | 3 + margin_logs/step_0000569.npy | 3 + margin_logs/step_0000570.npy | 3 + margin_logs/step_0000571.npy | 3 + margin_logs/step_0000572.npy | 3 + margin_logs/step_0000573.npy | 3 + margin_logs/step_0000574.npy | 3 + margin_logs/step_0000575.npy | 3 + margin_logs/step_0000576.npy | 3 + margin_logs/step_0000577.npy | 3 + margin_logs/step_0000578.npy | 3 + margin_logs/step_0000579.npy | 3 + margin_logs/step_0000580.npy | 3 + margin_logs/step_0000581.npy | 3 + margin_logs/step_0000582.npy | 3 + margin_logs/step_0000583.npy | 3 + margin_logs/step_0000584.npy | 3 + margin_logs/step_0000585.npy | 3 + margin_logs/step_0000586.npy | 3 + margin_logs/step_0000587.npy | 3 + margin_logs/step_0000588.npy | 3 + margin_logs/step_0000589.npy | 3 + margin_logs/step_0000590.npy | 3 + margin_logs/step_0000591.npy | 3 + margin_logs/step_0000592.npy | 3 + margin_logs/step_0000593.npy | 3 + margin_logs/step_0000594.npy | 3 + margin_logs/step_0000595.npy | 3 + margin_logs/step_0000596.npy | 3 + margin_logs/step_0000597.npy | 3 + margin_logs/step_0000598.npy | 3 + margin_logs/step_0000599.npy | 3 + margin_logs/step_0000600.npy | 3 + margin_logs/step_0000601.npy | 3 + margin_logs/step_0000602.npy | 3 + margin_logs/step_0000603.npy | 3 + margin_logs/step_0000604.npy | 3 + margin_logs/step_0000605.npy | 3 + margin_logs/step_0000606.npy | 3 + margin_logs/step_0000607.npy | 3 + margin_logs/step_0000608.npy | 3 + margin_logs/step_0000609.npy | 3 + margin_logs/step_0000610.npy | 3 + margin_logs/step_0000611.npy | 3 + margin_logs/step_0000612.npy | 3 + margin_logs/step_0000613.npy | 3 + margin_logs/step_0000614.npy | 3 + margin_logs/step_0000615.npy | 3 + margin_logs/step_0000616.npy | 3 + margin_logs/step_0000617.npy | 3 + margin_logs/step_0000618.npy | 3 + margin_logs/step_0000619.npy | 3 + margin_logs/step_0000620.npy | 3 + margin_logs/step_0000621.npy | 3 + margin_logs/step_0000622.npy | 3 + margin_logs/step_0000623.npy | 3 + margin_logs/step_0000624.npy | 3 + margin_logs/step_0000625.npy | 3 + margin_logs/step_0000626.npy | 3 + margin_logs/step_0000627.npy | 3 + margin_logs/step_0000628.npy | 3 + margin_logs/step_0000629.npy | 3 + margin_logs/step_0000630.npy | 3 + margin_logs/step_0000631.npy | 3 + margin_logs/step_0000632.npy | 3 + margin_logs/step_0000633.npy | 3 + margin_logs/step_0000634.npy | 3 + margin_logs/step_0000635.npy | 3 + margin_logs/step_0000636.npy | 3 + margin_logs/step_0000637.npy | 3 + margin_logs/step_0000638.npy | 3 + margin_logs/step_0000639.npy | 3 + margin_logs/step_0000640.npy | 3 + margin_logs/step_0000641.npy | 3 + margin_logs/step_0000642.npy | 3 + margin_logs/step_0000643.npy | 3 + margin_logs/step_0000644.npy | 3 + margin_logs/step_0000645.npy | 3 + margin_logs/step_0000646.npy | 3 + margin_logs/step_0000647.npy | 3 + margin_logs/step_0000648.npy | 3 + margin_logs/step_0000649.npy | 3 + margin_logs/step_0000650.npy | 3 + margin_logs/step_0000651.npy | 3 + margin_logs/step_0000652.npy | 3 + margin_logs/step_0000653.npy | 3 + margin_logs/step_0000654.npy | 3 + margin_logs/step_0000655.npy | 3 + margin_logs/step_0000656.npy | 3 + margin_logs/step_0000657.npy | 3 + margin_logs/step_0000658.npy | 3 + margin_logs/step_0000659.npy | 3 + margin_logs/step_0000660.npy | 3 + margin_logs/step_0000661.npy | 3 + margin_logs/step_0000662.npy | 3 + margin_logs/step_0000663.npy | 3 + margin_logs/step_0000664.npy | 3 + margin_logs/step_0000665.npy | 3 + margin_logs/step_0000666.npy | 3 + margin_logs/step_0000667.npy | 3 + margin_logs/step_0000668.npy | 3 + margin_logs/step_0000669.npy | 3 + margin_logs/step_0000670.npy | 3 + margin_logs/step_0000671.npy | 3 + margin_logs/step_0000672.npy | 3 + margin_logs/step_0000673.npy | 3 + margin_logs/step_0000674.npy | 3 + margin_logs/step_0000675.npy | 3 + margin_logs/step_0000676.npy | 3 + margin_logs/step_0000677.npy | 3 + margin_logs/step_0000678.npy | 3 + margin_logs/step_0000679.npy | 3 + margin_logs/step_0000680.npy | 3 + margin_logs/step_0000681.npy | 3 + model-00001-of-00007.safetensors | 3 + model-00002-of-00007.safetensors | 3 + model-00003-of-00007.safetensors | 3 + model-00004-of-00007.safetensors | 3 + model-00005-of-00007.safetensors | 3 + model-00006-of-00007.safetensors | 3 + model-00007-of-00007.safetensors | 3 + model.safetensors.index.json | 298 + special_tokens_map.json | 23 + tokenizer.json | 3 + tokenizer_config.json | 2064 ++++ train.log | 1160 +++ train_results.json | 9 + trainer_state.json | 15706 +++++++++++++++++++++++++++++ 701 files changed, 22153 insertions(+) create mode 100644 .gitattributes create mode 100644 README.md create mode 100644 all_results.json create mode 100644 config.json create mode 100644 generation_config.json create mode 100644 margin_logs/margins.jsonl create mode 100644 margin_logs/step_0000001.npy create mode 100644 margin_logs/step_0000002.npy create mode 100644 margin_logs/step_0000003.npy create mode 100644 margin_logs/step_0000004.npy create mode 100644 margin_logs/step_0000005.npy create mode 100644 margin_logs/step_0000006.npy create mode 100644 margin_logs/step_0000007.npy create mode 100644 margin_logs/step_0000008.npy create mode 100644 margin_logs/step_0000009.npy create mode 100644 margin_logs/step_0000010.npy create mode 100644 margin_logs/step_0000011.npy create mode 100644 margin_logs/step_0000012.npy create mode 100644 margin_logs/step_0000013.npy create mode 100644 margin_logs/step_0000014.npy create mode 100644 margin_logs/step_0000015.npy create mode 100644 margin_logs/step_0000016.npy create mode 100644 margin_logs/step_0000017.npy create mode 100644 margin_logs/step_0000018.npy create mode 100644 margin_logs/step_0000019.npy create mode 100644 margin_logs/step_0000020.npy create mode 100644 margin_logs/step_0000021.npy create mode 100644 margin_logs/step_0000022.npy create mode 100644 margin_logs/step_0000023.npy create mode 100644 margin_logs/step_0000024.npy create mode 100644 margin_logs/step_0000025.npy create mode 100644 margin_logs/step_0000026.npy create mode 100644 margin_logs/step_0000027.npy create mode 100644 margin_logs/step_0000028.npy create mode 100644 margin_logs/step_0000029.npy create mode 100644 margin_logs/step_0000030.npy create mode 100644 margin_logs/step_0000031.npy create mode 100644 margin_logs/step_0000032.npy create mode 100644 margin_logs/step_0000033.npy create mode 100644 margin_logs/step_0000034.npy create mode 100644 margin_logs/step_0000035.npy create mode 100644 margin_logs/step_0000036.npy create mode 100644 margin_logs/step_0000037.npy create mode 100644 margin_logs/step_0000038.npy create mode 100644 margin_logs/step_0000039.npy create mode 100644 margin_logs/step_0000040.npy create mode 100644 margin_logs/step_0000041.npy create mode 100644 margin_logs/step_0000042.npy create mode 100644 margin_logs/step_0000043.npy create mode 100644 margin_logs/step_0000044.npy create mode 100644 margin_logs/step_0000045.npy create mode 100644 margin_logs/step_0000046.npy create mode 100644 margin_logs/step_0000047.npy create mode 100644 margin_logs/step_0000048.npy create mode 100644 margin_logs/step_0000049.npy create mode 100644 margin_logs/step_0000050.npy create mode 100644 margin_logs/step_0000051.npy create mode 100644 margin_logs/step_0000052.npy create mode 100644 margin_logs/step_0000053.npy create mode 100644 margin_logs/step_0000054.npy create mode 100644 margin_logs/step_0000055.npy create mode 100644 margin_logs/step_0000056.npy create mode 100644 margin_logs/step_0000057.npy create mode 100644 margin_logs/step_0000058.npy create mode 100644 margin_logs/step_0000059.npy create mode 100644 margin_logs/step_0000060.npy create mode 100644 margin_logs/step_0000061.npy create mode 100644 margin_logs/step_0000062.npy create mode 100644 margin_logs/step_0000063.npy create mode 100644 margin_logs/step_0000064.npy create mode 100644 margin_logs/step_0000065.npy create mode 100644 margin_logs/step_0000066.npy create mode 100644 margin_logs/step_0000067.npy create mode 100644 margin_logs/step_0000068.npy create mode 100644 margin_logs/step_0000069.npy create mode 100644 margin_logs/step_0000070.npy create mode 100644 margin_logs/step_0000071.npy create mode 100644 margin_logs/step_0000072.npy create mode 100644 margin_logs/step_0000073.npy create mode 100644 margin_logs/step_0000074.npy create mode 100644 margin_logs/step_0000075.npy create mode 100644 margin_logs/step_0000076.npy create mode 100644 margin_logs/step_0000077.npy create mode 100644 margin_logs/step_0000078.npy create mode 100644 margin_logs/step_0000079.npy create mode 100644 margin_logs/step_0000080.npy create mode 100644 margin_logs/step_0000081.npy create mode 100644 margin_logs/step_0000082.npy create mode 100644 margin_logs/step_0000083.npy create mode 100644 margin_logs/step_0000084.npy create mode 100644 margin_logs/step_0000085.npy create mode 100644 margin_logs/step_0000086.npy create mode 100644 margin_logs/step_0000087.npy create mode 100644 margin_logs/step_0000088.npy create mode 100644 margin_logs/step_0000089.npy create mode 100644 margin_logs/step_0000090.npy create mode 100644 margin_logs/step_0000091.npy create mode 100644 margin_logs/step_0000092.npy create mode 100644 margin_logs/step_0000093.npy create mode 100644 margin_logs/step_0000094.npy create mode 100644 margin_logs/step_0000095.npy create mode 100644 margin_logs/step_0000096.npy create mode 100644 margin_logs/step_0000097.npy create mode 100644 margin_logs/step_0000098.npy create mode 100644 margin_logs/step_0000099.npy create mode 100644 margin_logs/step_0000100.npy create mode 100644 margin_logs/step_0000101.npy create mode 100644 margin_logs/step_0000102.npy create mode 100644 margin_logs/step_0000103.npy create mode 100644 margin_logs/step_0000104.npy create mode 100644 margin_logs/step_0000105.npy create mode 100644 margin_logs/step_0000106.npy create mode 100644 margin_logs/step_0000107.npy create mode 100644 margin_logs/step_0000108.npy create mode 100644 margin_logs/step_0000109.npy create mode 100644 margin_logs/step_0000110.npy create mode 100644 margin_logs/step_0000111.npy create mode 100644 margin_logs/step_0000112.npy create mode 100644 margin_logs/step_0000113.npy create mode 100644 margin_logs/step_0000114.npy create mode 100644 margin_logs/step_0000115.npy create mode 100644 margin_logs/step_0000116.npy create mode 100644 margin_logs/step_0000117.npy create mode 100644 margin_logs/step_0000118.npy create mode 100644 margin_logs/step_0000119.npy create mode 100644 margin_logs/step_0000120.npy create mode 100644 margin_logs/step_0000121.npy create mode 100644 margin_logs/step_0000122.npy create mode 100644 margin_logs/step_0000123.npy create mode 100644 margin_logs/step_0000124.npy create mode 100644 margin_logs/step_0000125.npy create mode 100644 margin_logs/step_0000126.npy create mode 100644 margin_logs/step_0000127.npy create mode 100644 margin_logs/step_0000128.npy create mode 100644 margin_logs/step_0000129.npy create mode 100644 margin_logs/step_0000130.npy create mode 100644 margin_logs/step_0000131.npy create mode 100644 margin_logs/step_0000132.npy create mode 100644 margin_logs/step_0000133.npy create mode 100644 margin_logs/step_0000134.npy create mode 100644 margin_logs/step_0000135.npy create mode 100644 margin_logs/step_0000136.npy create mode 100644 margin_logs/step_0000137.npy create mode 100644 margin_logs/step_0000138.npy create mode 100644 margin_logs/step_0000139.npy create mode 100644 margin_logs/step_0000140.npy create mode 100644 margin_logs/step_0000141.npy create mode 100644 margin_logs/step_0000142.npy create mode 100644 margin_logs/step_0000143.npy create mode 100644 margin_logs/step_0000144.npy create mode 100644 margin_logs/step_0000145.npy create mode 100644 margin_logs/step_0000146.npy create mode 100644 margin_logs/step_0000147.npy create mode 100644 margin_logs/step_0000148.npy create mode 100644 margin_logs/step_0000149.npy create mode 100644 margin_logs/step_0000150.npy create mode 100644 margin_logs/step_0000151.npy create mode 100644 margin_logs/step_0000152.npy create mode 100644 margin_logs/step_0000153.npy create mode 100644 margin_logs/step_0000154.npy create mode 100644 margin_logs/step_0000155.npy create mode 100644 margin_logs/step_0000156.npy create mode 100644 margin_logs/step_0000157.npy create mode 100644 margin_logs/step_0000158.npy create mode 100644 margin_logs/step_0000159.npy create mode 100644 margin_logs/step_0000160.npy create mode 100644 margin_logs/step_0000161.npy create mode 100644 margin_logs/step_0000162.npy create mode 100644 margin_logs/step_0000163.npy create mode 100644 margin_logs/step_0000164.npy create mode 100644 margin_logs/step_0000165.npy create mode 100644 margin_logs/step_0000166.npy create mode 100644 margin_logs/step_0000167.npy create mode 100644 margin_logs/step_0000168.npy create mode 100644 margin_logs/step_0000169.npy create mode 100644 margin_logs/step_0000170.npy create mode 100644 margin_logs/step_0000171.npy create mode 100644 margin_logs/step_0000172.npy create mode 100644 margin_logs/step_0000173.npy create mode 100644 margin_logs/step_0000174.npy create mode 100644 margin_logs/step_0000175.npy create mode 100644 margin_logs/step_0000176.npy create mode 100644 margin_logs/step_0000177.npy create mode 100644 margin_logs/step_0000178.npy create mode 100644 margin_logs/step_0000179.npy create mode 100644 margin_logs/step_0000180.npy create mode 100644 margin_logs/step_0000181.npy create mode 100644 margin_logs/step_0000182.npy create mode 100644 margin_logs/step_0000183.npy create mode 100644 margin_logs/step_0000184.npy create mode 100644 margin_logs/step_0000185.npy create mode 100644 margin_logs/step_0000186.npy create mode 100644 margin_logs/step_0000187.npy create mode 100644 margin_logs/step_0000188.npy create mode 100644 margin_logs/step_0000189.npy create mode 100644 margin_logs/step_0000190.npy create mode 100644 margin_logs/step_0000191.npy create mode 100644 margin_logs/step_0000192.npy create mode 100644 margin_logs/step_0000193.npy create mode 100644 margin_logs/step_0000194.npy create mode 100644 margin_logs/step_0000195.npy create mode 100644 margin_logs/step_0000196.npy create mode 100644 margin_logs/step_0000197.npy create mode 100644 margin_logs/step_0000198.npy create mode 100644 margin_logs/step_0000199.npy create mode 100644 margin_logs/step_0000200.npy create mode 100644 margin_logs/step_0000201.npy create mode 100644 margin_logs/step_0000202.npy create mode 100644 margin_logs/step_0000203.npy create mode 100644 margin_logs/step_0000204.npy create mode 100644 margin_logs/step_0000205.npy create mode 100644 margin_logs/step_0000206.npy create mode 100644 margin_logs/step_0000207.npy create mode 100644 margin_logs/step_0000208.npy create mode 100644 margin_logs/step_0000209.npy create mode 100644 margin_logs/step_0000210.npy create mode 100644 margin_logs/step_0000211.npy create mode 100644 margin_logs/step_0000212.npy create mode 100644 margin_logs/step_0000213.npy create mode 100644 margin_logs/step_0000214.npy create mode 100644 margin_logs/step_0000215.npy create mode 100644 margin_logs/step_0000216.npy create mode 100644 margin_logs/step_0000217.npy create mode 100644 margin_logs/step_0000218.npy create mode 100644 margin_logs/step_0000219.npy create mode 100644 margin_logs/step_0000220.npy create mode 100644 margin_logs/step_0000221.npy create mode 100644 margin_logs/step_0000222.npy create mode 100644 margin_logs/step_0000223.npy create mode 100644 margin_logs/step_0000224.npy create mode 100644 margin_logs/step_0000225.npy create mode 100644 margin_logs/step_0000226.npy create mode 100644 margin_logs/step_0000227.npy create mode 100644 margin_logs/step_0000228.npy create mode 100644 margin_logs/step_0000229.npy create mode 100644 margin_logs/step_0000230.npy create mode 100644 margin_logs/step_0000231.npy create mode 100644 margin_logs/step_0000232.npy create mode 100644 margin_logs/step_0000233.npy create mode 100644 margin_logs/step_0000234.npy create mode 100644 margin_logs/step_0000235.npy create mode 100644 margin_logs/step_0000236.npy create mode 100644 margin_logs/step_0000237.npy create mode 100644 margin_logs/step_0000238.npy create mode 100644 margin_logs/step_0000239.npy create mode 100644 margin_logs/step_0000240.npy create mode 100644 margin_logs/step_0000241.npy create mode 100644 margin_logs/step_0000242.npy create mode 100644 margin_logs/step_0000243.npy create mode 100644 margin_logs/step_0000244.npy create mode 100644 margin_logs/step_0000245.npy create mode 100644 margin_logs/step_0000246.npy create mode 100644 margin_logs/step_0000247.npy create mode 100644 margin_logs/step_0000248.npy create mode 100644 margin_logs/step_0000249.npy create mode 100644 margin_logs/step_0000250.npy create mode 100644 margin_logs/step_0000251.npy create mode 100644 margin_logs/step_0000252.npy create mode 100644 margin_logs/step_0000253.npy create mode 100644 margin_logs/step_0000254.npy create mode 100644 margin_logs/step_0000255.npy create mode 100644 margin_logs/step_0000256.npy create mode 100644 margin_logs/step_0000257.npy create mode 100644 margin_logs/step_0000258.npy create mode 100644 margin_logs/step_0000259.npy create mode 100644 margin_logs/step_0000260.npy create mode 100644 margin_logs/step_0000261.npy create mode 100644 margin_logs/step_0000262.npy create mode 100644 margin_logs/step_0000263.npy create mode 100644 margin_logs/step_0000264.npy create mode 100644 margin_logs/step_0000265.npy create mode 100644 margin_logs/step_0000266.npy create mode 100644 margin_logs/step_0000267.npy create mode 100644 margin_logs/step_0000268.npy create mode 100644 margin_logs/step_0000269.npy create mode 100644 margin_logs/step_0000270.npy create mode 100644 margin_logs/step_0000271.npy create mode 100644 margin_logs/step_0000272.npy create mode 100644 margin_logs/step_0000273.npy create mode 100644 margin_logs/step_0000274.npy create mode 100644 margin_logs/step_0000275.npy create mode 100644 margin_logs/step_0000276.npy create mode 100644 margin_logs/step_0000277.npy create mode 100644 margin_logs/step_0000278.npy create mode 100644 margin_logs/step_0000279.npy create mode 100644 margin_logs/step_0000280.npy create mode 100644 margin_logs/step_0000281.npy create mode 100644 margin_logs/step_0000282.npy create mode 100644 margin_logs/step_0000283.npy create mode 100644 margin_logs/step_0000284.npy create mode 100644 margin_logs/step_0000285.npy create mode 100644 margin_logs/step_0000286.npy create mode 100644 margin_logs/step_0000287.npy create mode 100644 margin_logs/step_0000288.npy create mode 100644 margin_logs/step_0000289.npy create mode 100644 margin_logs/step_0000290.npy create mode 100644 margin_logs/step_0000291.npy create mode 100644 margin_logs/step_0000292.npy create mode 100644 margin_logs/step_0000293.npy create mode 100644 margin_logs/step_0000294.npy create mode 100644 margin_logs/step_0000295.npy create mode 100644 margin_logs/step_0000296.npy create mode 100644 margin_logs/step_0000297.npy create mode 100644 margin_logs/step_0000298.npy create mode 100644 margin_logs/step_0000299.npy create mode 100644 margin_logs/step_0000300.npy create mode 100644 margin_logs/step_0000301.npy create mode 100644 margin_logs/step_0000302.npy create mode 100644 margin_logs/step_0000303.npy create mode 100644 margin_logs/step_0000304.npy create mode 100644 margin_logs/step_0000305.npy create mode 100644 margin_logs/step_0000306.npy create mode 100644 margin_logs/step_0000307.npy create mode 100644 margin_logs/step_0000308.npy create mode 100644 margin_logs/step_0000309.npy create mode 100644 margin_logs/step_0000310.npy create mode 100644 margin_logs/step_0000311.npy create mode 100644 margin_logs/step_0000312.npy create mode 100644 margin_logs/step_0000313.npy create mode 100644 margin_logs/step_0000314.npy create mode 100644 margin_logs/step_0000315.npy create mode 100644 margin_logs/step_0000316.npy create mode 100644 margin_logs/step_0000317.npy create mode 100644 margin_logs/step_0000318.npy create mode 100644 margin_logs/step_0000319.npy create mode 100644 margin_logs/step_0000320.npy create mode 100644 margin_logs/step_0000321.npy create mode 100644 margin_logs/step_0000322.npy create mode 100644 margin_logs/step_0000323.npy create mode 100644 margin_logs/step_0000324.npy create mode 100644 margin_logs/step_0000325.npy create mode 100644 margin_logs/step_0000326.npy create mode 100644 margin_logs/step_0000327.npy create mode 100644 margin_logs/step_0000328.npy create mode 100644 margin_logs/step_0000329.npy create mode 100644 margin_logs/step_0000330.npy create mode 100644 margin_logs/step_0000331.npy create mode 100644 margin_logs/step_0000332.npy create mode 100644 margin_logs/step_0000333.npy create mode 100644 margin_logs/step_0000334.npy create mode 100644 margin_logs/step_0000335.npy create mode 100644 margin_logs/step_0000336.npy create mode 100644 margin_logs/step_0000337.npy create mode 100644 margin_logs/step_0000338.npy create mode 100644 margin_logs/step_0000339.npy create mode 100644 margin_logs/step_0000340.npy create mode 100644 margin_logs/step_0000341.npy create mode 100644 margin_logs/step_0000342.npy create mode 100644 margin_logs/step_0000343.npy create mode 100644 margin_logs/step_0000344.npy create mode 100644 margin_logs/step_0000345.npy create mode 100644 margin_logs/step_0000346.npy create mode 100644 margin_logs/step_0000347.npy create mode 100644 margin_logs/step_0000348.npy create mode 100644 margin_logs/step_0000349.npy create mode 100644 margin_logs/step_0000350.npy create mode 100644 margin_logs/step_0000351.npy create mode 100644 margin_logs/step_0000352.npy create mode 100644 margin_logs/step_0000353.npy create mode 100644 margin_logs/step_0000354.npy create mode 100644 margin_logs/step_0000355.npy create mode 100644 margin_logs/step_0000356.npy create mode 100644 margin_logs/step_0000357.npy create mode 100644 margin_logs/step_0000358.npy create mode 100644 margin_logs/step_0000359.npy create mode 100644 margin_logs/step_0000360.npy create mode 100644 margin_logs/step_0000361.npy create mode 100644 margin_logs/step_0000362.npy create mode 100644 margin_logs/step_0000363.npy create mode 100644 margin_logs/step_0000364.npy create mode 100644 margin_logs/step_0000365.npy create mode 100644 margin_logs/step_0000366.npy create mode 100644 margin_logs/step_0000367.npy create mode 100644 margin_logs/step_0000368.npy create mode 100644 margin_logs/step_0000369.npy create mode 100644 margin_logs/step_0000370.npy create mode 100644 margin_logs/step_0000371.npy create mode 100644 margin_logs/step_0000372.npy create mode 100644 margin_logs/step_0000373.npy create mode 100644 margin_logs/step_0000374.npy create mode 100644 margin_logs/step_0000375.npy create mode 100644 margin_logs/step_0000376.npy create mode 100644 margin_logs/step_0000377.npy create mode 100644 margin_logs/step_0000378.npy create mode 100644 margin_logs/step_0000379.npy create mode 100644 margin_logs/step_0000380.npy create mode 100644 margin_logs/step_0000381.npy create mode 100644 margin_logs/step_0000382.npy create mode 100644 margin_logs/step_0000383.npy create mode 100644 margin_logs/step_0000384.npy create mode 100644 margin_logs/step_0000385.npy create mode 100644 margin_logs/step_0000386.npy create mode 100644 margin_logs/step_0000387.npy create mode 100644 margin_logs/step_0000388.npy create mode 100644 margin_logs/step_0000389.npy create mode 100644 margin_logs/step_0000390.npy create mode 100644 margin_logs/step_0000391.npy create mode 100644 margin_logs/step_0000392.npy create mode 100644 margin_logs/step_0000393.npy create mode 100644 margin_logs/step_0000394.npy create mode 100644 margin_logs/step_0000395.npy create mode 100644 margin_logs/step_0000396.npy create mode 100644 margin_logs/step_0000397.npy create mode 100644 margin_logs/step_0000398.npy create mode 100644 margin_logs/step_0000399.npy create mode 100644 margin_logs/step_0000400.npy create mode 100644 margin_logs/step_0000401.npy create mode 100644 margin_logs/step_0000402.npy create mode 100644 margin_logs/step_0000403.npy create mode 100644 margin_logs/step_0000404.npy create mode 100644 margin_logs/step_0000405.npy create mode 100644 margin_logs/step_0000406.npy create mode 100644 margin_logs/step_0000407.npy create mode 100644 margin_logs/step_0000408.npy create mode 100644 margin_logs/step_0000409.npy create mode 100644 margin_logs/step_0000410.npy create mode 100644 margin_logs/step_0000411.npy create mode 100644 margin_logs/step_0000412.npy create mode 100644 margin_logs/step_0000413.npy create mode 100644 margin_logs/step_0000414.npy create mode 100644 margin_logs/step_0000415.npy create mode 100644 margin_logs/step_0000416.npy create mode 100644 margin_logs/step_0000417.npy create mode 100644 margin_logs/step_0000418.npy create mode 100644 margin_logs/step_0000419.npy create mode 100644 margin_logs/step_0000420.npy create mode 100644 margin_logs/step_0000421.npy create mode 100644 margin_logs/step_0000422.npy create mode 100644 margin_logs/step_0000423.npy create mode 100644 margin_logs/step_0000424.npy create mode 100644 margin_logs/step_0000425.npy create mode 100644 margin_logs/step_0000426.npy create mode 100644 margin_logs/step_0000427.npy create mode 100644 margin_logs/step_0000428.npy create mode 100644 margin_logs/step_0000429.npy create mode 100644 margin_logs/step_0000430.npy create mode 100644 margin_logs/step_0000431.npy create mode 100644 margin_logs/step_0000432.npy create mode 100644 margin_logs/step_0000433.npy create mode 100644 margin_logs/step_0000434.npy create mode 100644 margin_logs/step_0000435.npy create mode 100644 margin_logs/step_0000436.npy create mode 100644 margin_logs/step_0000437.npy create mode 100644 margin_logs/step_0000438.npy create mode 100644 margin_logs/step_0000439.npy create mode 100644 margin_logs/step_0000440.npy create mode 100644 margin_logs/step_0000441.npy create mode 100644 margin_logs/step_0000442.npy create mode 100644 margin_logs/step_0000443.npy create mode 100644 margin_logs/step_0000444.npy create mode 100644 margin_logs/step_0000445.npy create mode 100644 margin_logs/step_0000446.npy create mode 100644 margin_logs/step_0000447.npy create mode 100644 margin_logs/step_0000448.npy create mode 100644 margin_logs/step_0000449.npy create mode 100644 margin_logs/step_0000450.npy create mode 100644 margin_logs/step_0000451.npy create mode 100644 margin_logs/step_0000452.npy create mode 100644 margin_logs/step_0000453.npy create mode 100644 margin_logs/step_0000454.npy create mode 100644 margin_logs/step_0000455.npy create mode 100644 margin_logs/step_0000456.npy create mode 100644 margin_logs/step_0000457.npy create mode 100644 margin_logs/step_0000458.npy create mode 100644 margin_logs/step_0000459.npy create mode 100644 margin_logs/step_0000460.npy create mode 100644 margin_logs/step_0000461.npy create mode 100644 margin_logs/step_0000462.npy create mode 100644 margin_logs/step_0000463.npy create mode 100644 margin_logs/step_0000464.npy create mode 100644 margin_logs/step_0000465.npy create mode 100644 margin_logs/step_0000466.npy create mode 100644 margin_logs/step_0000467.npy create mode 100644 margin_logs/step_0000468.npy create mode 100644 margin_logs/step_0000469.npy create mode 100644 margin_logs/step_0000470.npy create mode 100644 margin_logs/step_0000471.npy create mode 100644 margin_logs/step_0000472.npy create mode 100644 margin_logs/step_0000473.npy create mode 100644 margin_logs/step_0000474.npy create mode 100644 margin_logs/step_0000475.npy create mode 100644 margin_logs/step_0000476.npy create mode 100644 margin_logs/step_0000477.npy create mode 100644 margin_logs/step_0000478.npy create mode 100644 margin_logs/step_0000479.npy create mode 100644 margin_logs/step_0000480.npy create mode 100644 margin_logs/step_0000481.npy create mode 100644 margin_logs/step_0000482.npy create mode 100644 margin_logs/step_0000483.npy create mode 100644 margin_logs/step_0000484.npy create mode 100644 margin_logs/step_0000485.npy create mode 100644 margin_logs/step_0000486.npy create mode 100644 margin_logs/step_0000487.npy create mode 100644 margin_logs/step_0000488.npy create mode 100644 margin_logs/step_0000489.npy create mode 100644 margin_logs/step_0000490.npy create mode 100644 margin_logs/step_0000491.npy create mode 100644 margin_logs/step_0000492.npy create mode 100644 margin_logs/step_0000493.npy create mode 100644 margin_logs/step_0000494.npy create mode 100644 margin_logs/step_0000495.npy create mode 100644 margin_logs/step_0000496.npy create mode 100644 margin_logs/step_0000497.npy create mode 100644 margin_logs/step_0000498.npy create mode 100644 margin_logs/step_0000499.npy create mode 100644 margin_logs/step_0000500.npy create mode 100644 margin_logs/step_0000501.npy create mode 100644 margin_logs/step_0000502.npy create mode 100644 margin_logs/step_0000503.npy create mode 100644 margin_logs/step_0000504.npy create mode 100644 margin_logs/step_0000505.npy create mode 100644 margin_logs/step_0000506.npy create mode 100644 margin_logs/step_0000507.npy create mode 100644 margin_logs/step_0000508.npy create mode 100644 margin_logs/step_0000509.npy create mode 100644 margin_logs/step_0000510.npy create mode 100644 margin_logs/step_0000511.npy create mode 100644 margin_logs/step_0000512.npy create mode 100644 margin_logs/step_0000513.npy create mode 100644 margin_logs/step_0000514.npy create mode 100644 margin_logs/step_0000515.npy create mode 100644 margin_logs/step_0000516.npy create mode 100644 margin_logs/step_0000517.npy create mode 100644 margin_logs/step_0000518.npy create mode 100644 margin_logs/step_0000519.npy create mode 100644 margin_logs/step_0000520.npy create mode 100644 margin_logs/step_0000521.npy create mode 100644 margin_logs/step_0000522.npy create mode 100644 margin_logs/step_0000523.npy create mode 100644 margin_logs/step_0000524.npy create mode 100644 margin_logs/step_0000525.npy create mode 100644 margin_logs/step_0000526.npy create mode 100644 margin_logs/step_0000527.npy create mode 100644 margin_logs/step_0000528.npy create mode 100644 margin_logs/step_0000529.npy create mode 100644 margin_logs/step_0000530.npy create mode 100644 margin_logs/step_0000531.npy create mode 100644 margin_logs/step_0000532.npy create mode 100644 margin_logs/step_0000533.npy create mode 100644 margin_logs/step_0000534.npy create mode 100644 margin_logs/step_0000535.npy create mode 100644 margin_logs/step_0000536.npy create mode 100644 margin_logs/step_0000537.npy create mode 100644 margin_logs/step_0000538.npy create mode 100644 margin_logs/step_0000539.npy create mode 100644 margin_logs/step_0000540.npy create mode 100644 margin_logs/step_0000541.npy create mode 100644 margin_logs/step_0000542.npy create mode 100644 margin_logs/step_0000543.npy create mode 100644 margin_logs/step_0000544.npy create mode 100644 margin_logs/step_0000545.npy create mode 100644 margin_logs/step_0000546.npy create mode 100644 margin_logs/step_0000547.npy create mode 100644 margin_logs/step_0000548.npy create mode 100644 margin_logs/step_0000549.npy create mode 100644 margin_logs/step_0000550.npy create mode 100644 margin_logs/step_0000551.npy create mode 100644 margin_logs/step_0000552.npy create mode 100644 margin_logs/step_0000553.npy create mode 100644 margin_logs/step_0000554.npy create mode 100644 margin_logs/step_0000555.npy create mode 100644 margin_logs/step_0000556.npy create mode 100644 margin_logs/step_0000557.npy create mode 100644 margin_logs/step_0000558.npy create mode 100644 margin_logs/step_0000559.npy create mode 100644 margin_logs/step_0000560.npy create mode 100644 margin_logs/step_0000561.npy create mode 100644 margin_logs/step_0000562.npy create mode 100644 margin_logs/step_0000563.npy create mode 100644 margin_logs/step_0000564.npy create mode 100644 margin_logs/step_0000565.npy create mode 100644 margin_logs/step_0000566.npy create mode 100644 margin_logs/step_0000567.npy create mode 100644 margin_logs/step_0000568.npy create mode 100644 margin_logs/step_0000569.npy create mode 100644 margin_logs/step_0000570.npy create mode 100644 margin_logs/step_0000571.npy create mode 100644 margin_logs/step_0000572.npy create mode 100644 margin_logs/step_0000573.npy create mode 100644 margin_logs/step_0000574.npy create mode 100644 margin_logs/step_0000575.npy create mode 100644 margin_logs/step_0000576.npy create mode 100644 margin_logs/step_0000577.npy create mode 100644 margin_logs/step_0000578.npy create mode 100644 margin_logs/step_0000579.npy create mode 100644 margin_logs/step_0000580.npy create mode 100644 margin_logs/step_0000581.npy create mode 100644 margin_logs/step_0000582.npy create mode 100644 margin_logs/step_0000583.npy create mode 100644 margin_logs/step_0000584.npy create mode 100644 margin_logs/step_0000585.npy create mode 100644 margin_logs/step_0000586.npy create mode 100644 margin_logs/step_0000587.npy create mode 100644 margin_logs/step_0000588.npy create mode 100644 margin_logs/step_0000589.npy create mode 100644 margin_logs/step_0000590.npy create mode 100644 margin_logs/step_0000591.npy create mode 100644 margin_logs/step_0000592.npy create mode 100644 margin_logs/step_0000593.npy create mode 100644 margin_logs/step_0000594.npy create mode 100644 margin_logs/step_0000595.npy create mode 100644 margin_logs/step_0000596.npy create mode 100644 margin_logs/step_0000597.npy create mode 100644 margin_logs/step_0000598.npy create mode 100644 margin_logs/step_0000599.npy create mode 100644 margin_logs/step_0000600.npy create mode 100644 margin_logs/step_0000601.npy create mode 100644 margin_logs/step_0000602.npy create mode 100644 margin_logs/step_0000603.npy create mode 100644 margin_logs/step_0000604.npy create mode 100644 margin_logs/step_0000605.npy create mode 100644 margin_logs/step_0000606.npy create mode 100644 margin_logs/step_0000607.npy create mode 100644 margin_logs/step_0000608.npy create mode 100644 margin_logs/step_0000609.npy create mode 100644 margin_logs/step_0000610.npy create mode 100644 margin_logs/step_0000611.npy create mode 100644 margin_logs/step_0000612.npy create mode 100644 margin_logs/step_0000613.npy create mode 100644 margin_logs/step_0000614.npy create mode 100644 margin_logs/step_0000615.npy create mode 100644 margin_logs/step_0000616.npy create mode 100644 margin_logs/step_0000617.npy create mode 100644 margin_logs/step_0000618.npy create mode 100644 margin_logs/step_0000619.npy create mode 100644 margin_logs/step_0000620.npy create mode 100644 margin_logs/step_0000621.npy create mode 100644 margin_logs/step_0000622.npy create mode 100644 margin_logs/step_0000623.npy create mode 100644 margin_logs/step_0000624.npy create mode 100644 margin_logs/step_0000625.npy create mode 100644 margin_logs/step_0000626.npy create mode 100644 margin_logs/step_0000627.npy create mode 100644 margin_logs/step_0000628.npy create mode 100644 margin_logs/step_0000629.npy create mode 100644 margin_logs/step_0000630.npy create mode 100644 margin_logs/step_0000631.npy create mode 100644 margin_logs/step_0000632.npy create mode 100644 margin_logs/step_0000633.npy create mode 100644 margin_logs/step_0000634.npy create mode 100644 margin_logs/step_0000635.npy create mode 100644 margin_logs/step_0000636.npy create mode 100644 margin_logs/step_0000637.npy create mode 100644 margin_logs/step_0000638.npy create mode 100644 margin_logs/step_0000639.npy create mode 100644 margin_logs/step_0000640.npy create mode 100644 margin_logs/step_0000641.npy create mode 100644 margin_logs/step_0000642.npy create mode 100644 margin_logs/step_0000643.npy create mode 100644 margin_logs/step_0000644.npy create mode 100644 margin_logs/step_0000645.npy create mode 100644 margin_logs/step_0000646.npy create mode 100644 margin_logs/step_0000647.npy create mode 100644 margin_logs/step_0000648.npy create mode 100644 margin_logs/step_0000649.npy create mode 100644 margin_logs/step_0000650.npy create mode 100644 margin_logs/step_0000651.npy create mode 100644 margin_logs/step_0000652.npy create mode 100644 margin_logs/step_0000653.npy create mode 100644 margin_logs/step_0000654.npy create mode 100644 margin_logs/step_0000655.npy create mode 100644 margin_logs/step_0000656.npy create mode 100644 margin_logs/step_0000657.npy create mode 100644 margin_logs/step_0000658.npy create mode 100644 margin_logs/step_0000659.npy create mode 100644 margin_logs/step_0000660.npy create mode 100644 margin_logs/step_0000661.npy create mode 100644 margin_logs/step_0000662.npy create mode 100644 margin_logs/step_0000663.npy create mode 100644 margin_logs/step_0000664.npy create mode 100644 margin_logs/step_0000665.npy create mode 100644 margin_logs/step_0000666.npy create mode 100644 margin_logs/step_0000667.npy create mode 100644 margin_logs/step_0000668.npy create mode 100644 margin_logs/step_0000669.npy create mode 100644 margin_logs/step_0000670.npy create mode 100644 margin_logs/step_0000671.npy create mode 100644 margin_logs/step_0000672.npy create mode 100644 margin_logs/step_0000673.npy create mode 100644 margin_logs/step_0000674.npy create mode 100644 margin_logs/step_0000675.npy create mode 100644 margin_logs/step_0000676.npy create mode 100644 margin_logs/step_0000677.npy create mode 100644 margin_logs/step_0000678.npy create mode 100644 margin_logs/step_0000679.npy create mode 100644 margin_logs/step_0000680.npy create mode 100644 margin_logs/step_0000681.npy create mode 100644 model-00001-of-00007.safetensors create mode 100644 model-00002-of-00007.safetensors create mode 100644 model-00003-of-00007.safetensors create mode 100644 model-00004-of-00007.safetensors create mode 100644 model-00005-of-00007.safetensors create mode 100644 model-00006-of-00007.safetensors create mode 100644 model-00007-of-00007.safetensors create mode 100644 model.safetensors.index.json create mode 100644 special_tokens_map.json create mode 100644 tokenizer.json create mode 100644 tokenizer_config.json create mode 100644 train.log create mode 100644 train_results.json create mode 100644 trainer_state.json diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..52373fe --- /dev/null +++ b/.gitattributes @@ -0,0 +1,36 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text +tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md new file mode 100644 index 0000000..90fe550 --- /dev/null +++ b/README.md @@ -0,0 +1,62 @@ +--- +library_name: transformers +base_model: W-61/llama-3-8b-base-sft-hh-helpful-4xh200 +tags: +- alignment-handbook +- new-dpo +- generated_from_trainer +datasets: +- Anthropic/hh-rlhf +model-index: +- name: llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449 + results: [] +--- + + + +# llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449 + +This model is a fine-tuned version of [W-61/llama-3-8b-base-sft-hh-helpful-4xh200](https://huggingface.co/W-61/llama-3-8b-base-sft-hh-helpful-4xh200) on the Anthropic/hh-rlhf dataset. + +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 5e-07 +- train_batch_size: 8 +- eval_batch_size: 8 +- seed: 42 +- distributed_type: multi-GPU +- num_devices: 4 +- gradient_accumulation_steps: 2 +- total_train_batch_size: 64 +- total_eval_batch_size: 32 +- optimizer: Use OptimizerNames.ADAMW_TORCH with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments +- lr_scheduler_type: cosine +- lr_scheduler_warmup_ratio: 0.1 +- num_epochs: 1 + +### Training results + + + +### Framework versions + +- Transformers 4.51.0 +- Pytorch 2.3.1+cu121 +- Datasets 2.21.0 +- Tokenizers 0.21.4 diff --git a/all_results.json b/all_results.json new file mode 100644 index 0000000..112710c --- /dev/null +++ b/all_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 1.0, + "total_flos": 0.0, + "train_loss": 1.1094634984558374, + "train_runtime": 1738.7131, + "train_samples": 43598, + "train_samples_per_second": 25.075, + "train_steps_per_second": 0.392 +} \ No newline at end of file diff --git a/config.json b/config.json new file mode 100644 index 0000000..5092b09 --- /dev/null +++ b/config.json @@ -0,0 +1,29 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": 128001, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 8192, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "torch_dtype": "float32", + "transformers_version": "4.51.0", + "use_cache": true, + "vocab_size": 128256 +} diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000..76247c9 --- /dev/null +++ b/generation_config.json @@ -0,0 +1,9 @@ +{ + "bos_token_id": 128000, + "do_sample": true, + "eos_token_id": 128001, + "max_length": 4096, + "temperature": 0.6, + "top_p": 0.9, + "transformers_version": "4.51.0" +} diff --git a/margin_logs/margins.jsonl b/margin_logs/margins.jsonl new file mode 100644 index 0000000..bbb4f90 --- /dev/null +++ b/margin_logs/margins.jsonl @@ -0,0 +1,681 @@ +{"epoch": 0.0, "step": 1, "batch_size": 64, "mean": -0.02287048101425171, "std": 0.42023447155952454, "min": -1.4034271240234375, "p10": -0.46674575805664065, "median": 0.04234886169433594, "p90": 0.4323463439941407, "max": 0.89263916015625, "pos_frac": 0.53125, "sample": [-0.06523895263671875, 0.436798095703125, 0.27811431884765625, -0.9194221496582031, 0.018890380859375, 0.20587158203125, 0.18878173828125, -0.3968696594238281, 0.26206207275390625, 0.2470550537109375, -0.040912628173828125, 0.4394989013671875, -0.44133758544921875, -0.39148712158203125, 0.2764854431152344, 0.89263916015625, -0.42584991455078125, -0.46125030517578125, -0.8638992309570312, -0.3508758544921875, 0.371368408203125, 0.887847900390625, -0.382904052734375, 0.36145782470703125, -0.4890003204345703, 0.052455902099609375, -0.036136627197265625, 0.23079299926757812, 0.2469482421875, 0.1643218994140625, -0.07129669189453125, 0.2790794372558594, 0.3637123107910156, -0.8916168212890625, 0.03298759460449219, -0.2790107727050781, -0.17860984802246094, 0.23892593383789062, 0.05171012878417969, -0.2564239501953125, -0.14655303955078125, 0.27777862548828125, 0.0810394287109375, -1.4034271240234375, -0.28739166259765625, -0.1489429473876953, 0.44918060302734375, 0.1693286895751953, 0.10933303833007812, -0.14766693115234375, -0.40944671630859375, -0.18532562255859375, 0.6261310577392578, -0.20856857299804688, 0.602569580078125, 0.05538177490234375, 0.1505279541015625, 0.1313800811767578, -0.006317138671875, 0.42195892333984375, -0.29936981201171875, -0.4691009521484375, 0.16705322265625, -0.5789260864257812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000001.npy"} +{"epoch": 0.0014684287812041115, "step": 2, "batch_size": 64, "mean": -0.06572240591049194, "std": 0.3523969054222107, "min": -0.9291305541992188, "p10": -0.46334152221679686, "median": -0.05502510070800781, "p90": 0.3672500610351563, "max": 1.0444793701171875, "pos_frac": 0.4375, "sample": [-0.2829437255859375, 0.3027191162109375, -0.19867706298828125, -0.3062286376953125, 0.10318756103515625, 0.20131683349609375, -0.34906005859375, 0.2802886962890625, 0.1914520263671875, -0.31072998046875, 0.08922195434570312, 0.10284614562988281, -0.03655242919921875, -0.0604095458984375, -0.06208038330078125, 0.32562255859375, -0.37982177734375, 0.2746162414550781, -0.049640655517578125, 0.3752174377441406, -0.103973388671875, 0.0699462890625, 0.36417388916015625, -0.033428192138671875, 0.37265777587890625, -0.3787078857421875, -0.6610565185546875, 0.4720420837402344, 0.47701263427734375, -0.27928924560546875, -0.44719696044921875, -0.0965118408203125, -0.7628555297851562, 0.046764373779296875, 0.06670379638671875, -0.9291305541992188, -0.7122802734375, -0.16554832458496094, 0.1485595703125, -0.07539939880371094, 0.2588920593261719, 0.039890289306640625, 0.201690673828125, 0.0623016357421875, 1.0444793701171875, -0.37696075439453125, -0.02794647216796875, -0.223297119140625, -0.35730743408203125, -0.1309051513671875, -0.3106689453125, -0.11409187316894531, -0.1669769287109375, 0.131317138671875, -0.2361297607421875, 0.4093780517578125, -0.6485977172851562, 0.36856842041015625, -0.1951904296875, -0.4702606201171875, -0.7624168395996094, 0.008928298950195312, -0.31630706787109375, 0.022550582885742188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000002.npy"} +{"epoch": 0.002936857562408223, "step": 3, "batch_size": 64, "mean": -0.015658140182495117, "std": 0.3931349515914917, "min": -0.8375015258789062, "p10": -0.4415412902832031, "median": -0.041319847106933594, "p90": 0.4769941329956056, "max": 1.2759246826171875, "pos_frac": 0.4375, "sample": [-0.0051898956298828125, 0.0579986572265625, -0.11006927490234375, 1.2759246826171875, -0.3489208221435547, -0.04048347473144531, 0.29725074768066406, 0.24773216247558594, -0.07208251953125, 0.009889602661132812, -0.49124908447265625, -0.042156219482421875, -0.194305419921875, -0.5728988647460938, 0.0055332183837890625, 0.30835723876953125, 0.2819061279296875, 0.4946632385253906, 0.214202880859375, -0.4156627655029297, -0.8375015258789062, 0.325897216796875, 0.29927825927734375, -0.2085723876953125, -0.8128509521484375, -0.34453582763671875, 0.00048828125, 0.43576622009277344, 0.2576141357421875, -0.399444580078125, 0.559814453125, 0.8416976928710938, -0.11800193786621094, -0.09100341796875, -0.018003463745117188, -0.10027313232421875, -0.5291595458984375, -0.15914344787597656, 0.11544036865234375, -0.1276702880859375, -0.5084991455078125, -0.090972900390625, -0.180816650390625, -0.02978515625, 0.272796630859375, -0.3883056640625, -0.41355133056640625, 0.7912101745605469, -0.4029502868652344, -0.43085479736328125, 0.18115615844726562, -0.3004341125488281, -0.054759979248046875, 0.01683807373046875, 0.14450836181640625, 0.76715087890625, 0.11634063720703125, -0.25457763671875, -0.3206024169921875, 0.539398193359375, 0.12883758544921875, 0.17095947265625, -0.2993621826171875, -0.4461212158203125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000003.npy"} +{"epoch": 0.004405286343612335, "step": 4, "batch_size": 64, "mean": -0.036018311977386475, "std": 0.3587802052497864, "min": -0.925537109375, "p10": -0.48462104797363276, "median": -0.04270172119140625, "p90": 0.43331832885742194, "max": 0.936065673828125, "pos_frac": 0.390625, "sample": [0.2908210754394531, -0.2829437255859375, -0.1501789093017578, 0.36823272705078125, -0.6035537719726562, -0.06729507446289062, 0.15234375, -0.36602783203125, -0.41641807556152344, 0.14521026611328125, -0.047283172607421875, 0.5647811889648438, -0.00652313232421875, -0.16985321044921875, -0.1952056884765625, -0.01318359375, -0.5085792541503906, -0.35166168212890625, -0.30796051025390625, -0.06627464294433594, -0.7504653930664062, -0.197265625, -0.1905517578125, -0.03943634033203125, -0.05162811279296875, -0.039520263671875, 0.4402008056640625, -0.19553756713867188, 0.05075836181640625, -0.79180908203125, -0.030344009399414062, 0.2085132598876953, -0.42871856689453125, -0.03533172607421875, -0.16347694396972656, 0.2800006866455078, -0.5642662048339844, 0.936065673828125, -0.2528495788574219, 0.456298828125, 0.11753082275390625, -0.19165992736816406, 0.41725921630859375, 0.1950836181640625, 0.11084747314453125, -0.002605438232421875, 0.293212890625, 0.2449798583984375, 0.035678863525390625, -0.7140960693359375, -0.12926101684570312, 0.5840797424316406, 0.45304107666015625, 0.1726531982421875, 0.1567096710205078, 0.0697021484375, 0.6154251098632812, -0.08661270141601562, 0.3491859436035156, -0.15857315063476562, -0.0458831787109375, -0.09375572204589844, -0.925537109375, -0.38166046142578125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000004.npy"} +{"epoch": 0.005873715124816446, "step": 5, "batch_size": 64, "mean": 0.027777403593063354, "std": 0.3420843482017517, "min": -0.7743988037109375, "p10": -0.42810897827148436, "median": 0.051219940185546875, "p90": 0.4526695251464846, "max": 0.8915786743164062, "pos_frac": 0.53125, "sample": [-0.29157257080078125, 0.47571563720703125, -0.14392852783203125, 0.23302841186523438, 0.8915786743164062, -0.40155029296875, -0.17276763916015625, -0.0030498504638671875, -0.13559341430664062, 0.0474395751953125, -0.17301177978515625, 0.264739990234375, -0.0041561126708984375, 0.19454193115234375, -0.34299468994140625, 0.11423110961914062, -0.117706298828125, -0.5872459411621094, 0.2611198425292969, -0.3226165771484375, -0.1662750244140625, 0.870452880859375, -0.21020889282226562, 0.09019088745117188, 0.0199127197265625, 0.12286376953125, 0.14850616455078125, 0.3129692077636719, -0.3033599853515625, 0.24124908447265625, -0.4634113311767578, -0.18390846252441406, 0.398895263671875, 0.15839767456054688, 0.19651031494140625, 0.550201416015625, 0.27150726318359375, -0.0257110595703125, 0.277130126953125, 0.26573944091796875, -0.0086517333984375, -0.07102203369140625, 0.495208740234375, -0.7743988037109375, 0.38887786865234375, 0.07486724853515625, -0.43949127197265625, -0.0272064208984375, -0.7345733642578125, -0.47322845458984375, 0.11308479309082031, 0.13492202758789062, 0.34607696533203125, -0.05937957763671875, -0.13585662841796875, 0.09344482421875, 0.647247314453125, -0.17372894287109375, -0.2872314453125, 0.5024833679199219, 0.1175994873046875, 0.14495086669921875, -0.50909423828125, 0.05500030517578125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000005.npy"} +{"epoch": 0.007342143906020558, "step": 6, "batch_size": 64, "mean": 0.03913196921348572, "std": 0.39269861578941345, "min": -0.8067741394042969, "p10": -0.450848388671875, "median": 0.06663894653320312, "p90": 0.506412696838379, "max": 0.83380126953125, "pos_frac": 0.578125, "sample": [0.2060089111328125, 0.5368900299072266, 0.06177520751953125, -0.1323699951171875, 0.14480209350585938, 0.12994384765625, -0.1610851287841797, -0.34391021728515625, 0.08258056640625, -0.4073486328125, 0.30466461181640625, 0.7540512084960938, -0.4200439453125, 0.4360942840576172, -0.3580780029296875, -0.6285552978515625, 0.36684417724609375, -0.21984291076660156, -0.22744178771972656, 0.4492950439453125, -0.4139747619628906, -0.3361968994140625, 0.248046875, 0.0004062652587890625, -0.2219066619873047, 0.0129241943359375, 0.14292144775390625, 0.25690460205078125, 0.071502685546875, 0.4040946960449219, 0.39574432373046875, 0.26462745666503906, 0.30571746826171875, 0.4032554626464844, -0.1747283935546875, 0.0388641357421875, -0.540985107421875, -0.8067741394042969, 0.13100242614746094, -0.46405029296875, 0.650054931640625, 0.83380126953125, -0.668121337890625, 0.5025768280029297, 0.09994125366210938, 0.05031776428222656, 0.3532543182373047, 0.2777996063232422, 0.441192626953125, -0.34412384033203125, -0.1116180419921875, -0.1627655029296875, -0.12014389038085938, 0.1125030517578125, 0.811370849609375, -0.6154098510742188, -0.049297332763671875, -0.6681976318359375, 0.508056640625, -0.0123291015625, -0.3262786865234375, -0.3011932373046875, 0.728240966796875, 0.22314453125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000006.npy"} +{"epoch": 0.00881057268722467, "step": 7, "batch_size": 64, "mean": -0.0085868239402771, "std": 0.36929556727409363, "min": -0.8198928833007812, "p10": -0.5253963470458984, "median": -0.0078277587890625, "p90": 0.43510589599609384, "max": 0.7887344360351562, "pos_frac": 0.5, "sample": [0.0660552978515625, -0.7017059326171875, 0.22045326232910156, -0.11574935913085938, -0.8063812255859375, 0.3195610046386719, -0.2984504699707031, 0.610809326171875, -0.09203338623046875, 0.41878509521484375, -0.0758056640625, 0.29642486572265625, -0.4434356689453125, 0.6133041381835938, 0.5284500122070312, 0.1335906982421875, 0.3832530975341797, -0.23143768310546875, -0.1382617950439453, -0.6042289733886719, -0.05113983154296875, -0.0203857421875, 0.3785972595214844, 0.065277099609375, 0.004730224609375, -0.24693679809570312, -0.214080810546875, -0.11739349365234375, -0.5223121643066406, -0.2814826965332031, -0.2527275085449219, 0.12579345703125, 0.13303756713867188, -0.3516845703125, 0.6834869384765625, 0.4118309020996094, 0.0873565673828125, 0.193511962890625, 0.097869873046875, -0.8198928833007812, -0.423492431640625, -0.6703338623046875, 0.13354110717773438, -0.346527099609375, 0.29711151123046875, -0.0272216796875, 0.3967552185058594, 0.08040237426757812, 0.0674591064453125, -0.3512420654296875, 0.2330322265625, -0.5267181396484375, -0.5547714233398438, 0.44210052490234375, 0.05260467529296875, -0.06905364990234375, -0.057041168212890625, 0.13224411010742188, 0.4831829071044922, -0.1024322509765625, 0.7887344360351562, -0.1556243896484375, -0.09123992919921875, 0.3323211669921875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000007.npy"} +{"epoch": 0.010279001468428781, "step": 8, "batch_size": 64, "mean": -0.09634318947792053, "std": 0.40947869420051575, "min": -1.2588043212890625, "p10": -0.6366325378417969, "median": -0.050934791564941406, "p90": 0.34046192169189465, "max": 1.2405929565429688, "pos_frac": 0.40625, "sample": [0.514251708984375, 0.07537841796875, -0.05887603759765625, -0.06158638000488281, -0.2584381103515625, -0.05810737609863281, 0.09177398681640625, 0.16762542724609375, 0.4863433837890625, -0.36405181884765625, -0.2481842041015625, 0.0378875732421875, -0.18804168701171875, 0.27057838439941406, 0.27423858642578125, -0.1644439697265625, -0.030694961547851562, 0.16215133666992188, 0.05963897705078125, -0.2588653564453125, -0.01737213134765625, -1.2588043212890625, 0.14166259765625, -0.1670074462890625, -0.08440589904785156, -0.5281295776367188, -0.5541152954101562, 0.3532238006591797, -0.04376220703125, -0.496795654296875, 0.3128471374511719, -0.45836639404296875, -0.28307342529296875, 0.676300048828125, -0.37369537353515625, -0.6930007934570312, -0.17707061767578125, -0.025175094604492188, -0.1424846649169922, 0.056671142578125, 0.17957687377929688, -0.8460159301757812, 0.01568603515625, -0.3712921142578125, -0.1360797882080078, -0.823028564453125, -0.7056884765625, 0.17137908935546875, 0.3522968292236328, 0.0657196044921875, -0.18630218505859375, -0.6564483642578125, -0.5240936279296875, -0.00787353515625, 1.2405929565429688, -0.648284912109375, -0.46152496337890625, -0.6094436645507812, 0.0416259765625, 0.0264434814453125, 0.07775497436523438, 0.6818714141845703, -0.01915740966796875, 0.2902984619140625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000008.npy"} +{"epoch": 0.011747430249632892, "step": 9, "batch_size": 64, "mean": -0.012945234775543213, "std": 0.4399082362651825, "min": -1.2348480224609375, "p10": -0.6562820434570311, "median": 0.02233600616455078, "p90": 0.5144311904907227, "max": 0.8320407867431641, "pos_frac": 0.515625, "sample": [-0.4419403076171875, -0.10701751708984375, 0.8320407867431641, 0.4642486572265625, -0.25360870361328125, -0.2138671875, -0.15659332275390625, -0.2716789245605469, 0.2684173583984375, 0.2679901123046875, -0.21331024169921875, 0.67779541015625, -0.7179336547851562, -0.15008544921875, -0.4409942626953125, -1.09783935546875, -0.3024139404296875, -0.2708091735839844, -0.88677978515625, -0.24997711181640625, -0.20217514038085938, 0.040950775146484375, 0.19257354736328125, 0.314727783203125, 0.5077743530273438, 0.08309173583984375, 0.5489959716796875, -0.084869384765625, -0.03826713562011719, 0.4524650573730469, 0.13898849487304688, 0.06430244445800781, -0.05036163330078125, 0.661224365234375, 0.2687225341796875, -0.5124282836914062, -0.038715362548828125, -0.015682220458984375, 0.3557586669921875, 0.2270050048828125, -0.08782958984375, -0.8063201904296875, -1.2348480224609375, 0.03618812561035156, 0.17120361328125, -0.1313323974609375, 0.00848388671875, 0.1641998291015625, -0.079254150390625, -0.013086318969726562, -0.1061248779296875, 0.04266357421875, 0.2224273681640625, 0.4110546112060547, 0.5166759490966797, 0.561767578125, 0.1627044677734375, 0.07568359375, 0.1983051300048828, 0.136444091796875, -0.7369384765625, 0.599212646484375, -1.09869384765625, 0.5091934204101562], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000009.npy"} +{"epoch": 0.013215859030837005, "step": 10, "batch_size": 64, "mean": 0.035097718238830566, "std": 0.33293405175209045, "min": -0.7979202270507812, "p10": -0.3428230285644531, "median": 0.014728546142578125, "p90": 0.5359405517578127, "max": 0.841827392578125, "pos_frac": 0.53125, "sample": [0.170440673828125, -0.23137664794921875, 0.1553821563720703, -0.06271743774414062, -0.1620311737060547, 0.5780029296875, 0.1474475860595703, 0.1669921875, -0.04015350341796875, -0.19385528564453125, 0.254241943359375, 0.579803466796875, -0.44954681396484375, 0.6824188232421875, -0.7979202270507812, 0.0970306396484375, -0.3803367614746094, -0.19876861572265625, 0.19648361206054688, 0.0052318572998046875, 0.5588951110839844, 0.045497894287109375, 0.3954143524169922, -0.1946868896484375, -0.05411529541015625, -0.24300384521484375, -0.5994338989257812, -0.13753509521484375, 0.2599830627441406, 0.4040336608886719, -0.152191162109375, -0.299224853515625, 0.013715744018554688, -0.25511932373046875, -0.28639984130859375, -0.21666717529296875, -0.326507568359375, 0.4823799133300781, 0.1708831787109375, 0.2111358642578125, -0.1300811767578125, -0.363006591796875, 0.03798484802246094, -0.1182861328125, 0.0742950439453125, -0.04776763916015625, 0.6140823364257812, 0.4033050537109375, -0.08172988891601562, 0.100616455078125, -0.28907012939453125, 0.015741348266601562, 0.0394287109375, -0.058506011962890625, 0.284088134765625, -0.34981536865234375, 0.38140106201171875, -0.2606201171875, 0.35472869873046875, 0.7069854736328125, 0.841827392578125, -0.4273681640625, 0.11353683471679688, 0.11066055297851562], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000010.npy"} +{"epoch": 0.014684287812041116, "step": 11, "batch_size": 64, "mean": 0.12356960773468018, "std": 0.4225120544433594, "min": -0.781341552734375, "p10": -0.30948410034179685, "median": 0.006224632263183594, "p90": 0.7036849975585939, "max": 1.248016357421875, "pos_frac": 0.53125, "sample": [-0.09303665161132812, 1.110443115234375, 0.5634059906005859, 0.20226097106933594, -0.02776336669921875, 0.36891937255859375, 0.1520538330078125, 0.04509925842285156, 0.0028972625732421875, 0.9641647338867188, 0.03432655334472656, -0.0872039794921875, -0.24532318115234375, -0.363006591796875, -0.1596832275390625, 0.3244171142578125, -0.1319427490234375, 0.021617889404296875, 0.478057861328125, 0.026844024658203125, 0.37949371337890625, -0.18076324462890625, -0.050327301025390625, 8.7738037109375e-05, -0.3367462158203125, 0.6709365844726562, 0.6584243774414062, -0.1446533203125, -0.339202880859375, 0.4292182922363281, 0.8759536743164062, -0.309967041015625, 0.8160076141357422, -0.173095703125, 0.18550872802734375, 1.248016357421875, 0.5782318115234375, 1.20452880859375, 0.6063919067382812, -0.05372428894042969, 0.14446067810058594, -0.15644454956054688, -0.32057952880859375, -0.781341552734375, -0.0431671142578125, 0.15151214599609375, 0.3282928466796875, -0.12717247009277344, -0.0743408203125, 0.0102996826171875, -0.5221958160400391, 0.287750244140625, 0.06974029541015625, -0.30835723876953125, 0.4559745788574219, -0.16716766357421875, -0.12574386596679688, -0.2371063232421875, -0.14055633544921875, 0.009552001953125, 0.7177200317382812, -0.037700653076171875, -0.20257949829101562, -0.27326202392578125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000011.npy"} +{"epoch": 0.016152716593245228, "step": 12, "batch_size": 64, "mean": 0.011912867426872253, "std": 0.4562760889530182, "min": -1.8329315185546875, "p10": -0.5580680847167967, "median": 0.08628320693969727, "p90": 0.43810272216796886, "max": 1.0754241943359375, "pos_frac": 0.578125, "sample": [1.0169677734375, -0.6394500732421875, 0.12822723388671875, -0.8969573974609375, -0.0366668701171875, 0.1422271728515625, -0.11616897583007812, 0.13160324096679688, 0.1490020751953125, 0.4508514404296875, 0.13712310791015625, -0.61346435546875, -0.11745452880859375, 0.39980316162109375, 0.037235260009765625, 0.1850128173828125, 0.39200592041015625, 0.54937744140625, 0.15970993041992188, -0.4214363098144531, -1.8329315185546875, -0.28136444091796875, 0.546112060546875, -0.42881011962890625, -0.38158416748046875, 0.11731719970703125, 0.1247406005859375, 1.0754241943359375, -0.0367431640625, -0.0940399169921875, -0.98846435546875, 0.383636474609375, 0.1141510009765625, -0.1887378692626953, 0.6654586791992188, -0.00173187255859375, -0.11975288391113281, -0.7116546630859375, -0.911224365234375, -0.03502655029296875, -0.024505615234375, 0.184539794921875, -0.029397964477539062, 0.07572555541992188, -0.2877349853515625, 0.6409225463867188, 0.408355712890625, 0.383087158203125, 0.07919788360595703, 0.3327789306640625, -0.10840988159179688, 0.03140449523925781, 0.2059173583984375, 0.11478424072265625, 0.153961181640625, 0.243682861328125, 0.0933685302734375, 0.13920211791992188, -0.1423358917236328, -0.14885330200195312, 0.26609039306640625, 0.16860198974609375, -0.1004638671875, 0.030181884765625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000012.npy"} +{"epoch": 0.01762114537444934, "step": 13, "batch_size": 64, "mean": -0.01975110173225403, "std": 0.3734043836593628, "min": -0.972320556640625, "p10": -0.459393310546875, "median": -0.04879570007324219, "p90": 0.3486932754516603, "max": 1.0973968505859375, "pos_frac": 0.421875, "sample": [-0.1880950927734375, -0.1652088165283203, 0.120819091796875, -0.19745635986328125, -0.453155517578125, 0.24860382080078125, 0.23322296142578125, 0.30879783630371094, 0.270538330078125, -0.3918800354003906, -0.10407257080078125, 0.706573486328125, -0.28171539306640625, -0.5949249267578125, -0.31195831298828125, 1.0973968505859375, -0.35330963134765625, -0.151397705078125, -0.27773475646972656, 0.08795928955078125, 0.10177993774414062, -0.01904296875, 0.20259857177734375, -0.040073394775390625, 0.5116710662841797, -0.0989990234375, -0.003231048583984375, -0.13030052185058594, -0.22042465209960938, -0.6062469482421875, -0.21010589599609375, 0.14189910888671875, 0.0526275634765625, 0.36579132080078125, -0.972320556640625, -0.07761383056640625, -0.05751800537109375, -0.07695388793945312, -0.18665122985839844, -0.6658935546875, 0.30158233642578125, 0.18074798583984375, -0.09552001953125, 0.25710296630859375, 0.534423828125, 0.051311492919921875, -0.018548965454101562, -0.027200698852539062, -0.05858612060546875, 0.15180206298828125, -0.462066650390625, -0.09716033935546875, 0.0991363525390625, 0.06884956359863281, 0.23600006103515625, -0.7449951171875, 0.0724029541015625, 0.5785140991210938, 1.06060791015625, -0.28324127197265625, -0.18695068359375, -0.2163238525390625, 0.237060546875, -0.5170135498046875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000013.npy"} +{"epoch": 0.01908957415565345, "step": 14, "batch_size": 64, "mean": 0.05890271067619324, "std": 0.3644644320011139, "min": -0.7371597290039062, "p10": -0.42365493774414054, "median": 0.0010852813720703125, "p90": 0.548341178894043, "max": 0.7784652709960938, "pos_frac": 0.5, "sample": [-0.22602081298828125, 0.022497177124023438, -0.0825347900390625, 0.07165145874023438, 0.6415863037109375, 0.37509918212890625, 0.21063232421875, -0.526397705078125, 0.14012908935546875, 0.11450386047363281, 0.36824798583984375, 0.16273880004882812, 0.74102783203125, 0.6937942504882812, -0.4961204528808594, 0.0687255859375, -0.4511756896972656, -0.16277313232421875, -0.044445037841796875, -0.014158248901367188, 0.53936767578125, -0.0196685791015625, 0.18851470947265625, 0.3149757385253906, 0.7407989501953125, -0.4540557861328125, -0.6722412109375, 0.15859222412109375, 0.522979736328125, 0.7784652709960938, -0.008068084716796875, -0.05006980895996094, 0.531463623046875, -0.006053924560546875, 0.17099571228027344, -0.21258163452148438, -0.06309890747070312, -0.348358154296875, 0.3687877655029297, 0.475677490234375, -0.125335693359375, -0.1960906982421875, 0.0082244873046875, 0.7521896362304688, -0.23978424072265625, -0.3594398498535156, 0.5521869659423828, 0.06943893432617188, -0.143157958984375, -0.19156646728515625, -0.7371597290039062, -0.04689788818359375, 0.10286712646484375, -0.5773582458496094, -0.09273529052734375, 0.42574310302734375, 0.47731781005859375, 0.2042999267578125, -0.2304840087890625, -0.113006591796875, -0.022432327270507812, -0.23944091796875, 0.11313629150390625, -0.1841716766357422], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000014.npy"} +{"epoch": 0.020558002936857563, "step": 15, "batch_size": 64, "mean": 0.015159964561462402, "std": 0.37288913130760193, "min": -1.1375350952148438, "p10": -0.5148216247558592, "median": 0.026262283325195312, "p90": 0.4606056213378906, "max": 0.9191207885742188, "pos_frac": 0.546875, "sample": [0.08838844299316406, 0.58917236328125, 0.5738639831542969, -0.19068336486816406, 0.46123504638671875, 0.13872718811035156, -0.08790779113769531, -0.11321258544921875, -0.5999412536621094, 0.459136962890625, 0.220489501953125, 0.05905342102050781, 0.106475830078125, -0.6724319458007812, 0.42464447021484375, 0.0009002685546875, -0.14478302001953125, -0.6951446533203125, -0.15690040588378906, 0.24054718017578125, -0.06341552734375, -1.1375350952148438, -0.24375152587890625, -0.12213134765625, -0.7669334411621094, 0.03954315185546875, 0.38489532470703125, 0.6318435668945312, 0.14653778076171875, 0.205291748046875, -0.2494964599609375, -0.5681076049804688, 0.214813232421875, -0.05573081970214844, 0.0803375244140625, 0.486572265625, -0.37555694580078125, 0.494964599609375, 0.02108001708984375, -0.09450340270996094, -0.054012298583984375, 0.42449951171875, 0.15030670166015625, 0.13757705688476562, 0.28220367431640625, -0.10283088684082031, 0.031444549560546875, -0.1730518341064453, 0.43084716796875, -0.040561676025390625, 0.14829254150390625, 0.13057327270507812, -0.3904876708984375, -0.2604484558105469, -0.30922698974609375, 0.33730316162109375, 0.014739990234375, 0.12994384765625, -0.04070281982421875, -0.00077056884765625, -0.024333953857421875, -0.7246322631835938, 0.22409820556640625, 0.9191207885742188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000015.npy"} +{"epoch": 0.022026431718061675, "step": 16, "batch_size": 64, "mean": 0.024068236351013184, "std": 0.40050816535949707, "min": -0.764373779296875, "p10": -0.4057914733886719, "median": -0.020565032958984375, "p90": 0.532659149169922, "max": 1.34521484375, "pos_frac": 0.46875, "sample": [0.04516029357910156, 1.34521484375, -0.764373779296875, -0.40688323974609375, -0.00457763671875, 0.10187149047851562, -0.258697509765625, 0.032375335693359375, -0.04830169677734375, 0.34474945068359375, 0.47686767578125, 0.300537109375, 1.1074600219726562, -0.46417999267578125, -0.027606964111328125, 0.13275909423828125, -0.2765083312988281, -0.4032440185546875, -0.2769927978515625, -0.3593292236328125, 0.5591278076171875, 0.3618907928466797, 0.21403884887695312, 0.507232666015625, -0.013523101806640625, -0.15185546875, -0.561767578125, -0.16212844848632812, -0.33934783935546875, 0.09944534301757812, 0.1415557861328125, 0.22269439697265625, 0.26474761962890625, -0.15667343139648438, -0.10848236083984375, 0.5435562133789062, -0.38616943359375, -0.1561126708984375, -0.08846282958984375, 0.0531158447265625, 0.37245941162109375, -0.0289459228515625, -0.048824310302734375, 0.592437744140625, 0.8286590576171875, -0.12997055053710938, -0.19473838806152344, -0.5200767517089844, 0.2691192626953125, 0.2151641845703125, -0.37526512145996094, -0.6778640747070312, -0.033843994140625, 0.17792129516601562, -0.06276702880859375, -0.2764892578125, 0.20079421997070312, 0.041656494140625, 0.5483665466308594, 0.2056884765625, -0.3825531005859375, -0.375701904296875, 0.31797027587890625, -0.56201171875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000016.npy"} +{"epoch": 0.023494860499265784, "step": 17, "batch_size": 64, "mean": 0.1112699806690216, "std": 0.3350566625595093, "min": -0.9517059326171875, "p10": -0.2717323303222656, "median": 0.11788558959960938, "p90": 0.49176712036132825, "max": 0.9522247314453125, "pos_frac": 0.65625, "sample": [-0.4025535583496094, 0.17017745971679688, 0.39892578125, 0.335968017578125, -0.18628692626953125, -0.21044540405273438, 0.2863922119140625, -0.9517059326171875, 0.14983367919921875, -0.24761962890625, -0.105224609375, -0.0765228271484375, 0.4058380126953125, 0.3535003662109375, -0.2353954315185547, -0.1678314208984375, -0.035717010498046875, 0.08437728881835938, 0.11722564697265625, 0.0972442626953125, 0.21219635009765625, 0.2752571105957031, 0.08160018920898438, 0.1194915771484375, -0.34503173828125, 0.03548431396484375, 0.42967796325683594, 0.06554794311523438, 0.4121856689453125, 0.5056076049804688, -0.13771820068359375, 0.8460845947265625, 0.664947509765625, -0.02191162109375, 0.16144752502441406, 0.41329193115234375, -0.35517120361328125, 0.05068397521972656, 0.116546630859375, 0.3243408203125, 0.527862548828125, -0.03325653076171875, 0.02005767822265625, 0.18631935119628906, -0.1227264404296875, 0.1187744140625, 0.3301734924316406, -0.28206634521484375, -0.040130615234375, -0.19681549072265625, 0.20672225952148438, 0.3406333923339844, 0.2176361083984375, 0.18653488159179688, 0.1185455322265625, -0.547760009765625, 0.77593994140625, 0.5372314453125, 0.9522247314453125, -0.46381378173828125, 0.25452423095703125, 0.0828704833984375, 0.45947265625, -0.14241409301757812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000017.npy"} +{"epoch": 0.024963289280469897, "step": 18, "batch_size": 64, "mean": 0.009129911661148071, "std": 0.35638073086738586, "min": -1.170318603515625, "p10": -0.39438018798828123, "median": 0.02855682373046875, "p90": 0.4722116470336914, "max": 0.8979949951171875, "pos_frac": 0.546875, "sample": [0.0904998779296875, 0.48029327392578125, 0.615570068359375, -0.2704620361328125, 0.07890701293945312, 0.03369903564453125, -0.2856483459472656, -0.4032745361328125, -0.4445648193359375, 0.4690380096435547, -0.373626708984375, -0.11449432373046875, -0.00131988525390625, 0.5221672058105469, -0.1253509521484375, 0.47357177734375, -0.2053203582763672, 0.018497467041015625, -0.2389984130859375, -0.100189208984375, 0.31801414489746094, 0.0342864990234375, -0.82598876953125, 0.10405540466308594, 0.20461273193359375, 0.2223052978515625, -0.130126953125, 0.4591522216796875, -0.2239818572998047, -0.3165149688720703, 0.3580436706542969, 0.24739646911621094, -0.141143798828125, -0.5412521362304688, 0.8979949951171875, -0.06795883178710938, 0.14827728271484375, 0.09657096862792969, -0.5007705688476562, -0.345184326171875, 0.5555915832519531, 0.416595458984375, 0.03540802001953125, -0.0435791015625, 0.339141845703125, 0.02341461181640625, 0.16669464111328125, -0.09638023376464844, 0.502899169921875, 0.18881607055664062, 0.17927169799804688, -0.26836585998535156, -0.4995269775390625, 0.00176239013671875, -0.14936065673828125, 0.34973907470703125, 0.1113433837890625, -0.1737194061279297, -1.170318603515625, -0.11406898498535156, 0.0698699951171875, -0.3471794128417969, 0.11423492431640625, 0.1752490997314453], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000018.npy"} +{"epoch": 0.02643171806167401, "step": 19, "batch_size": 64, "mean": 0.09958454966545105, "std": 0.3424179255962372, "min": -0.7257232666015625, "p10": -0.2792842864990234, "median": 0.0987710952758789, "p90": 0.4857616424560547, "max": 1.1298370361328125, "pos_frac": 0.609375, "sample": [0.17469406127929688, 0.11559867858886719, -0.2881927490234375, -0.08322906494140625, 0.1004180908203125, -0.300262451171875, -0.26099395751953125, -0.029598236083984375, 0.07078170776367188, 0.574554443359375, 0.14110946655273438, -0.145965576171875, 0.3109550476074219, 0.25209808349609375, 0.39947509765625, -0.7257232666015625, 0.466827392578125, 0.36087799072265625, -0.7207260131835938, 0.3323211669921875, 0.22406005859375, -0.2382965087890625, -0.046722412109375, 0.675262451171875, -0.1100311279296875, 0.2870922088623047, 0.4937591552734375, 0.038661956787109375, -0.2860374450683594, -0.15534210205078125, 0.074493408203125, 0.4868431091308594, 0.06191062927246094, -0.00788116455078125, 0.31330108642578125, -0.4673023223876953, 0.21407318115234375, 0.22029495239257812, -0.26352691650390625, -0.2376708984375, -0.1998577117919922, 1.1298370361328125, -0.1257476806640625, 0.1417388916015625, 0.020738601684570312, 0.30010986328125, 0.48323822021484375, -0.0160369873046875, 0.1554107666015625, -0.018308639526367188, 0.7036666870117188, -0.05767059326171875, 0.21092987060546875, 0.09712409973144531, 0.2923583984375, 0.015699386596679688, -0.17110443115234375, 0.20867538452148438, 0.4373779296875, -0.01519775390625, 0.97650146484375, 0.1166534423828125, -0.4942474365234375, 0.1595611572265625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000019.npy"} +{"epoch": 0.027900146842878122, "step": 20, "batch_size": 64, "mean": 0.07201322913169861, "std": 0.3595223128795624, "min": -1.0636749267578125, "p10": -0.32011642456054684, "median": 0.08446598052978516, "p90": 0.46995754241943366, "max": 1.0487518310546875, "pos_frac": 0.625, "sample": [-0.07718467712402344, 0.08168983459472656, -0.2206134796142578, 0.7524566650390625, 0.0610198974609375, 0.3788738250732422, 0.12888336181640625, 0.3206005096435547, 0.321929931640625, 0.9326858520507812, -0.7806167602539062, 0.105712890625, 0.4922332763671875, -0.0038394927978515625, 0.07451629638671875, 0.30693817138671875, -0.4559898376464844, 0.037837982177734375, -0.6634750366210938, -0.0164337158203125, 0.1449432373046875, -0.5301666259765625, -0.13878631591796875, 0.44712257385253906, -0.02545166015625, -0.20616912841796875, -0.16741180419921875, 0.1547985076904297, -0.07762527465820312, 0.03841209411621094, 0.1795024871826172, -0.46067047119140625, 0.23722457885742188, -0.27664947509765625, -0.24564170837402344, -0.0113372802734375, -0.3387451171875, -1.0636749267578125, -0.22081756591796875, 0.044158935546875, 0.06589508056640625, 0.35919189453125, 0.1009521484375, 0.20166778564453125, 0.4855804443359375, 0.11998939514160156, 0.0599822998046875, 0.393951416015625, 0.2899932861328125, -0.044689178466796875, 0.08724212646484375, -0.1347808837890625, 0.09726905822753906, 0.394317626953125, 1.0487518310546875, -0.08743858337402344, 0.64361572265625, 0.25157737731933594, 0.1483306884765625, 0.1657123565673828, -0.02022552490234375, 0.14641571044921875, 0.47974395751953125, 0.09556007385253906], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000020.npy"} +{"epoch": 0.02936857562408223, "step": 21, "batch_size": 64, "mean": 0.1135970950126648, "std": 0.3972153067588806, "min": -0.7039356231689453, "p10": -0.3230047225952148, "median": 0.07904243469238281, "p90": 0.5587131500244141, "max": 1.2030487060546875, "pos_frac": 0.578125, "sample": [0.5071563720703125, -0.04414176940917969, 0.10587310791015625, 0.93841552734375, 0.670806884765625, 0.14611053466796875, -0.07933235168457031, -0.10668563842773438, -0.12823486328125, -0.1396331787109375, 0.236846923828125, 0.08242034912109375, 0.31608009338378906, 0.08089065551757812, 0.5452041625976562, 0.512664794921875, -0.07127571105957031, 0.3268890380859375, -0.2069988250732422, 0.5311126708984375, 0.389678955078125, -0.27178955078125, 0.19049072265625, 0.17934417724609375, 0.17464447021484375, 1.073028564453125, -0.2760009765625, -0.2923297882080078, 0.11890029907226562, 0.03662872314453125, -0.6674880981445312, 0.37926483154296875, -0.11960983276367188, 0.0771942138671875, -0.336151123046875, -0.3433647155761719, 0.39316558837890625, 0.41185569763183594, 1.2030487060546875, 0.02933502197265625, -0.05377197265625, -0.7039356231689453, 0.04042816162109375, -0.07038307189941406, -0.19314956665039062, -0.25531768798828125, 0.5645027160644531, 0.28281402587890625, 0.10309791564941406, -0.42612457275390625, -0.11884689331054688, 0.0450286865234375, 0.6350746154785156, 1.0959930419921875, -0.05438423156738281, -0.2550392150878906, 0.1413421630859375, -0.10379791259765625, 0.53619384765625, 0.3778209686279297, -0.4091949462890625, 0.1297149658203125, -0.0868682861328125, -0.5249977111816406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000021.npy"} +{"epoch": 0.030837004405286344, "step": 22, "batch_size": 64, "mean": 0.1602029800415039, "std": 0.4216807782649994, "min": -1.184295654296875, "p10": -0.3334236145019531, "median": 0.1307220458984375, "p90": 0.7737068176269533, "max": 1.1411552429199219, "pos_frac": 0.640625, "sample": [0.3432579040527344, -0.198394775390625, -0.38983917236328125, 0.030460357666015625, 0.121368408203125, 0.0160064697265625, 0.14007568359375, 0.10016250610351562, 0.07671737670898438, -0.0143585205078125, 1.1411552429199219, 0.277374267578125, -0.12805557250976562, 0.7838668823242188, -0.3444499969482422, -0.140869140625, 0.16486358642578125, 0.2572479248046875, -0.10674285888671875, 0.1595916748046875, -0.2524871826171875, 0.7952880859375, -0.35076141357421875, 0.1036376953125, 0.026636123657226562, 0.23626327514648438, 0.75, 0.35066986083984375, -0.2331390380859375, 0.3692169189453125, 0.39582061767578125, 0.073486328125, -0.1107025146484375, -0.2721900939941406, 0.4692859649658203, 0.6831130981445312, 0.48073577880859375, 0.064422607421875, -0.08773040771484375, 0.48346710205078125, 0.93817138671875, 0.2529106140136719, 0.21753501892089844, 0.5009613037109375, -0.3076953887939453, 0.8982772827148438, -0.06277847290039062, -0.45782470703125, 0.8269500732421875, 0.3920173645019531, -0.16218948364257812, 0.41205596923828125, -0.12262725830078125, -0.06390190124511719, -1.184295654296875, -0.54583740234375, 0.15008544921875, 0.3107948303222656, 0.5019378662109375, -0.445953369140625, 0.46527099609375, 0.5893096923828125, 0.977508544921875, -0.0921630859375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000022.npy"} +{"epoch": 0.032305433186490456, "step": 23, "batch_size": 64, "mean": 0.2226608693599701, "std": 0.46952253580093384, "min": -0.644927978515625, "p10": -0.3423820495605468, "median": 0.27054309844970703, "p90": 0.567683792114258, "max": 1.937255859375, "pos_frac": 0.640625, "sample": [-0.11545944213867188, -0.02053070068359375, 1.0083160400390625, -0.043621063232421875, 0.4161338806152344, 0.37918853759765625, -0.004119873046875, -0.36902618408203125, -0.21457290649414062, -0.5995330810546875, 1.3201904296875, 0.5279541015625, 0.3206939697265625, 0.294464111328125, 0.3191795349121094, -0.097564697265625, 0.5834617614746094, 0.5297203063964844, 0.5071487426757812, -0.07992935180664062, 0.33226776123046875, 0.40729522705078125, 1.669769287109375, 0.3983039855957031, -0.39632225036621094, 0.310455322265625, -0.6267776489257812, -0.12883377075195312, 0.4628753662109375, -0.28021240234375, 0.1675262451171875, 0.4265594482421875, 0.050689697265625, 0.3634796142578125, 0.15380477905273438, 0.29486846923828125, -0.37860870361328125, -0.000141143798828125, -0.0695343017578125, 0.4613666534423828, 0.17902755737304688, 0.39963722229003906, 0.149688720703125, 0.9099960327148438, -0.041820526123046875, 1.937255859375, 0.3083610534667969, -0.644927978515625, 0.2367706298828125, -0.188934326171875, 0.3042755126953125, 0.3369274139404297, -0.032825469970703125, 0.05437278747558594, 0.24662208557128906, 0.6364898681640625, -0.5144424438476562, 0.5308685302734375, -0.006679534912109375, 0.4099578857421875, 0.38803863525390625, -0.1847991943359375, 0.1057586669921875, 0.4497509002685547], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000023.npy"} +{"epoch": 0.033773861967694566, "step": 24, "batch_size": 64, "mean": 0.19149288535118103, "std": 0.37550482153892517, "min": -0.7211227416992188, "p10": -0.2494230270385742, "median": 0.16875362396240234, "p90": 0.64320068359375, "max": 1.04345703125, "pos_frac": 0.65625, "sample": [0.40950775146484375, 0.6480369567871094, -0.2259044647216797, -0.0625457763671875, 0.29180335998535156, 0.3008136749267578, 0.2714385986328125, 0.06613922119140625, 0.5987472534179688, -0.13518524169921875, 0.364959716796875, 0.150177001953125, -0.13245582580566406, 0.5040283203125, 0.5683021545410156, 0.17625999450683594, -0.022369384765625, -0.022335052490234375, -0.021942138671875, -0.07380294799804688, 0.6855430603027344, -0.7211227416992188, -0.43575096130371094, 0.5039520263671875, 0.406768798828125, 0.11883544921875, 0.39873504638671875, 0.0423583984375, 0.7501220703125, 0.4245758056640625, 1.04345703125, 0.36474609375, 0.4369163513183594, -0.6941375732421875, 0.6314163208007812, -0.49957275390625, 0.3169403076171875, 0.5092010498046875, -0.0792999267578125, 0.01293182373046875, 0.863006591796875, 0.4885425567626953, -0.2595024108886719, 0.36269378662109375, 0.103759765625, 0.791900634765625, 0.4554901123046875, -0.033306121826171875, 0.7694091796875, -0.06766891479492188, 0.26943206787109375, -0.0072956085205078125, 0.16124725341796875, 0.06017112731933594, 0.0780181884765625, -0.20806884765625, 0.6319160461425781, 0.11577606201171875, -0.18626785278320312, -0.430023193359375, 0.5665054321289062, -0.3179779052734375, -0.0462188720703125, 0.22371673583984375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000024.npy"} +{"epoch": 0.03524229074889868, "step": 25, "batch_size": 64, "mean": 0.16974005103111267, "std": 0.4560895264148712, "min": -0.9195098876953125, "p10": -0.3000141143798828, "median": 0.17023658752441406, "p90": 0.6775020599365235, "max": 1.7164306640625, "pos_frac": 0.6875, "sample": [0.4252967834472656, 0.17531204223632812, -0.50970458984375, 0.1028289794921875, -0.22306442260742188, -0.055328369140625, -0.26441192626953125, 0.5526275634765625, 0.302978515625, 0.1149139404296875, 0.05907440185546875, 0.97454833984375, -0.1948394775390625, 0.037082672119140625, 1.2755813598632812, -0.3397369384765625, -0.2700004577636719, 0.4249725341796875, 0.14126968383789062, 0.8216133117675781, -0.7927474975585938, 0.28125, -0.11631202697753906, 0.043426513671875, 0.6788787841796875, 0.321197509765625, -0.07253265380859375, 0.6052474975585938, -0.6623764038085938, 0.1651611328125, 0.19573211669921875, 0.11548614501953125, -0.16333770751953125, 0.21266937255859375, 0.28662872314453125, -0.03717803955078125, -0.3023338317871094, 1.7164306640625, -0.9195098876953125, 0.712249755859375, -0.07979965209960938, 0.5660629272460938, 0.46013641357421875, 0.23728179931640625, 0.0462188720703125, 0.06198883056640625, 0.29204559326171875, 0.20123291015625, 0.6032791137695312, 0.3056526184082031, 0.465118408203125, 0.25461578369140625, 0.36395263671875, -0.2946014404296875, 0.6878433227539062, 0.21105384826660156, -0.8283767700195312, 0.6742897033691406, -0.03432464599609375, 0.5676651000976562, 0.04192161560058594, 0.25714111328125, 0.02324676513671875, -0.039325714111328125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000025.npy"} +{"epoch": 0.03671071953010279, "step": 26, "batch_size": 64, "mean": 0.3750268816947937, "std": 0.46615540981292725, "min": -0.5139846801757812, "p10": -0.20786972045898436, "median": 0.32225990295410156, "p90": 0.9322967529296877, "max": 1.776458740234375, "pos_frac": 0.796875, "sample": [0.553558349609375, 0.15564346313476562, 0.3622894287109375, 0.351470947265625, 1.4095458984375, 0.35115814208984375, 0.4054412841796875, 0.9672775268554688, 0.056488037109375, -0.2157878875732422, 0.8489532470703125, 0.23184967041015625, 1.776458740234375, 0.29579925537109375, -0.5139846801757812, 0.8055419921875, 0.11487579345703125, -0.06561279296875, 0.96173095703125, -0.24879074096679688, 0.4546966552734375, 0.0059795379638671875, 0.5629348754882812, 0.37230682373046875, -0.0602874755859375, 0.8470916748046875, 0.5364952087402344, -0.1308441162109375, 0.2647514343261719, 0.47576904296875, 0.116668701171875, 0.26056671142578125, 0.2704334259033203, 0.248260498046875, -0.09832572937011719, 0.561798095703125, 1.6975555419921875, 0.2250518798828125, 0.23366928100585938, 0.48535919189453125, 0.21258544921875, 0.5079574584960938, 1.395233154296875, 0.32421112060546875, 0.6279945373535156, 0.7987518310546875, 0.012302398681640625, 0.3037567138671875, 0.3203086853027344, -0.24853897094726562, -0.322509765625, 0.1309967041015625, -0.1893939971923828, 0.186187744140625, -0.26560211181640625, 0.57293701171875, 0.863616943359375, -0.24929046630859375, 1.0852279663085938, 0.3682518005371094, 0.6687583923339844, -0.08311080932617188, 0.6250247955322266, 0.4222259521484375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000026.npy"} +{"epoch": 0.0381791483113069, "step": 27, "batch_size": 64, "mean": 0.4876362681388855, "std": 0.5244288444519043, "min": -0.78887939453125, "p10": -0.011616325378417955, "median": 0.4397106170654297, "p90": 1.29847640991211, "max": 2.445465087890625, "pos_frac": 0.890625, "sample": [0.2604351043701172, 0.9313468933105469, 0.004058837890625, 0.6190528869628906, 0.6502113342285156, 1.3787765502929688, 0.24798583984375, 0.2806549072265625, 1.4319610595703125, 0.04855918884277344, 0.6108112335205078, 0.49416351318359375, 0.5237922668457031, -0.09346580505371094, 0.5866546630859375, 0.3453369140625, 2.445465087890625, 0.2444610595703125, 0.3167381286621094, 0.46523284912109375, 0.05828094482421875, -0.2060546875, 0.423431396484375, 0.0020809173583984375, 0.37589263916015625, 0.3475341796875, 0.019350051879882812, 0.15649986267089844, 0.05225372314453125, 1.3654632568359375, 0.212860107421875, -0.2784233093261719, 0.008504867553710938, 1.4000244140625, 0.128875732421875, 0.72540283203125, 0.8307952880859375, 0.8131103515625, 0.38010406494140625, 0.15294647216796875, 0.3326988220214844, 0.5497894287109375, 0.5977630615234375, 0.6844711303710938, 0.2260284423828125, 0.9150848388671875, -0.78887939453125, 0.5496330261230469, 0.6951065063476562, 1.1421737670898438, 1.4484100341796875, -0.17156982421875, 0.7229461669921875, 0.8617019653320312, 1.0203857421875, 0.5943374633789062, 1.3751144409179688, -0.3793373107910156, 0.4559898376464844, -0.017486572265625, 0.203582763671875, 0.7448959350585938, 0.20162200927734375, 0.48309326171875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000027.npy"} +{"epoch": 0.039647577092511016, "step": 28, "batch_size": 64, "mean": 0.31769293546676636, "std": 0.5170885324478149, "min": -1.3853607177734375, "p10": -0.2974912643432617, "median": 0.3433647155761719, "p90": 0.9675331115722658, "max": 1.4127197265625, "pos_frac": 0.71875, "sample": [1.254180908203125, -0.0003204345703125, 0.17504119873046875, 0.34328460693359375, 0.6929798126220703, -0.31630706787109375, -0.021982192993164062, 0.3119659423828125, -0.1280364990234375, -0.07691764831542969, -0.3969879150390625, 0.798431396484375, 1.4127197265625, 0.28070068359375, 0.9088668823242188, -0.033905029296875, 0.2024688720703125, 0.7090911865234375, 0.35160064697265625, 0.25702667236328125, 0.572967529296875, 0.0066680908203125, 0.090667724609375, 0.34344482421875, -0.216400146484375, 1.162750244140625, 0.5511932373046875, 0.667633056640625, 1.2624053955078125, 0.682647705078125, 0.3739509582519531, 0.3494110107421875, 0.09710311889648438, -0.5216827392578125, 0.7465476989746094, 0.2857017517089844, 0.6197662353515625, 0.7583847045898438, 0.0512237548828125, -0.14080429077148438, 0.47991943359375, 0.5557632446289062, 0.05655479431152344, 0.80865478515625, 0.99267578125, 0.5229949951171875, 0.42134857177734375, -0.10571479797363281, 0.4463386535644531, -0.16582489013671875, 0.6496849060058594, -0.38202667236328125, -0.534454345703125, 0.50689697265625, -0.16546249389648438, 0.6924400329589844, 0.1793365478515625, 1.3203125, 1.157928466796875, -0.2535877227783203, 0.39926910400390625, -1.3853607177734375, -0.378814697265625, 0.045993804931640625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000028.npy"} +{"epoch": 0.041116005873715125, "step": 29, "batch_size": 64, "mean": 0.5121327638626099, "std": 0.6065710783004761, "min": -0.5622100830078125, "p10": -0.1491420745849609, "median": 0.4494285583496094, "p90": 1.3740486145019533, "max": 2.2533111572265625, "pos_frac": 0.75, "sample": [1.40533447265625, 0.16167640686035156, -0.0516357421875, 0.2167510986328125, 0.08490753173828125, 0.44203948974609375, -0.03441619873046875, 1.2401657104492188, 0.727294921875, -0.169158935546875, -0.2592887878417969, -0.10243606567382812, 1.296722412109375, 0.5601043701171875, 0.1920013427734375, 0.7419891357421875, 1.4428863525390625, 0.21480178833007812, -0.04673004150390625, 1.0909652709960938, -0.39545440673828125, 0.5540924072265625, 0.3701515197753906, 0.6288013458251953, 1.3282852172851562, 0.635101318359375, -0.092864990234375, 0.6387252807617188, -0.0390472412109375, 0.76959228515625, 0.03403472900390625, 1.4790878295898438, 2.2533111572265625, -0.3668975830078125, -0.06558990478515625, 0.3428020477294922, -0.0465545654296875, 0.11548995971679688, 1.8636093139648438, 1.10888671875, 0.786865234375, 0.6163482666015625, 0.36785125732421875, 0.456817626953125, 0.5876846313476562, 0.08697891235351562, 1.0743942260742188, -0.2424468994140625, 0.4854583740234375, -0.2041645050048828, 0.23752403259277344, 0.08160591125488281, 0.33795166015625, 0.0791168212890625, 0.8233528137207031, 1.3936614990234375, -0.0211334228515625, 0.7349624633789062, 0.5697517395019531, 0.8678092956542969, 1.8126678466796875, 1.28692626953125, -0.5622100830078125, 0.8491859436035156], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000029.npy"} +{"epoch": 0.042584434654919234, "step": 30, "batch_size": 64, "mean": 0.6934208869934082, "std": 0.606072187423706, "min": -0.4423980712890625, "p10": 0.01745986938476567, "median": 0.6597156524658203, "p90": 1.4657714843750003, "max": 2.713592529296875, "pos_frac": 0.890625, "sample": [1.3689155578613281, 0.06026458740234375, 0.10856246948242188, 0.8546905517578125, 1.262603759765625, 0.07994842529296875, -0.168731689453125, 0.1956787109375, -0.3652534484863281, 0.4379119873046875, 0.5494537353515625, 0.6923675537109375, 1.293731689453125, 1.1169776916503906, 1.420440673828125, 1.3595504760742188, 1.1872634887695312, 0.8704147338867188, 0.19406509399414062, 1.74835205078125, 0.6606597900390625, 0.687225341796875, 2.713592529296875, 0.6587715148925781, 0.9957809448242188, 0.21407127380371094, 0.619293212890625, 1.0680999755859375, -0.2291717529296875, 0.26823997497558594, 0.34188079833984375, 0.30747222900390625, 0.49651336669921875, 0.22049522399902344, 0.2888298034667969, 1.0730209350585938, 1.6163482666015625, 1.485198974609375, 1.20965576171875, 0.737335205078125, 0.07217597961425781, 0.699737548828125, 0.5121593475341797, 0.738494873046875, -0.4423980712890625, 1.2599029541015625, 1.577056884765625, 0.7506313323974609, 1.0629425048828125, 1.1431350708007812, 1.570220947265625, 0.3157958984375, 0.4179363250732422, -0.21418190002441406, -0.03636360168457031, 0.7613296508789062, 0.61834716796875, -0.000885009765625, 0.6206207275390625, 0.249664306640625, 0.7256011962890625, 1.870513916015625, 0.24141693115234375, 0.16455841064453125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000030.npy"} +{"epoch": 0.04405286343612335, "step": 31, "batch_size": 64, "mean": 0.5179519653320312, "std": 0.681266188621521, "min": -0.90509033203125, "p10": -0.16937026977539055, "median": 0.3748626708984375, "p90": 1.4280273437500002, "max": 3.0517120361328125, "pos_frac": 0.828125, "sample": [0.18681907653808594, 0.8776283264160156, -0.201507568359375, 0.3857421875, 1.442108154296875, 0.7196578979492188, 0.040729522705078125, -0.2805633544921875, 0.31800079345703125, -0.0162811279296875, 3.0517120361328125, 0.363983154296875, 0.681121826171875, 0.2541675567626953, 0.2502479553222656, 0.19036483764648438, 1.0051651000976562, 0.3190269470214844, 1.5547637939453125, 0.9786243438720703, 0.125640869140625, 0.3092842102050781, 0.335479736328125, 0.134246826171875, 0.19252777099609375, 0.19293212890625, 0.9823150634765625, 0.3378143310546875, 0.5788440704345703, 0.06288909912109375, 1.4446258544921875, 0.7070770263671875, 1.2216033935546875, -0.406646728515625, 0.7865314483642578, -0.71136474609375, 0.09165191650390625, 0.5097198486328125, -0.08222198486328125, 0.72308349609375, 0.8074569702148438, 1.7222061157226562, -0.292388916015625, 0.45751380920410156, -0.09438323974609375, -0.90509033203125, 0.268096923828125, 0.4324798583984375, 0.0132293701171875, -0.020862579345703125, 0.6192607879638672, 1.395172119140625, 2.307708740234375, 0.1644134521484375, -0.32738494873046875, 0.5289764404296875, 0.4911842346191406, 0.7304229736328125, 0.5030250549316406, 0.40323638916015625, 1.292327880859375, 2.053997039794922, 0.31830596923828125, 0.6224784851074219], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000031.npy"} +{"epoch": 0.04552129221732746, "step": 32, "batch_size": 64, "mean": 0.760206937789917, "std": 0.8808306455612183, "min": -0.7363243103027344, "p10": -0.3196872711181641, "median": 0.6155147552490234, "p90": 1.9385223388671875, "max": 3.62139892578125, "pos_frac": 0.828125, "sample": [0.5645751953125, 0.3993988037109375, 0.8816375732421875, 0.979888916015625, 0.6569747924804688, 0.152496337890625, -0.10871124267578125, 1.07958984375, 0.5430068969726562, 0.829315185546875, 1.2627410888671875, 0.78753662109375, -0.5208511352539062, 0.09455299377441406, -0.71307373046875, 1.2600860595703125, -0.15056228637695312, 1.1697731018066406, -0.3209953308105469, 3.1705169677734375, 0.7199897766113281, 1.8716201782226562, 1.6785888671875, 0.09659576416015625, 1.2318534851074219, 0.8055381774902344, 0.061431884765625, -0.3940277099609375, -0.7363243103027344, 1.0589981079101562, 0.41802978515625, 0.2954425811767578, 0.38841819763183594, 2.3055267333984375, 1.3782234191894531, 0.7765884399414062, 1.0633544921875, 0.5740547180175781, -0.12512588500976562, 0.32904052734375, 2.05767822265625, 1.1020965576171875, 0.126708984375, 0.33992767333984375, 3.62139892578125, 1.066497802734375, 0.30904388427734375, 2.5210037231445312, 1.9200439453125, -0.3166351318359375, -0.3306846618652344, 0.473480224609375, 1.946441650390625, 0.9805908203125, -0.38114166259765625, 0.3084850311279297, 1.245452880859375, 2.026519775390625, 0.21115493774414062, 1.119049072265625, 0.2462749481201172, 0.434295654296875, 1.531494140625, 0.2783546447753906], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000032.npy"} +{"epoch": 0.04698972099853157, "step": 33, "batch_size": 64, "mean": 0.561225950717926, "std": 0.6193910837173462, "min": -0.8821182250976562, "p10": -0.24847621917724608, "median": 0.5528402328491211, "p90": 1.3423286437988287, "max": 2.04376220703125, "pos_frac": 0.796875, "sample": [-0.2811431884765625, 0.32668113708496094, 0.8204460144042969, 0.7899665832519531, 1.7001800537109375, 0.48987579345703125, 0.136444091796875, 0.456634521484375, 0.4143333435058594, 0.3898906707763672, 0.8248291015625, 1.1364212036132812, 1.87139892578125, 1.1219329833984375, 0.833465576171875, 0.45056915283203125, 0.6226119995117188, -0.8821182250976562, 0.7039947509765625, -0.71649169921875, -0.01923370361328125, 1.0624923706054688, 1.204620361328125, 0.6972885131835938, 0.8208236694335938, -0.25959014892578125, 1.2382621765136719, 0.35219573974609375, 0.03438568115234375, 1.555633544921875, -0.1686553955078125, -0.152252197265625, 2.04376220703125, -0.22254371643066406, 0.07305145263671875, 0.9753341674804688, 0.37090301513671875, 0.24759674072265625, 0.9899101257324219, 0.6898956298828125, 0.4432563781738281, 1.1415634155273438, -0.3188018798828125, 0.4164314270019531, 0.35884857177734375, 0.9616165161132812, -0.059417724609375, 1.514129638671875, 0.6158046722412109, -0.10463333129882812, 0.47463226318359375, -0.2795753479003906, 1.4286842346191406, 0.06801605224609375, 0.648773193359375, 0.8397445678710938, 1.02099609375, -0.73126220703125, 0.43761444091796875, 0.8840255737304688, 0.3781471252441406, 0.8348541259765625, 0.8142814636230469, 1.3869285583496094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000033.npy"} +{"epoch": 0.048458149779735685, "step": 34, "batch_size": 64, "mean": 0.8326810002326965, "std": 0.8710805177688599, "min": -0.802154541015625, "p10": -0.10065250396728513, "median": 0.6608772277832031, "p90": 2.2343391418457035, "max": 3.2057037353515625, "pos_frac": 0.84375, "sample": [2.551666259765625, -0.48569488525390625, 0.8673629760742188, -0.21595001220703125, 3.2057037353515625, 0.5345687866210938, 1.5284271240234375, 0.93505859375, 0.1971435546875, 0.0678863525390625, 0.6618270874023438, -0.07333946228027344, 0.7902908325195312, 0.2416534423828125, 1.3723678588867188, 2.2008056640625, 0.2725048065185547, 0.10186004638671875, 2.337921142578125, 0.1582489013671875, 1.0330886840820312, -0.11591339111328125, 1.4510765075683594, 1.4540290832519531, 0.873046875, 1.3708648681640625, 2.1939544677734375, -0.5846328735351562, 0.816619873046875, 0.8406829833984375, 0.319580078125, 0.40766143798828125, 1.3375740051269531, 1.4687232971191406, 2.25048828125, 0.8861351013183594, 0.9169559478759766, 2.4648895263671875, 0.4463634490966797, -0.1957244873046875, 0.07815933227539062, 0.27242279052734375, 1.3107376098632812, 1.61151123046875, 0.532135009765625, 0.9906501770019531, 0.04534149169921875, -0.11235809326171875, 1.5406875610351562, 0.2866668701171875, 0.33754730224609375, -0.802154541015625, 0.6599273681640625, 0.2806243896484375, 0.47393798828125, -0.03467559814453125, 2.2487106323242188, 0.5720710754394531, -0.003082275390625, 0.782012939453125, 2.027923583984375, 2.519775390625, 0.21819686889648438, 0.5690383911132812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000034.npy"} +{"epoch": 0.049926578560939794, "step": 35, "batch_size": 64, "mean": 1.1397206783294678, "std": 1.134015679359436, "min": -0.3028373718261719, "p10": 0.1434257507324219, "median": 0.83905029296875, "p90": 2.3196533203125003, "max": 7.425323486328125, "pos_frac": 0.9375, "sample": [0.5873451232910156, 0.9453468322753906, 2.619903564453125, 1.5520515441894531, 0.13525009155273438, 0.7187614440917969, 0.81683349609375, 0.01598358154296875, -0.15541839599609375, 0.948486328125, 2.2415313720703125, 0.11459159851074219, 0.6057510375976562, 0.5209579467773438, 0.4584503173828125, 0.2515411376953125, 1.9341888427734375, 3.2487411499023438, 0.8432159423828125, 0.2497997283935547, 0.4337959289550781, 2.1330718994140625, 2.3650360107421875, 0.530181884765625, 0.21973419189453125, 0.7630691528320312, 0.6661281585693359, 1.6193389892578125, 0.8732147216796875, 0.371612548828125, 7.425323486328125, 1.4324989318847656, 0.342864990234375, 1.532867431640625, 0.3272686004638672, 2.826202392578125, 0.16250228881835938, 1.7394866943359375, 2.3531341552734375, 2.158660888671875, 0.47863006591796875, 2.0854263305664062, 0.992431640625, 1.1729888916015625, 0.6043014526367188, -0.04112434387207031, 1.7507781982421875, 2.0784912109375, 1.3234786987304688, 0.6414108276367188, 0.6708621978759766, 0.3298492431640625, 0.7219886779785156, 1.9440460205078125, 0.9284591674804688, 1.0563850402832031, 0.8348846435546875, -0.3028373718261719, 1.2328872680664062, -0.04196929931640625, 0.9524555206298828, 2.3746795654296875, 1.6369781494140625, 0.58734130859375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000035.npy"} +{"epoch": 0.0513950073421439, "step": 36, "batch_size": 64, "mean": 1.1315281391143799, "std": 1.1969292163848877, "min": -1.9125823974609375, "p10": -0.19769496917724608, "median": 0.9538249969482422, "p90": 2.783982086181641, "max": 3.761810302734375, "pos_frac": 0.84375, "sample": [1.257293701171875, -0.1921100616455078, 0.8916130065917969, 0.48062896728515625, 1.647613525390625, 0.6408557891845703, 1.4162750244140625, -0.45949554443359375, -0.297088623046875, 2.866729736328125, 2.3298187255859375, 0.107177734375, -0.07446670532226562, 0.39124488830566406, 0.072998046875, 1.3556442260742188, -0.12322044372558594, 1.4644317626953125, 1.015045166015625, 0.225311279296875, -0.4275970458984375, -0.36207008361816406, 0.202423095703125, 0.16378211975097656, 2.7934188842773438, 3.1542091369628906, 1.8502998352050781, -0.7723236083984375, 2.761962890625, 0.8128814697265625, 0.7890396118164062, 1.2464675903320312, 0.3174591064453125, 2.8203582763671875, 0.07749176025390625, 0.10907363891601562, 2.6590576171875, 0.10695648193359375, 2.598663330078125, 0.6837615966796875, 2.1995773315429688, 1.255819320678711, 2.7131423950195312, 0.8926048278808594, 3.1725311279296875, 1.1645584106445312, 0.7823524475097656, 3.761810302734375, 1.1008796691894531, -1.9125823974609375, 2.6331710815429688, 1.199066162109375, 2.1902923583984375, 0.7725753784179688, 2.6604156494140625, 1.1698341369628906, -0.2000885009765625, 0.5434150695800781, 2.4724884033203125, 0.4361286163330078, 1.4015884399414062, 3.6780548095703125, 1.1784515380859375, 0.5500984191894531], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000036.npy"} +{"epoch": 0.05286343612334802, "step": 37, "batch_size": 64, "mean": 1.0608049631118774, "std": 1.382981300354004, "min": -1.9508056640625, "p10": -0.07118377685546874, "median": 0.8222885131835938, "p90": 2.827757263183594, "max": 7.036651611328125, "pos_frac": 0.8125, "sample": [1.7434005737304688, -0.1302509307861328, 1.027191162109375, 1.08135986328125, 0.029850006103515625, -0.135498046875, -0.04650115966796875, 0.88165283203125, 1.1350479125976562, -0.056308746337890625, 0.5829010009765625, 3.550384521484375, 0.35790252685546875, -0.07183837890625, -0.0696563720703125, 0.20777130126953125, 0.7790946960449219, 0.1532306671142578, 2.0333328247070312, 7.036651611328125, 1.0965194702148438, 4.5369873046875, 1.3215751647949219, 0.8826560974121094, 0.8707504272460938, 1.2812118530273438, 0.6454925537109375, 0.7854156494140625, 1.106292724609375, 1.3711414337158203, 0.7317314147949219, 0.2631072998046875, 0.52587890625, -0.7168350219726562, 0.29730987548828125, 0.4403724670410156, 0.3207530975341797, 2.14373779296875, 2.1551895141601562, 2.6756744384765625, -0.6210441589355469, 2.87237548828125, 0.5099029541015625, 1.80291748046875, 0.03399658203125, 1.0036754608154297, 0.015262603759765625, -0.06568145751953125, 0.998321533203125, 1.5050888061523438, 2.1414108276367188, -0.3774299621582031, 2.7236480712890625, 2.9396591186523438, 0.1347503662109375, 3.080322265625, 1.2378578186035156, -1.9508056640625, 0.14245033264160156, 0.859161376953125, 2.053455352783203, 3.6605987548828125, -0.067230224609375, 0.434173583984375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000037.npy"} +{"epoch": 0.05433186490455213, "step": 38, "batch_size": 64, "mean": 1.4247064590454102, "std": 1.7317270040512085, "min": -1.8316001892089844, "p10": -0.0811504364013671, "median": 1.0183906555175781, "p90": 3.335007095336915, "max": 8.962127685546875, "pos_frac": 0.890625, "sample": [0.38806915283203125, 8.962127685546875, 4.96966552734375, 1.8403873443603516, 0.5874404907226562, 0.07481575012207031, 1.552032470703125, -0.18157958984375, 1.0745697021484375, 2.818267822265625, 1.6839370727539062, 0.83087158203125, 0.200531005859375, 2.1925735473632812, 1.322509765625, -0.1466827392578125, 0.535736083984375, 0.3563041687011719, 0.20783424377441406, 2.2829055786132812, 0.5715255737304688, 1.9274444580078125, 2.83074951171875, 0.3219738006591797, 3.7306365966796875, 0.138702392578125, 2.5116500854492188, 2.4321365356445312, 1.3710250854492188, 0.010768890380859375, 0.6757335662841797, 3.119853973388672, -1.8316001892089844, 1.15069580078125, 0.6665554046630859, 0.35857391357421875, 0.4155120849609375, 5.0475006103515625, 0.4788169860839844, 0.4549560546875, 3.427215576171875, 5.105682373046875, 1.847747802734375, 0.7041168212890625, 0.7002830505371094, -0.12054443359375, 0.37779998779296875, 0.12735748291015625, 0.9016265869140625, 1.6256103515625, -1.080474853515625, 0.4366912841796875, -0.21016693115234375, 1.2703781127929688, 1.2498741149902344, 2.2993087768554688, 1.1000595092773438, 5.514373779296875, 1.2089996337890625, 0.9622116088867188, 2.2709426879882812, -0.42917633056640625, 1.2813262939453125, 2.6744422912597656], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000038.npy"} +{"epoch": 0.055800293685756244, "step": 39, "batch_size": 64, "mean": 1.6478346586227417, "std": 1.4816535711288452, "min": -2.044403076171875, "p10": 0.19349784851074223, "median": 1.3763389587402344, "p90": 3.580688858032227, "max": 5.3651580810546875, "pos_frac": 0.921875, "sample": [2.5802841186523438, 1.7957611083984375, 5.3651580810546875, 2.3704376220703125, 1.534515380859375, -0.7583389282226562, 3.0516128540039062, 2.8675498962402344, 0.9558506011962891, 2.9089488983154297, 0.7064132690429688, 3.3077011108398438, 1.9480743408203125, 0.5775680541992188, 4.084800720214844, 1.0087776184082031, 0.23355484008789062, 0.8158607482910156, -0.703369140625, 0.6871261596679688, 0.6103057861328125, 1.3805007934570312, 2.2339324951171875, 1.0074272155761719, 0.7189197540283203, 2.8627243041992188, -0.19292449951171875, 1.6728324890136719, 4.530426025390625, 0.17633056640625, 0.631378173828125, 0.4031486511230469, 3.0873031616210938, 2.4271392822265625, -2.044403076171875, 2.1959457397460938, 0.3410053253173828, 1.8546581268310547, 3.5012969970703125, 4.9876708984375, 1.3721771240234375, 1.635284423828125, 4.495868682861328, 1.044219970703125, 1.0032844543457031, 0.7223281860351562, 0.0607757568359375, 0.6075363159179688, 1.972198486328125, 1.0470638275146484, 0.3275604248046875, 3.605754852294922, 3.0545654296875, 3.1041107177734375, 0.3831443786621094, 3.5222015380859375, 3.64410400390625, 0.9001979827880859, 1.4872150421142578, 0.4094200134277344, 0.40773582458496094, -0.0077056884765625, 0.8351554870605469, 2.1033172607421875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000039.npy"} +{"epoch": 0.05726872246696035, "step": 40, "batch_size": 64, "mean": 1.5688700675964355, "std": 1.6993874311447144, "min": -0.6492538452148438, "p10": -0.06626815795898433, "median": 1.1728363037109375, "p90": 3.7542041778564457, "max": 7.860443115234375, "pos_frac": 0.84375, "sample": [-0.511016845703125, 0.3739166259765625, 1.0849609375, 0.9457912445068359, 1.5832901000976562, 0.5059890747070312, 0.483489990234375, 0.03668212890625, 0.3468456268310547, 2.126516342163086, 6.8619537353515625, 3.034912109375, 0.8286895751953125, 2.881622314453125, 1.429443359375, -0.6492538452148438, 2.355794906616211, 1.537109375, 0.0933074951171875, -0.028045654296875, 3.1156845092773438, 0.0228271484375, 0.9324493408203125, -0.0048065185546875, 2.62890625, 1.525482177734375, 0.6681480407714844, 1.260711669921875, 0.7471046447753906, -0.6045074462890625, 4.680595397949219, -0.118927001953125, 3.779327392578125, -0.0130157470703125, 0.3039569854736328, 4.2252044677734375, 0.036754608154296875, 3.6955833435058594, 0.4899940490722656, 0.933837890625, 4.482440948486328, 0.9053535461425781, 1.5020904541015625, 7.860443115234375, -0.08264923095703125, 1.5132675170898438, 0.6761627197265625, 2.361042022705078, 2.9455833435058594, 1.7135238647460938, 1.560546875, 2.612640380859375, 3.2479324340820312, 1.8630142211914062, 1.0187911987304688, 1.4478225708007812, 4.190460205078125, -0.145965576171875, 2.46478271484375, 0.14754486083984375, -0.5063400268554688, 1.7913780212402344, 2.3993873596191406, 0.8111228942871094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000040.npy"} +{"epoch": 0.05873715124816446, "step": 41, "batch_size": 64, "mean": 1.9216313362121582, "std": 2.1292500495910645, "min": -3.8099212646484375, "p10": -0.09172191619873016, "median": 1.3089790344238281, "p90": 4.534327697753907, "max": 9.456588745117188, "pos_frac": 0.890625, "sample": [1.0905303955078125, 1.2645339965820312, 3.7587432861328125, 1.0859527587890625, 0.79034423828125, 2.39556884765625, 3.807403564453125, 2.43426513671875, -0.4374237060546875, 3.331817626953125, 6.3633575439453125, 0.7306289672851562, 1.3305816650390625, 9.456588745117188, 0.31235694885253906, 0.8515625, 1.0499114990234375, 1.7873954772949219, 0.219635009765625, -1.3870849609375, 1.6442947387695312, 3.413288116455078, 4.6300048828125, 1.035409927368164, 6.86505126953125, 2.096668243408203, 4.0784912109375, 4.877899169921875, 1.0536956787109375, 0.5658378601074219, 2.345020294189453, 0.577880859375, 7.19244384765625, 0.7155723571777344, 1.1766128540039062, 3.2794723510742188, 1.7600631713867188, 3.17877197265625, -0.348907470703125, 2.3280410766601562, 4.3110809326171875, 1.5166130065917969, -3.8099212646484375, 3.244171142578125, 0.46746826171875, 0.7839813232421875, 1.9025421142578125, 1.715087890625, 2.1960678100585938, 0.4654998779296875, 1.2873764038085938, 0.9652481079101562, 1.6168212890625, 0.6114120483398438, 0.5409412384033203, 1.2434329986572266, -0.34770965576171875, 3.5943527221679688, 0.6857376098632812, 0.22812461853027344, -0.2251605987548828, -0.2707977294921875, 2.409526824951172, 5.1502227783203125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000041.npy"} +{"epoch": 0.06020558002936858, "step": 42, "batch_size": 64, "mean": 2.6327667236328125, "std": 2.6192259788513184, "min": -1.8218231201171875, "p10": 0.14609298706054694, "median": 1.9888267517089844, "p90": 6.055610656738282, "max": 11.90185546875, "pos_frac": 0.90625, "sample": [1.5493011474609375, 0.21417999267578125, 2.0932750701904297, 10.815185546875, 11.90185546875, 3.06048583984375, -0.11299896240234375, 5.1624603271484375, 2.2115821838378906, 2.866016387939453, 1.8704299926757812, 6.4287261962890625, 2.813089370727539, 8.64117431640625, 0.8540363311767578, 1.8421096801757812, 3.0835113525390625, -0.16823577880859375, 3.4284591674804688, 3.3739395141601562, 1.2593307495117188, 1.495290756225586, 3.795501708984375, 0.5430221557617188, 1.1126174926757812, 3.3809585571289062, -0.917877197265625, 2.8852691650390625, 1.7025222778320312, -0.45415496826171875, 2.0460777282714844, 5.7314300537109375, 0.29831695556640625, 7.04656982421875, 3.0405731201171875, 2.0248336791992188, 1.0790843963623047, 1.5055694580078125, 1.95281982421875, -1.1409645080566406, 3.9031982421875, 0.116912841796875, 1.25006103515625, 6.5724639892578125, 1.4497261047363281, 3.6711196899414062, 1.1035633087158203, 0.7187614440917969, 1.3008308410644531, 1.3465957641601562, 0.7476577758789062, 6.1162872314453125, -1.8218231201171875, 0.6031990051269531, 2.1671600341796875, 3.6184654235839844, 0.4228248596191406, 1.5436553955078125, 5.914031982421875, 2.1149444580078125, 4.60504150390625, 5.094268798828125, 1.0079822540283203, 4.6147613525390625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000042.npy"} +{"epoch": 0.06167400881057269, "step": 43, "batch_size": 64, "mean": 2.5607893466949463, "std": 1.9980591535568237, "min": -1.2788848876953125, "p10": 0.19766883850097658, "median": 2.4615650177001953, "p90": 4.570140838623047, "max": 10.210693359375, "pos_frac": 0.953125, "sample": [0.062465667724609375, 0.23827743530273438, 0.10905075073242188, 3.7278976440429688, 3.9041213989257812, 1.763031005859375, 3.7174224853515625, 3.1537322998046875, 2.65869140625, 6.373260498046875, 0.8194732666015625, 1.8936195373535156, 0.8474845886230469, 2.487377166748047, 3.228618621826172, 1.20794677734375, 3.5874481201171875, 5.935997009277344, 1.9497833251953125, 0.43304443359375, 6.926025390625, 3.4355506896972656, 5.973655700683594, 1.2198867797851562, 3.038837432861328, 2.4357528686523438, 0.22474288940429688, 2.537931442260742, -0.029203414916992188, 4.6068878173828125, 1.3284835815429688, 2.5009822845458984, 3.74945068359375, 2.6668701171875, 3.4589309692382812, 3.705188751220703, 0.1669921875, 2.378631591796875, 2.326303482055664, 0.7658309936523438, 1.8105545043945312, 2.1614227294921875, 6.3387603759765625, 3.6868133544921875, 1.627899169921875, -1.2788848876953125, 1.2063312530517578, 3.4268875122070312, 2.3912506103515625, 2.64398193359375, 2.644939422607422, 4.047706604003906, 1.995361328125, 0.2836017608642578, 1.4411392211914062, 10.210693359375, 3.662872314453125, 1.639862060546875, -0.21803665161132812, 0.5575485229492188, 3.582733154296875, 4.484397888183594, 3.8401336669921875, 0.186065673828125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000043.npy"} +{"epoch": 0.0631424375917768, "step": 44, "batch_size": 64, "mean": 2.832730293273926, "std": 2.247408866882324, "min": -1.0105056762695312, "p10": 0.344957160949707, "median": 2.3769683837890625, "p90": 5.740870666503906, "max": 10.27825927734375, "pos_frac": 0.96875, "sample": [4.018241882324219, 2.8006439208984375, 1.6127128601074219, 0.8031291961669922, 6.8935546875, 2.0886764526367188, 3.150951385498047, 1.1487503051757812, 2.2581634521484375, 5.217674255371094, 6.7105255126953125, 0.8655509948730469, 4.306365966796875, 5.8002471923828125, 5.21453857421875, 4.073577880859375, 1.0198745727539062, 2.138092041015625, 2.1863574981689453, 5.5900115966796875, 0.34859275817871094, 2.8535919189453125, 2.5568485260009766, 7.65936279296875, 2.4957733154296875, 2.8180007934570312, 0.31990814208984375, 1.5319366455078125, 5.513771057128906, 1.7235031127929688, 2.7783203125, 0.13892364501953125, 1.8678455352783203, 1.3658065795898438, 1.8021240234375, 3.49267578125, 0.21160888671875, 1.7061233520507812, 5.602325439453125, 2.5347442626953125, 1.0649566650390625, 1.9987335205078125, 4.218902587890625, 3.9451828002929688, 1.5552978515625, 3.369232177734375, 2.6231536865234375, 2.86834716796875, 0.5335559844970703, 0.3433990478515625, 6.575958251953125, 4.290382385253906, 2.246795654296875, 3.641357421875, 0.9622116088867188, 10.27825927734375, 0.7334251403808594, -0.9455108642578125, 3.392364501953125, 1.1823577880859375, -1.0105056762695312, 0.2571849822998047, 1.9145927429199219, 8.035682678222656], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000044.npy"} +{"epoch": 0.06461086637298091, "step": 45, "batch_size": 64, "mean": 2.307036876678467, "std": 2.6632320880889893, "min": -2.0861053466796875, "p10": -0.27487831115722655, "median": 1.6747493743896484, "p90": 5.953428649902344, "max": 11.9302978515625, "pos_frac": 0.859375, "sample": [3.9606170654296875, 1.3778839111328125, 0.3582305908203125, 3.1988677978515625, 0.35242462158203125, 1.8828182220458984, 3.845783233642578, 6.79168701171875, 5.3484649658203125, 2.78363037109375, 0.1576080322265625, 8.287040710449219, 3.972188949584961, 0.10048294067382812, 0.11435127258300781, 1.7150535583496094, 0.10088348388671875, 1.7229270935058594, 0.09440040588378906, 5.9334869384765625, -0.2766876220703125, 0.2735099792480469, 2.290069580078125, 0.7864265441894531, 0.6946296691894531, 5.495573043823242, 6.829833984375, 4.185935974121094, 0.24695205688476562, 0.37003135681152344, 5.3256683349609375, 2.6386032104492188, 5.89935302734375, -0.2465057373046875, -0.4238739013671875, 1.6636962890625, 6.446929931640625, 3.970592498779297, -0.3749847412109375, 1.2953567504882812, 1.6858024597167969, 0.3071441650390625, -2.0861053466796875, -0.6982040405273438, 1.4603118896484375, 11.9302978515625, 1.0936508178710938, 7.9046783447265625, 3.1016921997070312, -0.7922859191894531, 2.5715694427490234, 1.8523330688476562, 1.440164566040039, 0.397979736328125, 1.470785140991211, 5.96197509765625, 1.1797637939453125, 2.50592041015625, 2.287229537963867, 1.9882049560546875, -0.2706565856933594, 3.457080841064453, 0.7576885223388672, -1.0466079711914062], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000045.npy"} +{"epoch": 0.06607929515418502, "step": 46, "batch_size": 64, "mean": 3.0271389484405518, "std": 3.307697296142578, "min": -2.683135986328125, "p10": 0.05946693420410159, "median": 2.0331897735595703, "p90": 6.9008224487304695, "max": 16.746337890625, "pos_frac": 0.90625, "sample": [0.4969158172607422, 0.20413970947265625, 5.3958587646484375, 7.11578369140625, 4.036903381347656, -1.9767379760742188, 1.9172325134277344, 1.6757068634033203, 7.1019134521484375, 3.5291175842285156, 6.386768341064453, 1.7789688110351562, -0.148284912109375, 1.7751274108886719, 5.045135498046875, 0.08751296997070312, 0.04744720458984375, 0.881805419921875, 4.706413269042969, 12.164031982421875, -0.27943992614746094, 0.6448974609375, -0.1998291015625, 3.3281822204589844, 0.3273468017578125, 4.666786193847656, 3.9210052490234375, 1.257955551147461, 4.449287414550781, 2.245668411254883, 4.283943176269531, 0.3501605987548828, -2.683135986328125, 0.696014404296875, 4.0242462158203125, 6.332942962646484, 5.6678619384765625, 0.18825531005859375, 8.294387817382812, 0.30033111572265625, 0.34688377380371094, 4.275184631347656, 5.987678527832031, 0.20102691650390625, 16.746337890625, 0.43209075927734375, 6.9661407470703125, 6.139495849609375, 0.5780372619628906, 2.195037841796875, 1.6947174072265625, 8.162109375, 3.0503158569335938, 0.44821929931640625, 1.9234046936035156, 6.7484130859375, 1.696197509765625, 2.5089759826660156, 3.3674392700195312, 5.623321533203125, 2.142974853515625, 1.117696762084961, 1.502096176147461, -0.1555328369140625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000046.npy"} +{"epoch": 0.06754772393538913, "step": 47, "batch_size": 64, "mean": 3.016704559326172, "std": 2.8151016235351562, "min": -2.9661865234375, "p10": 0.04059658050537117, "median": 2.2901687622070312, "p90": 6.908408355712891, "max": 11.14984130859375, "pos_frac": 0.90625, "sample": [2.099029541015625, 3.14007568359375, 0.5067596435546875, 1.4358882904052734, 5.131168365478516, 1.6663703918457031, 0.9113998413085938, 2.0577468872070312, 7.224395751953125, 2.8515968322753906, -0.403656005859375, 6.465068817138672, 6.968757629394531, 1.245452880859375, 3.821521759033203, 2.1885299682617188, 0.1141510009765625, 1.7191505432128906, 2.3167877197265625, 5.395454406738281, 7.029609680175781, 2.024993896484375, 0.7394313812255859, 3.519367218017578, 3.578582763671875, 1.5098705291748047, -0.6415176391601562, 6.226806640625, 8.260406494140625, 2.8107967376708984, -0.30893898010253906, 3.412445068359375, 6.005802154541016, 4.516021728515625, -2.9661865234375, 0.009073257446289062, -0.041229248046875, 1.3406391143798828, 0.6616363525390625, 11.14984130859375, 6.185882568359375, 2.337200164794922, 0.9980335235595703, 2.62872314453125, 9.782394409179688, 6.7675933837890625, 0.17327117919921875, 3.370553970336914, 9.46649169921875, -1.4161148071289062, 1.3861007690429688, 2.2635498046875, 0.6704254150390625, 1.5417366027832031, 2.8035125732421875, 0.8965072631835938, 6.2178802490234375, 1.8349761962890625, 4.5634765625, 1.4916629791259766, 4.7552337646484375, 3.9933319091796875, 2.4434146881103516, 2.2201480865478516], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000047.npy"} +{"epoch": 0.06901615271659324, "step": 48, "batch_size": 64, "mean": 3.2726054191589355, "std": 3.2548775672912598, "min": -7.9955902099609375, "p10": 0.4001842498779297, "median": 2.52388858795166, "p90": 8.220448303222659, "max": 13.104690551757812, "pos_frac": 0.90625, "sample": [4.517730712890625, 1.0113677978515625, 8.401443481445312, 9.494575500488281, 7.202545166015625, 3.701904296875, -0.5525054931640625, 2.6594886779785156, 5.883506774902344, 4.5482025146484375, 1.7852020263671875, 5.82122802734375, 2.5378494262695312, 0.4312744140625, 0.7413253784179688, 1.1741981506347656, 7.798126220703125, -0.06743812561035156, 1.1445083618164062, -0.20957565307617188, 1.4285697937011719, 8.936752319335938, -0.5970935821533203, 2.4099197387695312, 6.983924865722656, 1.73040771484375, 4.294891357421875, 3.329130172729492, 0.3868598937988281, 3.7239723205566406, 2.0967445373535156, 2.509927749633789, 3.6754302978515625, 2.3401947021484375, 8.46826171875, 4.1453399658203125, 5.244163513183594, 9.787063598632812, 3.745635986328125, 1.0273513793945312, 0.9175872802734375, 1.8293933868408203, 1.2688827514648438, 2.8974838256835938, 1.546630859375, 4.4233551025390625, 3.3048954010009766, 1.9210472106933594, -1.995382308959961, 8.886299133300781, 2.9556751251220703, 3.4969329833984375, 2.4191741943359375, 13.104690551757812, 5.0561676025390625, -7.9955902099609375, 6.152488708496094, 1.746225357055664, 1.185750961303711, 0.995269775390625, 5.0113525390625, 2.2211151123046875, 2.4298057556152344, 1.9750900268554688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000048.npy"} +{"epoch": 0.07048458149779736, "step": 49, "batch_size": 64, "mean": 3.8355050086975098, "std": 4.151994705200195, "min": -3.9075927734375, "p10": -0.1636257171630857, "median": 3.4157724380493164, "p90": 8.508027648925783, "max": 17.454116821289062, "pos_frac": 0.890625, "sample": [3.156097412109375, 0.3267707824707031, 0.1184539794921875, 3.523468017578125, 2.540952682495117, 5.387599945068359, 0.6974563598632812, 2.9353160858154297, -2.14337158203125, 0.28600311279296875, 0.07599258422851562, 2.232309341430664, 3.0031204223632812, 9.407173156738281, 7.568733215332031, 2.38421630859375, 4.536041259765625, 2.1654014587402344, 4.261085510253906, 1.317169189453125, 2.548044204711914, -3.381532669067383, 4.0822601318359375, -0.26631927490234375, 15.154571533203125, 10.990280151367188, 1.4863739013671875, 6.1795654296875, 4.266937255859375, 0.5221099853515625, 8.049667358398438, 17.454116821289062, 4.432220458984375, 1.3223724365234375, -2.2642288208007812, -3.9075927734375, 4.590309143066406, 3.437288284301758, 4.573223114013672, 4.8142547607421875, 3.0732784271240234, 0.09115409851074219, -0.665557861328125, 5.188591003417969, 5.514167785644531, 14.511260986328125, 2.4919281005859375, 3.394256591796875, 8.7044677734375, 7.4110870361328125, 6.899250030517578, 3.4877281188964844, 5.125587463378906, 5.432304382324219, 13.590499877929688, 0.6044883728027344, -1.102874755859375, 3.4663162231445312, 1.7261028289794922, 4.9132537841796875, 5.96356201171875, 1.8287811279296875, 4.765216827392578, 1.1935577392578125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000049.npy"} +{"epoch": 0.07195301027900147, "step": 50, "batch_size": 64, "mean": 4.148880958557129, "std": 4.069746971130371, "min": -2.7661304473876953, "p10": -0.6259403228759763, "median": 3.2105655670166016, "p90": 9.720663070678713, "max": 15.35650634765625, "pos_frac": 0.859375, "sample": [7.653411865234375, -0.42258453369140625, -2.0522918701171875, 10.461532592773438, 1.4102363586425781, 2.635650634765625, 7.893714904785156, 2.3874244689941406, 3.662364959716797, 1.9551401138305664, -1.0226058959960938, 7.3738861083984375, 3.7952041625976562, 15.201583862304688, 0.3519744873046875, 2.59381103515625, 7.7807769775390625, 3.398487091064453, 10.043014526367188, 3.72882080078125, 0.7470645904541016, 5.1687774658203125, 10.328514099121094, 2.5324554443359375, 2.2385025024414062, 5.259346008300781, 0.08089447021484375, -0.7130928039550781, 3.2320289611816406, 9.20758056640625, 7.266822814941406, 5.791358947753906, 9.940555572509766, 15.35650634765625, -2.7661304473876953, 0.6331977844238281, 1.5556793212890625, 2.2163848876953125, 0.2380352020263672, 3.1891021728515625, 2.0966796875, 1.7606201171875, 6.760643005371094, 4.026679992675781, 8.02297592163086, 7.453300476074219, 0.21504974365234375, 1.8729362487792969, 6.002716064453125, -0.9999771118164062, 6.688720703125, 2.9918251037597656, 4.797107696533203, 6.365364074707031, 2.617767333984375, 6.332000732421875, 8.599784851074219, 1.8802947998046875, 12.209205627441406, -0.95806884765625, 7.188320159912109, 2.071319580078125, -2.7485580444335938, -0.05145454406738281], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000050.npy"} +{"epoch": 0.07342143906020558, "step": 51, "batch_size": 64, "mean": 4.106563568115234, "std": 5.284502983093262, "min": -6.8819732666015625, "p10": -0.44977531433105467, "median": 2.878814697265625, "p90": 10.67720413208008, "max": 21.123504638671875, "pos_frac": 0.828125, "sample": [10.3250732421875, 4.2064208984375, 0.8690452575683594, -0.4475135803222656, 0.8570098876953125, -0.92681884765625, 9.733741760253906, -0.3847503662109375, 2.1212005615234375, -0.2186737060546875, 12.24169921875, 7.537483215332031, 1.8665218353271484, 4.865898132324219, 2.369901657104492, 4.6087188720703125, 3.9068565368652344, -0.8113803863525391, 10.828117370605469, 6.108188629150391, 4.476291656494141, -0.45074462890625, 0.1457061767578125, 3.5239601135253906, 1.150604248046875, 8.259479522705078, 0.6440773010253906, 5.967754364013672, 0.8015594482421875, 5.663299560546875, 21.123504638671875, 3.114042282104492, 0.5088462829589844, 6.0078887939453125, 3.263204574584961, 15.82745361328125, 1.7693042755126953, 16.228652954101562, 5.1455078125, 2.192087173461914, 3.0896453857421875, 0.8207664489746094, 4.2412872314453125, 10.057868957519531, 0.9290351867675781, 0.2563514709472656, 5.99188232421875, -3.7455978393554688, -0.05243492126464844, 19.463241577148438, 13.465011596679688, 7.7028045654296875, 8.15176010131836, -0.65618896484375, 1.3456382751464844, 6.5512847900390625, 0.07061958312988281, 0.22388458251953125, 2.8274383544921875, -6.8819732666015625, 2.874847412109375, -1.9057464599609375, 2.882781982421875, 0.09662628173828125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000051.npy"} +{"epoch": 0.07488986784140969, "step": 52, "batch_size": 64, "mean": 6.246335983276367, "std": 5.25726318359375, "min": -2.8212203979492188, "p10": 0.8254940032958985, "median": 5.114591598510742, "p90": 13.386956787109375, "max": 22.405059814453125, "pos_frac": 0.921875, "sample": [1.6077136993408203, 13.404022216796875, 4.5440826416015625, 12.905624389648438, 7.484016418457031, 11.495513916015625, 3.20843505859375, 7.5046539306640625, 8.107391357421875, 14.065353393554688, -0.203125, 13.965728759765625, 5.153095245361328, 5.860137939453125, 9.299837112426758, 8.349458694458008, 2.4588088989257812, 5.250232696533203, 17.851791381835938, 10.41168212890625, 3.6484603881835938, 0.8087615966796875, 13.050804138183594, 2.959444046020508, 0.06516838073730469, 8.211563110351562, 2.8151473999023438, 1.8399944305419922, 9.315458297729492, 22.405059814453125, 9.820953369140625, 1.829488754272461, 11.6466064453125, 4.714820861816406, 4.354728698730469, 14.430191040039062, 4.011722564697266, -1.1693572998046875, 5.076087951660156, -2.0118331909179688, -2.8212203979492188, 2.5215377807617188, 6.643564224243164, 5.9991455078125, 2.326610565185547, 10.637840270996094, 4.562908172607422, 13.347137451171875, 7.716602325439453, 1.7496204376220703, 0.8645362854003906, 4.178314208984375, 3.7670669555664062, 5.2171173095703125, 3.0485992431640625, 10.395004272460938, 6.4697113037109375, 1.1464309692382812, 18.64746856689453, -1.7165985107421875, 6.4307098388671875, 4.210212707519531, 2.481647491455078, 1.3938484191894531], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000052.npy"} +{"epoch": 0.0763582966226138, "step": 53, "batch_size": 64, "mean": 6.35241174697876, "std": 7.0986785888671875, "min": -3.31951904296875, "p10": 0.28123168945312527, "median": 4.281949996948242, "p90": 15.967517852783205, "max": 32.615753173828125, "pos_frac": 0.921875, "sample": [-1.5388679504394531, 4.680473327636719, 3.6807403564453125, 3.18212890625, 0.8484039306640625, 3.6463241577148438, 14.324851989746094, 0.1235198974609375, 8.182395935058594, -2.1199188232421875, 1.8789825439453125, 30.481201171875, -2.851734161376953, 1.4283981323242188, 7.160316467285156, 8.224407196044922, 1.304473876953125, 11.871353149414062, 2.8478012084960938, 13.428276062011719, 4.309032440185547, 6.088920593261719, 13.062538146972656, 7.275459289550781, 17.263534545898438, 1.6652297973632812, 4.994110107421875, 0.5474700927734375, 5.709190368652344, 1.8407726287841797, 19.301025390625, 8.713542938232422, 7.705486297607422, 8.7904052734375, 4.100959777832031, 0.1671295166015625, 0.5903244018554688, 4.435108184814453, 0.9713993072509766, 4.803314208984375, 3.6066360473632812, 8.131845474243164, 1.8599815368652344, 15.393623352050781, -3.31951904296875, 19.299575805664062, 18.439071655273438, 2.98699951171875, 4.2548675537109375, 3.2011585235595703, 2.0410499572753906, 5.097461700439453, 2.9845733642578125, 1.6355514526367188, 3.9910659790039062, 4.420032501220703, 3.0211544036865234, 32.615753173828125, 2.6302757263183594, 16.0660400390625, 5.010021209716797, 4.552886962890625, 15.737632751464844, -0.2218456268310547], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000053.npy"} +{"epoch": 0.07782672540381791, "step": 54, "batch_size": 64, "mean": 5.53140115737915, "std": 5.27100944519043, "min": -1.204559326171875, "p10": 0.31184539794921884, "median": 4.485828399658203, "p90": 12.615885925292972, "max": 22.211074829101562, "pos_frac": 0.9375, "sample": [8.352203369140625, 5.7734832763671875, 9.763717651367188, 1.32525634765625, 0.6908950805664062, 3.5969696044921875, 0.4190521240234375, 2.508218765258789, 6.20587158203125, 8.592212677001953, 1.0561752319335938, 6.795402526855469, 12.926727294921875, 2.046079635620117, 4.292274475097656, 12.109321594238281, 0.09070014953613281, 5.182493209838867, 5.556884765625, 5.933052062988281, -0.010435104370117188, 0.45000457763671875, 18.18305206298828, 0.7513885498046875, 7.176422119140625, 0.265899658203125, 4.960727691650391, 5.5785369873046875, 6.12762451171875, 7.039512634277344, 1.8479061126708984, -0.211090087890625, 8.071342468261719, -0.3083076477050781, 1.3835468292236328, 3.5890846252441406, 12.832984924316406, 3.2533111572265625, 6.2623138427734375, 14.200469970703125, 1.8646888732910156, 2.9474411010742188, 22.211074829101562, 11.337471008300781, -1.204559326171875, 2.7866382598876953, 0.2332592010498047, 14.67254638671875, 0.6235065460205078, 4.67938232421875, 1.576141357421875, 4.689533233642578, 11.380996704101562, 1.811767578125, 2.527801513671875, 20.883514404296875, 1.2126960754394531, 4.999422073364258, 2.4695053100585938, 11.65679931640625, 2.533994674682617, 10.173614501953125, 1.4124202728271484, 11.868736267089844], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000054.npy"} +{"epoch": 0.07929515418502203, "step": 55, "batch_size": 64, "mean": 6.5409345626831055, "std": 7.751531600952148, "min": -12.503189086914062, "p10": -1.7840229034423825, "median": 5.8554534912109375, "p90": 16.921681213378907, "max": 29.895980834960938, "pos_frac": 0.78125, "sample": [7.727912902832031, 10.233245849609375, 4.7605743408203125, -4.612701416015625, 25.874298095703125, -2.4981918334960938, 7.578685760498047, 4.565998077392578, 17.67327880859375, -1.18463134765625, 7.1945037841796875, 6.1352081298828125, 2.6480636596679688, 11.713760375976562, 20.987579345703125, 9.310256958007812, 5.054496765136719, 10.257591247558594, 29.895980834960938, 3.7095603942871094, 11.39117431640625, -2.26513671875, 4.2075347900390625, 9.095428466796875, 25.649276733398438, 0.2655620574951172, 17.179443359375, 0.28220367431640625, 0.6162452697753906, 8.647380828857422, 4.877799987792969, -2.5491943359375, 7.235542297363281, -0.0334014892578125, 13.26251220703125, 4.84027099609375, -1.9490890502929688, -0.20274734497070312, -0.5696582794189453, -5.4785308837890625, 10.22280502319336, 3.5310134887695312, 3.0578765869140625, 15.590019226074219, 6.718193054199219, -1.3988685607910156, 1.064859390258789, 11.8056640625, 5.957786560058594, -12.503189086914062, 3.1187973022460938, 16.320236206054688, 7.0707550048828125, 4.387825012207031, 7.832553863525391, 6.184572219848633, -1.0876579284667969, 7.08258056640625, 8.582130432128906, 14.015762329101562, 5.753120422363281, -0.27191925048828125, 5.2838287353515625, 18.772979736328125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000055.npy"} +{"epoch": 0.08076358296622614, "step": 56, "batch_size": 64, "mean": 6.270113945007324, "std": 7.630791664123535, "min": -10.240413665771484, "p10": -2.1194400787353507, "median": 5.110395431518555, "p90": 16.320713043212898, "max": 28.638397216796875, "pos_frac": 0.796875, "sample": [-2.626190185546875, 8.123165130615234, -3.414886474609375, 9.093429565429688, -0.8333244323730469, -10.240413665771484, 10.349876403808594, -1.3458976745605469, 4.3791961669921875, -4.179637908935547, 3.8563385009765625, 2.2011260986328125, 4.952323913574219, 17.151611328125, 2.634706497192383, -0.14295196533203125, -0.6418190002441406, 12.225914001464844, 10.675392150878906, 8.483848571777344, -2.450958251953125, 12.827468872070312, 3.23504638671875, 24.318328857421875, 9.14434814453125, 18.489715576171875, 1.4310951232910156, 0.326141357421875, 10.004709243774414, 2.6059341430664062, 7.4055328369140625, 7.939666748046875, 10.09442138671875, 1.6781578063964844, 11.274192810058594, 11.834407806396484, -0.16954421997070312, 20.882904052734375, 7.610923767089844, 6.71185302734375, 5.501108169555664, 4.997920989990234, 3.054990768432617, 22.6787109375, 7.596931457519531, 14.381950378417969, 6.18321418762207, 11.169937133789062, 0.8207130432128906, 28.638397216796875, 1.176218032836914, 0.0060558319091796875, 4.273674011230469, 14.129165649414062, -7.2888641357421875, 3.577543258666992, -0.854705810546875, 22.098548889160156, 5.222869873046875, -3.8371200561523438, 2.951629638671875, 1.9582443237304688, 6.530548095703125, 10.423477172851562], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000056.npy"} +{"epoch": 0.08223201174743025, "step": 57, "batch_size": 64, "mean": 7.098737716674805, "std": 6.918424129486084, "min": -11.896484375, "p10": -0.06650829315185522, "median": 6.6944427490234375, "p90": 15.658733367919924, "max": 25.442672729492188, "pos_frac": 0.890625, "sample": [9.148914337158203, 3.466114044189453, -0.171783447265625, -3.3403778076171875, 11.488494873046875, -11.896484375, 13.392074584960938, 0.29273223876953125, 14.93597412109375, 6.918247222900391, 17.86669921875, 3.8393001556396484, 25.442672729492188, 3.8852081298828125, 8.403396606445312, 7.9864959716796875, -1.2084903717041016, 3.9560298919677734, 6.6842803955078125, 21.936187744140625, 10.508174896240234, 18.874496459960938, 12.450942993164062, 4.34605598449707, 7.424955368041992, 1.421499252319336, 15.353279113769531, 5.508527755737305, 4.698997497558594, 2.6191253662109375, 0.6759185791015625, 4.107917785644531, 3.3166961669921875, 0.17418479919433594, 7.8236083984375, 8.519287109375, -0.1696624755859375, 1.7133293151855469, 3.0675125122070312, 9.728843688964844, 12.55810546875, 1.972341537475586, 10.654609680175781, 6.016471862792969, 10.524932861328125, -2.7050743103027344, 9.295761108398438, 7.8166351318359375, 10.470428466796875, 3.733592987060547, 2.34588623046875, 20.315689086914062, 5.790744781494141, 3.6312637329101562, 6.7046051025390625, 4.615913391113281, 0.6523456573486328, 13.779289245605469, 13.782089233398438, -6.834808349609375, 15.789642333984375, 21.94097900390625, 8.344259262084961, 7.934112548828125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000057.npy"} +{"epoch": 0.08370044052863436, "step": 58, "batch_size": 64, "mean": 7.912272930145264, "std": 8.49140739440918, "min": -10.428070068359375, "p10": -0.6926239013671869, "median": 5.746929168701172, "p90": 18.694902038574224, "max": 29.877975463867188, "pos_frac": 0.875, "sample": [17.000946044921875, -2.5036144256591797, 0.5261878967285156, 5.209480285644531, 4.3533935546875, 4.627843856811523, 22.817890167236328, 2.3597335815429688, 16.777198791503906, 2.875703811645508, 6.673435211181641, 7.483543395996094, 8.706062316894531, 4.307771682739258, 2.5117454528808594, 13.16415786743164, 11.855316162109375, 9.846805572509766, 4.311206817626953, 3.1833057403564453, 9.11328125, 15.618377685546875, 15.238197326660156, -2.2420501708984375, 13.156524658203125, 1.0367279052734375, 10.249881744384766, 7.05394172668457, 24.358169555664062, 4.534231185913086, 29.877975463867188, 8.62973403930664, 6.2843780517578125, -0.038787841796875, 17.495826721191406, 0.789276123046875, 16.007644653320312, 4.975700378417969, 2.6273040771484375, -1.6480255126953125, 2.6713027954101562, 3.0802860260009766, 28.148223876953125, -2.312347412109375, 11.567214965820312, -0.97283935546875, 0.9610958099365234, 7.779998779296875, -5.1436767578125, 27.892364501953125, 1.894540786743164, 6.397552490234375, 0.7452220916748047, 16.083816528320312, 11.632999420166016, -10.428070068359375, 23.7410888671875, 2.54473876953125, 3.1772232055664062, 7.8492431640625, 17.792579650878906, 2.086465835571289, 0.9083976745605469, 19.08161163330078], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000058.npy"} +{"epoch": 0.08516886930983847, "step": 59, "batch_size": 64, "mean": 8.744604110717773, "std": 9.221623420715332, "min": -9.136016845703125, "p10": -1.6413337707519529, "median": 7.171625137329102, "p90": 21.752740478515626, "max": 37.00224304199219, "pos_frac": 0.875, "sample": [9.906585693359375, 10.334152221679688, 18.1680908203125, 21.110931396484375, 17.972137451171875, 5.251708984375, 37.00224304199219, 23.999343872070312, 8.142383575439453, 18.553543090820312, 0.6944599151611328, 7.049808502197266, -4.609638214111328, 2.992769241333008, 25.95025634765625, -1.3107147216796875, 2.8201160430908203, 16.156883239746094, 6.64794921875, 24.53802490234375, 1.4434280395507812, 2.6902084350585938, 1.2899551391601562, 1.4712333679199219, 5.185508728027344, 31.4144287109375, 2.5923118591308594, 2.0712242126464844, 9.667858123779297, -9.136016845703125, 9.991218566894531, -2.468738555908203, 9.735496520996094, 8.676006317138672, 10.515281677246094, 20.749427795410156, 3.9913177490234375, 14.264595031738281, 8.991020202636719, 3.989276885986328, -6.952812194824219, -3.399383544921875, 6.64427375793457, -1.7830276489257812, 5.093173980712891, 10.981529235839844, 8.057586669921875, 3.1872100830078125, 8.758644104003906, 2.8262557983398438, 1.3108558654785156, 7.2934417724609375, 3.387491226196289, 10.832386016845703, 6.823482513427734, 17.251068115234375, 10.967361450195312, 4.883480072021484, 4.287689208984375, -3.2506942749023438, 20.00643539428711, 22.027801513671875, 8.184558868408203, 23.73980712890625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000059.npy"} +{"epoch": 0.08663729809104258, "step": 60, "batch_size": 64, "mean": 6.913043975830078, "std": 8.146025657653809, "min": -18.18035888671875, "p10": -2.356009292602539, "median": 7.28798770904541, "p90": 14.569689178466799, "max": 27.755531311035156, "pos_frac": 0.8125, "sample": [8.815780639648438, -6.4972686767578125, 2.1991214752197266, 12.568939208984375, 1.327789306640625, 3.6358108520507812, 13.213035583496094, 9.18927001953125, 1.9211196899414062, -18.18035888671875, 7.237512588500977, -2.90069580078125, 2.2943077087402344, 4.315374374389648, 12.605415344238281, 7.4209442138671875, 5.165069580078125, 13.233833312988281, 6.864997863769531, 12.756965637207031, 13.627937316894531, 14.856704711914062, 22.3389892578125, 10.727836608886719, 9.294036865234375, 8.887870788574219, -0.8300151824951172, 7.338462829589844, 13.714996337890625, -0.021068572998046875, 11.210086822509766, 20.992599487304688, 4.642765045166016, 6.8242950439453125, 9.160518646240234, -2.196002960205078, 12.538421630859375, 8.201156616210938, 2.6766815185546875, -7.10980224609375, 13.285942077636719, 4.127376556396484, 5.7169647216796875, 10.050277709960938, 4.615447998046875, -1.4889717102050781, -13.835521697998047, 18.939529418945312, 24.137725830078125, 13.899986267089844, 15.243019104003906, 3.5931930541992188, -2.4245834350585938, -1.5597915649414062, 11.837779998779297, 7.004266738891602, 27.755531311035156, 11.530078887939453, 1.65765380859375, 10.567510604858398, -2.5779647827148438, 9.969482421875, 5.021770477294922, 1.3046798706054688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000060.npy"} +{"epoch": 0.0881057268722467, "step": 61, "batch_size": 64, "mean": 8.706412315368652, "std": 12.270907402038574, "min": -11.677268981933594, "p10": -2.3688270568847654, "median": 6.17854118347168, "p90": 22.199123382568363, "max": 68.82073974609375, "pos_frac": 0.796875, "sample": [3.0536251068115234, 1.8712444305419922, -2.5200271606445312, 0.09999847412109375, 17.936172485351562, -0.58544921875, 0.5977153778076172, 9.76718521118164, -2.0816268920898438, 33.28596496582031, 26.060333251953125, 22.87628173828125, 28.771865844726562, 15.746978759765625, 9.648269653320312, 11.852386474609375, -1.7853622436523438, 1.719461441040039, 12.924980163574219, 13.769859313964844, 3.5455322265625, 5.771766662597656, 19.585739135742188, 10.497127532958984, 7.212066650390625, 7.059045791625977, 6.585315704345703, 7.77093505859375, 11.085163116455078, 9.766151428222656, -1.3097381591796875, -2.491912841796875, 13.583641052246094, 19.551544189453125, 20.738250732421875, 26.980010986328125, 20.952926635742188, 2.416698455810547, -3.4285888671875, 15.839157104492188, 3.093769073486328, 0.4442310333251953, 16.502166748046875, 3.732830047607422, 0.9768581390380859, 22.73320770263672, 1.53717041015625, 4.333213806152344, 68.82073974609375, 15.408218383789062, 14.418502807617188, -1.9670028686523438, -11.677268981933594, 14.001506805419922, 2.6327991485595703, -6.363544464111328, 5.0766754150390625, 3.170501708984375, -9.316421508789062, 3.3458900451660156, 8.467071533203125, -7.033103942871094, -1.5487442016601562, 1.7004470825195312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000061.npy"} +{"epoch": 0.08957415565345081, "step": 62, "batch_size": 64, "mean": 6.545290946960449, "std": 8.873523712158203, "min": -13.001983642578125, "p10": -4.632846641540526, "median": 5.920321464538574, "p90": 17.872087478637695, "max": 35.351219177246094, "pos_frac": 0.859375, "sample": [10.349151611328125, 1.865142822265625, 0.5829133987426758, 12.799514770507812, 5.9363250732421875, 13.557998657226562, 9.198965072631836, 1.0992698669433594, 10.115135192871094, -6.96624755859375, 17.720428466796875, 7.297760009765625, 1.9448814392089844, 0.14075851440429688, 11.917991638183594, 4.0704345703125, 12.776763916015625, 6.8941650390625, 0.20914077758789062, 5.518318176269531, 8.338005065917969, 8.143264770507812, 1.8404083251953125, 6.876993179321289, -5.694488525390625, 11.210647583007812, 4.642927169799805, 9.758155822753906, -0.0291290283203125, 35.351219177246094, 19.32740020751953, 10.568473815917969, 7.8283538818359375, 2.9984169006347656, 0.714141845703125, 7.157264709472656, 1.9965133666992188, 21.7003173828125, -13.001983642578125, -5.065864562988281, -7.5033721923828125, 0.4828948974609375, 1.2760543823242188, 0.8805007934570312, 1.3482589721679688, 10.370733261108398, 17.937084197998047, 11.132747650146484, -3.9821949005126953, 19.981048583984375, 10.057903289794922, 5.904317855834961, -9.114044189453125, 4.1102752685546875, 15.188179016113281, 3.981311798095703, 3.9501914978027344, 3.5752716064453125, 31.857955932617188, 24.709213256835938, 7.610109329223633, 2.0332717895507812, -4.9116973876953125, 6.3327484130859375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000062.npy"} +{"epoch": 0.09104258443465492, "step": 63, "batch_size": 64, "mean": 8.791152000427246, "std": 9.819863319396973, "min": -16.691017150878906, "p10": -0.9937074661254879, "median": 8.599660873413086, "p90": 20.397392272949226, "max": 39.648101806640625, "pos_frac": 0.875, "sample": [28.94439697265625, 9.360458374023438, 5.320680618286133, 10.114463806152344, 0.9222412109375, 1.0109481811523438, 6.1608734130859375, -9.730789184570312, 7.291412353515625, 8.783336639404297, -16.691017150878906, 18.924407958984375, 9.023754119873047, 9.22726821899414, 3.2235565185546875, -1.1876201629638672, 21.3778076171875, 5.002302169799805, 16.19647216796875, 23.84497833251953, 14.492805480957031, 17.022872924804688, 7.2415771484375, 0.23053550720214844, -1.4656524658203125, 11.185739517211914, -4.383293151855469, 0.1962127685546875, 5.6681365966796875, 3.691875457763672, 15.742965698242188, 9.434906005859375, 9.060630798339844, 10.90200424194336, 13.786346435546875, 14.958587646484375, 17.136734008789062, 16.002830505371094, 6.678394317626953, 0.34356689453125, -0.5412445068359375, 16.693496704101562, 3.180154800415039, 5.189929962158203, -14.662673950195312, 15.654626846313477, 10.253837585449219, 2.06768798828125, 1.6363525390625, 8.355545043945312, 4.059295654296875, 17.93444061279297, 21.663818359375, 8.415985107421875, 21.028671264648438, 1.21075439453125, 10.505575180053711, 39.648101806640625, -3.8127899169921875, 17.044593811035156, 3.6465225219726562, 29.69122314453125, 4.851104736328125, 13.871017456054688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000063.npy"} +{"epoch": 0.09251101321585903, "step": 64, "batch_size": 64, "mean": 10.527725219726562, "std": 12.478157043457031, "min": -24.105422973632812, "p10": -1.5861183166503903, "median": 9.089622497558594, "p90": 26.5554183959961, "max": 58.91845703125, "pos_frac": 0.78125, "sample": [0.6289749145507812, 22.287879943847656, 31.199356079101562, 9.06534194946289, 5.711339950561523, 14.509750366210938, 3.6680641174316406, 19.404006958007812, -0.2662181854248047, 8.195905685424805, 2.95281982421875, 11.988594055175781, -0.18053436279296875, 6.051658630371094, 14.634674072265625, 38.50193786621094, 5.882133483886719, -2.390583038330078, 21.131973266601562, 9.113903045654297, 14.049217224121094, -3.3678436279296875, 33.438140869140625, 9.15493392944336, -0.5890293121337891, 5.848731994628906, -1.9154949188232422, 8.295148849487305, -24.105422973632812, -0.6549091339111328, 9.202144622802734, -1.7078628540039062, 58.91845703125, 6.485691070556641, 2.0739059448242188, 12.169879913330078, 19.78264617919922, 25.25042724609375, 12.754364013671875, 5.585903167724609, 6.978153228759766, 27.114700317382812, 15.371650695800781, 11.890449523925781, -1.3020477294921875, -10.237258911132812, 29.786849975585938, 3.748046875, 27.530548095703125, -1.2082786560058594, 18.303741455078125, 14.813278198242188, 15.379997253417969, -1.9944915771484375, 15.987075805664062, -0.3199043273925781, 5.673578262329102, 9.561878204345703, 16.993453979492188, 13.390487670898438, 0.971221923828125, 13.76080322265625, 21.392242431640625, 7.428199768066406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000064.npy"} +{"epoch": 0.09397944199706314, "step": 65, "batch_size": 64, "mean": 10.78107738494873, "std": 12.520744323730469, "min": -29.14099884033203, "p10": -1.296996688842773, "median": 8.620826721191406, "p90": 28.423220825195322, "max": 37.72212219238281, "pos_frac": 0.8125, "sample": [37.72212219238281, 29.492401123046875, 24.097885131835938, 13.439453125, 7.79163932800293, 11.649566650390625, -0.17684173583984375, 15.831806182861328, 3.7162399291992188, 25.405921936035156, 19.83746337890625, 2.4963455200195312, -0.28426170349121094, 30.025726318359375, 34.24375915527344, 0.8216133117675781, 12.124250411987305, 2.8130664825439453, -5.121124267578125, 6.39202880859375, 1.310171127319336, 23.4697265625, 8.43072509765625, -29.14099884033203, 20.935226440429688, 11.605819702148438, -4.3545684814453125, 8.810928344726562, 16.583328247070312, 5.4458465576171875, 6.198768615722656, 4.852119445800781, -2.8043556213378906, 5.929431915283203, 35.22395324707031, 8.981632232666016, 25.928466796875, 18.943431854248047, 31.668907165527344, 9.709175109863281, -1.0606842041015625, -17.975753784179688, 12.225341796875, 2.0878047943115234, 9.433837890625, 23.468528747558594, 30.36089324951172, -3.2410049438476562, 5.85382080078125, 24.181053161621094, 14.331779479980469, 6.000732421875, 25.774795532226562, -1.3982734680175781, 15.786849975585938, 12.071014404296875, 6.458076477050781, 2.681365966796875, 19.78717803955078, 8.22735595703125, -0.9036674499511719, 8.417922973632812, -0.4603157043457031, 7.833524703979492], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000065.npy"} +{"epoch": 0.09544787077826726, "step": 66, "batch_size": 64, "mean": 9.735556602478027, "std": 12.39319133758545, "min": -12.908340454101562, "p10": -1.5858234405517575, "median": 7.925716400146484, "p90": 22.66871414184571, "max": 58.83979797363281, "pos_frac": 0.828125, "sample": [9.333740234375, 58.83979797363281, -1.1867141723632812, 19.159423828125, 15.696502685546875, 14.557228088378906, 2.8183155059814453, -0.8456344604492188, 11.572443008422852, 7.811431884765625, -12.382110595703125, 2.6602649688720703, 8.229825973510742, -1.048828125, 13.278179168701172, 4.358364105224609, 5.60051155090332, 0.2513427734375, 1.8967037200927734, 23.45941162109375, 10.976341247558594, 11.55830192565918, 0.25539398193359375, 11.862838745117188, 20.823753356933594, 2.7873001098632812, 7.298576354980469, -4.412132263183594, -4.605890274047852, -12.908340454101562, 14.278594970703125, 19.543724060058594, 20.38967514038086, 8.685325622558594, 3.440156936645508, 14.138008117675781, 5.727365493774414, -0.5724334716796875, 3.165151596069336, 16.345909118652344, 12.3785400390625, 3.4810256958007812, -1.7568702697753906, 8.040000915527344, 7.487979888916016, -9.51788330078125, 9.778709411621094, 1.5465831756591797, -5.815483093261719, 16.422462463378906, 5.145942687988281, 25.957931518554688, 32.652008056640625, 26.435379028320312, 6.2173614501953125, 20.39703369140625, 2.237548828125, 10.39957046508789, 18.654098510742188, 2.7498836517333984, 41.027313232421875, 1.5817070007324219, 38.696624755859375, 16.04035186767578], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000066.npy"} +{"epoch": 0.09691629955947137, "step": 67, "batch_size": 64, "mean": 10.138212203979492, "std": 12.635088920593262, "min": -17.881988525390625, "p10": -2.3539592742919924, "median": 7.615009307861328, "p90": 25.428036499023438, "max": 60.08306884765625, "pos_frac": 0.8125, "sample": [27.316490173339844, 6.006660461425781, 25.51367950439453, -1.02252197265625, 2.9013671875, -1.7429122924804688, 20.655136108398438, 7.142271041870117, -6.915618896484375, 2.7271041870117188, 21.17901611328125, 11.10599136352539, 3.9365463256835938, 20.298965454101562, -1.2393569946289062, -4.664943695068359, 10.0994873046875, 11.626861572265625, 2.2425308227539062, -17.881988525390625, 25.22820281982422, 9.700790405273438, -2.3659400939941406, -0.290679931640625, 7.6542816162109375, 1.6542587280273438, 26.699623107910156, 7.6120147705078125, -2.3260040283203125, 5.988945007324219, 0.211883544921875, 19.34542465209961, 9.18081283569336, 3.4675540924072266, 4.5834503173828125, 11.027873992919922, 17.28184700012207, 8.166868209838867, 1.4009437561035156, -6.333953857421875, 7.582374572753906, 21.841346740722656, 5.366371154785156, 3.867361068725586, 32.40187072753906, 17.821670532226562, 6.89726448059082, 21.314178466796875, 16.39698028564453, 17.939849853515625, 12.368995666503906, 6.405029296875, 13.029291152954102, 41.4935302734375, 7.618003845214844, 1.4584426879882812, 60.08306884765625, 3.2161216735839844, -3.8462066650390625, 31.81311798095703, 23.775680541992188, 8.706809997558594, -5.717376708984375, 9.838884353637695], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000067.npy"} +{"epoch": 0.09838472834067548, "step": 68, "batch_size": 64, "mean": 9.685720443725586, "std": 10.614004135131836, "min": -12.366912841796875, "p10": -0.5221698760986325, "median": 7.789722442626953, "p90": 24.63611450195313, "max": 47.20503234863281, "pos_frac": 0.875, "sample": [-7.931724548339844, 0.4249267578125, 31.998809814453125, 10.970321655273438, 6.365503311157227, -8.878890991210938, 11.777862548828125, 27.264564514160156, 3.2178573608398438, 0.7605400085449219, 17.331809997558594, 18.233535766601562, 22.8876953125, 14.568367004394531, 9.785018920898438, -2.5646591186523438, 8.130046844482422, 16.815441131591797, 5.8595428466796875, 11.151435852050781, -2.4027862548828125, 5.492719650268555, 6.206205368041992, -0.647186279296875, 9.790494918823242, 12.590644836425781, 22.116714477539062, 9.519134521484375, 11.26815414428711, 22.70818328857422, 25.38543701171875, 28.70743179321289, 0.9813766479492188, 8.198692321777344, 12.242340087890625, 2.040821075439453, -0.23046493530273438, 7.565704345703125, 2.4462661743164062, 7.301368713378906, 21.172718048095703, 2.8787193298339844, 11.430459976196289, -12.366912841796875, 8.013740539550781, 6.269172668457031, 4.728202819824219, 26.27819061279297, 1.4226226806640625, 1.7927703857421875, 2.0238513946533203, 47.20503234863281, 28.922195434570312, 17.551010131835938, 17.59375, 0.47196197509765625, 2.3196754455566406, 5.52808952331543, 12.497940063476562, -2.1458053588867188, 7.3430938720703125, 3.3998470306396484, 5.216102600097656, 10.890460968017578], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000068.npy"} +{"epoch": 0.09985315712187959, "step": 69, "batch_size": 64, "mean": 12.812248229980469, "std": 13.627901077270508, "min": -10.041458129882812, "p10": -0.9386129379272456, "median": 8.938743591308594, "p90": 28.16742401123047, "max": 64.53085327148438, "pos_frac": 0.875, "sample": [5.2684478759765625, 8.791458129882812, 41.7718505859375, 13.918441772460938, 16.662750244140625, 22.542503356933594, -10.041458129882812, -4.061859130859375, -0.5657672882080078, 9.476509094238281, 6.6407928466796875, 11.269695281982422, 1.209075927734375, 13.040733337402344, 5.060707092285156, 20.510143280029297, 39.50335693359375, 8.525337219238281, 23.0560302734375, 21.332435607910156, -5.036201477050781, 36.46893310546875, 16.57807159423828, 9.794639587402344, 24.50897979736328, 18.82958984375, 4.7452850341796875, 20.757118225097656, 16.19412612915039, 8.59237289428711, 0.7035179138183594, -1.1323356628417969, 25.24396514892578, 64.53085327148438, 6.424201965332031, 29.503463745117188, 9.086029052734375, 11.815582275390625, 25.911102294921875, 1.5028076171875, 0.9846992492675781, 20.792800903320312, 12.317363739013672, 4.037017822265625, -1.0984039306640625, 19.331138610839844, 7.801067352294922, 45.21440124511719, 5.19952392578125, 11.208393096923828, 7.069904327392578, -7.201667785644531, 8.007820129394531, 4.458534240722656, 3.2278404235839844, 6.456787109375, 27.381622314453125, 27.71636962890625, 0.4111328125, 28.360733032226562, 2.6269149780273438, 1.8983268737792969, 7.315645217895508, -2.4674072265625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000069.npy"} +{"epoch": 0.1013215859030837, "step": 70, "batch_size": 64, "mean": 13.855405807495117, "std": 16.084291458129883, "min": -20.32366943359375, "p10": -4.334882736206055, "median": 12.983701705932617, "p90": 32.082349395751955, "max": 60.741180419921875, "pos_frac": 0.84375, "sample": [7.66276741027832, 0.683380126953125, 15.211803436279297, 23.186927795410156, 27.065597534179688, 15.571863174438477, -13.748687744140625, -7.3321685791015625, 17.500083923339844, 4.551950454711914, -4.876922607421875, 21.227783203125, -4.524311065673828, 15.262680053710938, 54.032958984375, 1.2886314392089844, 10.39727783203125, 35.796661376953125, 14.845003128051758, 60.741180419921875, 17.233993530273438, 12.36306381225586, 3.344879150390625, 39.816192626953125, 0.8270988464355469, 22.744384765625, 28.420562744140625, 16.249258041381836, -4.691221237182617, 8.925352096557617, 8.421241760253906, 32.44043731689453, 3.4126052856445312, 0.23874282836914062, 1.9074554443359375, -2.6436538696289062, 20.232807159423828, 5.124122619628906, 18.039688110351562, -3.89288330078125, 5.040660858154297, 5.403350830078125, -4.7503204345703125, 1.384246826171875, 13.806083679199219, 4.179901123046875, 25.960800170898438, 30.6005859375, -20.32366943359375, 5.205715179443359, 26.303192138671875, 6.826343536376953, 1.8058109283447266, 47.755950927734375, 18.196754455566406, -2.022624969482422, 49.63946533203125, 13.604339599609375, 28.180908203125, 21.310182571411133, 4.968563079833984, 29.687347412109375, 19.676998138427734, 31.246810913085938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000070.npy"} +{"epoch": 0.1027900146842878, "step": 71, "batch_size": 64, "mean": 16.68012237548828, "std": 18.526941299438477, "min": -13.87310791015625, "p10": 0.12385292053222757, "median": 11.303287506103516, "p90": 32.044468688964855, "max": 78.6134033203125, "pos_frac": 0.890625, "sample": [-9.051185607910156, 64.28228759765625, 4.604808807373047, 4.321571350097656, 7.083683013916016, 16.730571746826172, 7.5076751708984375, 15.948661804199219, 21.50342559814453, 8.500133514404297, 29.737457275390625, 11.744749069213867, 28.375534057617188, 5.367835998535156, 5.1670074462890625, 6.5751190185546875, 12.792247772216797, 23.086227416992188, -1.2333450317382812, 2.791013717651367, 5.186065673828125, 66.26048278808594, 10.705711364746094, 10.912681579589844, 21.358543395996094, 21.505958557128906, 8.192913055419922, 17.425121307373047, 16.417726516723633, -13.87310791015625, 1.1310348510742188, 5.6776275634765625, 21.806838989257812, -0.3077964782714844, 6.812742233276367, 23.5673828125, 27.098480224609375, -1.51959228515625, 28.699356079101562, 28.263755798339844, 2.4153060913085938, 21.849021911621094, 5.400001525878906, 10.377227783203125, 25.43170166015625, 27.295639038085938, 2.84716796875, 54.40443420410156, 71.61279296875, 78.6134033203125, 3.4924774169921875, 33.03318786621094, -11.36569595336914, -1.8401908874511719, 23.526931762695312, 11.693893432617188, 10.208976745605469, 13.661346435546875, 23.889305114746094, 22.367416381835938, 45.85394287109375, 5.472621917724609, 7.301332473754883, 8.828201293945312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000071.npy"} +{"epoch": 0.10425844346549193, "step": 72, "batch_size": 64, "mean": 16.97983169555664, "std": 20.599525451660156, "min": -35.96484375, "p10": -3.5760978698730423, "median": 15.942448616027832, "p90": 44.61502532958986, "max": 85.755615234375, "pos_frac": 0.890625, "sample": [23.07074737548828, 20.470605850219727, 6.143257141113281, 17.850662231445312, 3.7226486206054688, 6.893665313720703, 13.447120666503906, 37.19000244140625, -26.268325805664062, 35.6822509765625, 22.204143524169922, 20.340242385864258, 8.588350296020508, 1.847412109375, 21.732406616210938, 11.955390930175781, 20.444664001464844, 37.767738342285156, 22.570480346679688, 46.324493408203125, 54.55226135253906, -9.144693374633789, 11.005287170410156, 12.674783706665039, 26.163963317871094, 0.993560791015625, 24.147628784179688, 15.915565490722656, 20.204795837402344, 24.659202575683594, 4.349723815917969, 5.84197998046875, 19.41619873046875, -27.215194702148438, 52.496490478515625, 40.62626647949219, 25.284156799316406, 9.36056137084961, 28.150924682617188, -7.365013122558594, 12.654163360595703, 61.541473388671875, 85.755615234375, -10.173042297363281, 12.757680892944336, 22.431076049804688, 6.09698486328125, 11.308765411376953, 1.8700733184814453, 54.9298095703125, -35.96484375, 4.750392913818359, 1.9567337036132812, 17.833328247070312, 19.081558227539062, 25.466224670410156, 12.37493896484375, 20.039810180664062, 52.330047607421875, 15.969331741333008, 7.422027587890625, 6.4570159912109375, -5.534523010253906, 1.2581825256347656], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000072.npy"} +{"epoch": 0.10572687224669604, "step": 73, "batch_size": 64, "mean": 17.105716705322266, "std": 27.847681045532227, "min": -42.330169677734375, "p10": -16.921801757812496, "median": 13.39882755279541, "p90": 46.51243515014649, "max": 95.60816955566406, "pos_frac": 0.78125, "sample": [40.95159912109375, -25.67591094970703, 12.267593383789062, 21.63866424560547, 10.175872802734375, 51.516563415527344, -13.9454345703125, 28.783294677734375, 13.361358642578125, 0.34059906005859375, 45.55670166015625, 40.70327377319336, 34.61859130859375, -2.0925521850585938, -3.6517486572265625, -23.85357666015625, 80.30746459960938, -2.86712646484375, 13.436296463012695, 27.38873291015625, 22.2266845703125, 10.977157592773438, 16.107276916503906, 1.5729598999023438, 3.270030975341797, 14.59371566772461, -30.069141387939453, 12.781845092773438, 44.37348937988281, 19.174224853515625, 18.388633728027344, 5.432168960571289, 88.34310913085938, -20.544204711914062, 3.871135711669922, 42.65965270996094, 44.57832336425781, -13.130315780639648, 6.716464996337891, 0.6593894958496094, -7.1897125244140625, -9.159339904785156, -18.1973876953125, 7.121559143066406, 18.995529174804688, 20.460723876953125, 4.465763092041016, 6.919609069824219, 26.595481872558594, 61.687469482421875, 23.8424072265625, 8.0938720703125, 35.707672119140625, 5.411396026611328, 21.89196014404297, -20.621688842773438, 16.715545654296875, 82.21070861816406, 46.513458251953125, 95.60816955566406, 3.2910900115966797, 46.510047912597656, 19.278854370117188, -42.330169677734375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000073.npy"} +{"epoch": 0.10719530102790015, "step": 74, "batch_size": 64, "mean": 18.996326446533203, "std": 30.875200271606445, "min": -55.89971160888672, "p10": -4.7913419723510735, "median": 14.050405502319336, "p90": 43.833088684082036, "max": 188.3902587890625, "pos_frac": 0.765625, "sample": [30.663368225097656, 10.734962463378906, 19.82453155517578, -0.04700469970703125, 28.529396057128906, 11.497798919677734, 13.93075942993164, 75.57488250732422, 2.2240142822265625, 31.34355926513672, 24.555320739746094, 11.007942199707031, 26.23239517211914, -0.8455810546875, 18.947166442871094, 188.3902587890625, 10.802833557128906, 43.331939697265625, 20.475727081298828, 5.345359802246094, 2.350494384765625, 71.81898498535156, 6.851951599121094, 44.04786682128906, 31.58502960205078, 13.078811645507812, -4.919168472290039, -9.513320922851562, -1.4235515594482422, -28.48095703125, 20.220840454101562, -3.7717742919921875, 1.4199256896972656, 11.640731811523438, -55.89971160888672, 26.383934020996094, -5.490913391113281, 36.23537826538086, 5.479217529296875, -9.294414520263672, -14.698680877685547, 14.170051574707031, 28.134628295898438, 27.46136474609375, 1.5176315307617188, 71.83526611328125, 18.407379150390625, -2.7170562744140625, 15.33026123046875, 12.027233123779297, 13.899951934814453, 49.46885681152344, 27.367019653320312, 34.192169189453125, 53.057212829589844, 16.97606658935547, 6.051244735717773, 29.410324096679688, 41.539215087890625, 37.32567596435547, -3.0907745361328125, -4.493080139160156, -3.44232177734375, 21.196269989013672], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000074.npy"} +{"epoch": 0.10866372980910426, "step": 75, "batch_size": 64, "mean": 26.377384185791016, "std": 28.478254318237305, "min": -20.946075439453125, "p10": -2.299550819396972, "median": 20.148950576782227, "p90": 65.66344146728517, "max": 99.53512573242188, "pos_frac": 0.859375, "sample": [-1.0995502471923828, 37.29789733886719, 21.915359497070312, 12.434455871582031, 24.815269470214844, 13.740161895751953, 12.428230285644531, 5.044811248779297, 63.39947509765625, 98.611572265625, 39.99474334716797, 18.77017593383789, -14.4918212890625, 35.65514373779297, 99.53512573242188, 20.721038818359375, 58.562255859375, 82.88999938964844, 12.034482955932617, -1.5625286102294922, 58.604209899902344, 34.61518859863281, 19.576862335205078, 10.609245300292969, 9.63092041015625, 62.56853485107422, 66.63371276855469, 3.6206092834472656, 11.55953598022461, 38.85660934448242, 24.372398376464844, 23.318927764892578, 0.3104705810546875, 88.43736267089844, -9.689437866210938, -2.61541748046875, 34.81524658203125, -20.946075439453125, 27.328399658203125, 25.555641174316406, 9.97970199584961, 15.028385162353516, -13.664459228515625, 12.88641357421875, -7.301910400390625, 38.80271911621094, 4.137979507446289, 17.426307678222656, 56.72406005859375, 37.707122802734375, 4.533771514892578, -11.461181640625, 10.035930633544922, 10.412254333496094, 20.739517211914062, 6.917362213134766, 7.0895233154296875, 34.19232177734375, 17.72966766357422, 22.903404235839844, 51.93132781982422, 88.51034545898438, 83.94931030273438, 21.0833740234375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000075.npy"} +{"epoch": 0.11013215859030837, "step": 76, "batch_size": 64, "mean": 16.212825775146484, "std": 21.6419677734375, "min": -63.79029846191406, "p10": -5.238659858703612, "median": 14.986291885375977, "p90": 39.54368476867676, "max": 85.38795471191406, "pos_frac": 0.84375, "sample": [16.249074935913086, 4.770870208740234, 3.4819488525390625, 33.74900817871094, 5.306068420410156, 15.379730224609375, 4.899040222167969, 33.69242858886719, 39.78679275512695, 21.136306762695312, 22.75505828857422, 28.548091888427734, 11.942588806152344, 38.97643280029297, 4.753692626953125, 6.461238861083984, 10.756906509399414, 85.38795471191406, 46.407203674316406, 8.994789123535156, -12.0689697265625, 14.872783660888672, -0.6769657135009766, 17.09246063232422, -21.464874267578125, 5.970170974731445, 7.079580307006836, 22.327613830566406, 7.363983154296875, 6.250572204589844, 38.11450958251953, 19.729705810546875, 27.305908203125, 11.281961441040039, 36.53364562988281, 5.086053848266602, -5.754150390625, 36.708282470703125, 2.5275707244873047, -16.769287109375, 7.967817306518555, 5.266609191894531, -10.070898056030273, -16.560348510742188, 22.320877075195312, 44.14933776855469, 64.78407287597656, 35.490074157714844, 9.79543685913086, 8.713264465332031, 14.6572265625, -4.035848617553711, 18.21654510498047, 25.314659118652344, 19.863380432128906, 15.099800109863281, -0.7324333190917969, 22.553955078125, 46.11643981933594, 31.97296905517578, 32.22026062011719, 47.860565185546875, 15.501510620117188, -63.79029846191406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000076.npy"} +{"epoch": 0.11160058737151249, "step": 77, "batch_size": 64, "mean": 26.634931564331055, "std": 29.98417091369629, "min": -64.20672607421875, "p10": -1.928185272216794, "median": 26.747007369995117, "p90": 72.99423370361328, "max": 104.33795166015625, "pos_frac": 0.890625, "sample": [22.209144592285156, 53.932777404785156, 38.96078872680664, 52.062557220458984, 55.36466979980469, -3.1373062133789062, 75.94390869140625, 26.342830657958984, 10.532630920410156, 10.657115936279297, 9.241216659545898, 43.69842529296875, 48.789955139160156, 5.826061248779297, 0.893096923828125, 36.48632049560547, -64.20672607421875, 31.767807006835938, 48.991294860839844, 40.212196350097656, 16.14837074279785, -3.4601783752441406, 12.545063018798828, 2.141387939453125, 7.7298736572265625, 73.11752319335938, 5.1428985595703125, 35.94702911376953, 15.842296600341797, -6.679250717163086, 41.30515670776367, 34.82953643798828, 15.345710754394531, 83.29328918457031, 43.7515869140625, 12.095478057861328, 81.66543579101562, 104.33795166015625, 26.201080322265625, 19.675125122070312, -42.65672302246094, 10.737979888916016, -30.54058837890625, 3.4406471252441406, 15.650899887084961, 27.15118408203125, 38.80079650878906, 29.41023063659668, 33.73817443847656, 31.44306182861328, 10.818988800048828, -15.237823486328125, 5.415355682373047, 34.98341369628906, 34.21903991699219, 28.351730346679688, 6.16729736328125, 72.70655822753906, 86.76087951660156, 16.191570281982422, 31.769290924072266, 74.45249938964844, 27.585407257080078, 7.731658935546875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000077.npy"} +{"epoch": 0.1130690161527166, "step": 78, "batch_size": 64, "mean": 19.68130111694336, "std": 25.280540466308594, "min": -31.548439025878906, "p10": -5.78747673034668, "median": 18.840758323669434, "p90": 53.263667297363284, "max": 116.18325805664062, "pos_frac": 0.78125, "sample": [-5.467842102050781, 1.9337348937988281, 53.077911376953125, 21.210281372070312, -5.19281005859375, 56.37931823730469, 18.633609771728516, 116.18325805664062, 32.43110656738281, 25.86178970336914, 25.106456756591797, 11.389713287353516, -5.776905059814453, 15.227741241455078, 25.34354019165039, 24.433422088623047, -0.809326171875, 11.83050537109375, -5.7920074462890625, 6.372398376464844, 6.347524642944336, 20.342988967895508, 33.4871711730957, 31.92871856689453, 23.909557342529297, 14.991291046142578, 21.479080200195312, 24.052276611328125, 32.44789123535156, 6.481159210205078, 53.34327697753906, 19.04790687561035, 6.945045471191406, -17.047863006591797, 32.0637321472168, -19.5106201171875, 73.69491577148438, 27.39508819580078, 54.330177307128906, -8.242141723632812, 43.950199127197266, 3.0196914672851562, 63.02545166015625, 36.825504302978516, 39.033058166503906, -3.1546974182128906, 13.423141479492188, 4.205776214599609, 14.55080795288086, 7.659576416015625, 7.876577377319336, -1.9719676971435547, 19.253042221069336, -5.0571136474609375, 42.23353576660156, -31.548439025878906, 57.54341125488281, 6.465106964111328, -27.788070678710938, 32.024986267089844, -11.33447265625, 48.712249755859375, 5.199577331542969, 35.59321594238281], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000078.npy"} +{"epoch": 0.1145374449339207, "step": 79, "batch_size": 64, "mean": 21.310894012451172, "std": 29.387636184692383, "min": -35.37001037597656, "p10": -12.434281539916986, "median": 16.17245101928711, "p90": 60.22700500488282, "max": 97.72531127929688, "pos_frac": 0.765625, "sample": [-15.006599426269531, 63.015586853027344, 6.638341903686523, 87.50570678710938, 2.7240257263183594, 0.8811798095703125, -3.8518009185791016, -15.150657653808594, 51.44403076171875, 40.780609130859375, 21.362403869628906, -35.37001037597656, 29.540756225585938, 88.41194152832031, 9.273124694824219, 33.56902313232422, 88.83833312988281, 59.212615966796875, 20.748863220214844, -17.83416748046875, 97.72531127929688, 21.914566040039062, -5.136383056640625, -16.314697265625, 35.78416442871094, 0.44612884521484375, -6.364227294921875, 20.549556732177734, 16.415786743164062, 77.8674087524414, 2.6587295532226562, 9.443231582641602, 15.929115295410156, 32.10761642456055, -17.656906127929688, 12.71624755859375, 1.2306747436523438, 8.630332946777344, 53.559356689453125, 54.98057556152344, 27.255725860595703, 40.4490966796875, 43.88963317871094, 22.83059310913086, 32.864501953125, -6.605686187744141, 2.0908203125, 39.128868103027344, 46.869422912597656, 12.617364883422852, 19.429683685302734, -6.432641983032227, -2.0647201538085938, 48.010650634765625, -1.2306785583496094, 27.902297973632812, 5.950603485107422, 8.446479797363281, 0.9564361572265625, -14.9322509765625, 6.100164413452148, 60.6617431640625, -5.312404632568359, 21.801544189453125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000079.npy"} +{"epoch": 0.11600587371512482, "step": 80, "batch_size": 64, "mean": 20.36130142211914, "std": 31.15595245361328, "min": -37.04254913330078, "p10": -10.91354751586914, "median": 12.289521217346191, "p90": 53.47596511840821, "max": 131.66787719726562, "pos_frac": 0.765625, "sample": [25.038116455078125, 10.4375, -11.1707763671875, 28.895530700683594, 6.756855010986328, 4.29951286315918, -5.091960906982422, -12.899803161621094, 44.872406005859375, -4.800605773925781, 4.767009735107422, 18.62139129638672, -19.012855529785156, -27.608863830566406, -13.264678955078125, 29.29791259765625, 131.66787719726562, 49.19916534423828, 103.30789184570312, 54.33232879638672, 35.26129150390625, 7.732048034667969, 51.05131530761719, 60.571739196777344, 40.28535461425781, 18.926593780517578, -1.3543338775634766, 44.42546081542969, 17.41266632080078, 9.08441162109375, 6.303958892822266, -21.915786743164062, 5.212337493896484, -8.809967041015625, 29.393081665039062, -37.04254913330078, 71.202392578125, 39.28075408935547, -10.313346862792969, 112.95114135742188, 11.640359878540039, 6.55517578125, 2.4231529235839844, 4.2872161865234375, 59.30829620361328, 16.632225036621094, 29.303627014160156, 42.86259460449219, 17.28958511352539, 10.810615539550781, -5.1868743896484375, -4.584402084350586, 1.2152481079101562, 10.837478637695312, 26.58836555480957, 28.095245361328125, 9.516399383544922, 3.355754852294922, 39.16375732421875, 31.924819946289062, 12.938682556152344, 51.477783203125, -5.652379989624023, 15.018074035644531], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000080.npy"} +{"epoch": 0.11747430249632893, "step": 81, "batch_size": 64, "mean": 27.691865921020508, "std": 35.83575439453125, "min": -43.092071533203125, "p10": -12.67999496459961, "median": 23.462547302246094, "p90": 72.47195129394532, "max": 152.05120849609375, "pos_frac": 0.84375, "sample": [54.55677795410156, 41.05059814453125, -43.092071533203125, 46.29241180419922, 31.125995635986328, 80.53822326660156, 24.012557983398438, -14.975223541259766, 9.252662658691406, 38.74842834472656, 38.63414001464844, -35.04046630859375, 3.2657699584960938, 33.547203063964844, 113.29440307617188, 15.335681915283203, 30.248672485351562, 46.521728515625, 73.09402465820312, -20.669448852539062, 1.7016639709472656, 8.789337158203125, 24.10124969482422, 85.04556274414062, 28.838890075683594, 13.372993469238281, 22.91253662109375, 11.389774322509766, 9.013444900512695, -12.798072814941406, 30.845901489257812, 5.088287353515625, 6.787347793579102, 14.079788208007812, 30.12596893310547, 152.05120849609375, 53.36586380004883, 22.032455444335938, 2.1109466552734375, 71.02044677734375, 51.224456787109375, 63.449188232421875, 20.363723754882812, -2.7108154296875, 4.39569091796875, -4.323921203613281, -25.57156753540039, 17.557708740234375, 6.759559631347656, 38.9254150390625, 18.611358642578125, 13.22065544128418, 26.20067596435547, -21.681549072265625, 43.74567413330078, 52.87371063232422, 131.27984619140625, 73.22836303710938, 11.65020751953125, 40.98320770263672, 2.2241592407226562, 35.04277801513672, -12.40447998046875, 41.61778259277344], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000081.npy"} +{"epoch": 0.11894273127753303, "step": 82, "batch_size": 64, "mean": 21.982954025268555, "std": 38.47688674926758, "min": -77.08424377441406, "p10": -22.07347717285156, "median": 18.885605812072754, "p90": 65.24722518920899, "max": 155.16336059570312, "pos_frac": 0.78125, "sample": [-15.953006744384766, 42.89076232910156, -29.562850952148438, 21.36739730834961, 24.846416473388672, 21.467363357543945, -34.515350341796875, 17.464031219482422, 30.44427490234375, 70.2555923461914, 85.86685180664062, 14.092609405517578, -21.59809112548828, 12.599945068359375, 18.002058029174805, 131.79649353027344, 16.2244873046875, -64.32594299316406, -14.384651184082031, 41.081329345703125, 2.8228683471679688, 37.89894104003906, 65.88806915283203, 5.049406051635742, -22.27721405029297, -10.732070922851562, 4.386283874511719, 32.63877868652344, 25.905914306640625, 14.615768432617188, 26.253463745117188, 43.17375183105469, 47.210174560546875, -0.9641227722167969, 92.48837280273438, 22.39996337890625, 63.751922607421875, 19.082950592041016, 10.75833511352539, 10.691513061523438, -3.8068771362304688, 37.14079284667969, 77.40083312988281, -25.583969116210938, 35.455474853515625, 28.904022216796875, 7.894502639770508, 55.67344284057617, 47.199180603027344, -36.845916748046875, 12.693161010742188, 8.046951293945312, -77.08424377441406, 11.412506103515625, 41.284278869628906, 25.458511352539062, 5.193611145019531, 29.433555603027344, 18.207130432128906, 155.16336059570312, 19.749900817871094, 18.688261032104492, -0.18084716796875, 56.30873107910156], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000082.npy"} +{"epoch": 0.12041116005873716, "step": 83, "batch_size": 64, "mean": 26.131643295288086, "std": 33.53199768066406, "min": -34.02738952636719, "p10": -13.881576538085938, "median": 20.169292449951172, "p90": 79.48622283935549, "max": 106.36996459960938, "pos_frac": 0.796875, "sample": [25.485191345214844, 31.329822540283203, 19.195423126220703, 84.033447265625, 56.20350646972656, 14.160575866699219, -10.235092163085938, 92.28204345703125, 75.01730346679688, 3.0751800537109375, 15.409027099609375, 19.513771057128906, -2.35589599609375, -7.77880859375, 21.795333862304688, 82.73448181152344, -17.8167724609375, 106.36996459960938, 11.76080322265625, 94.0175552368164, 52.27174377441406, 16.054031372070312, 57.68206787109375, -25.93744659423828, -32.316436767578125, -13.955917358398438, 68.6278076171875, -27.271751403808594, 19.175878524780273, 81.40147399902344, 7.307853698730469, 18.67474365234375, 5.53770637512207, -3.03851318359375, 33.25872039794922, 13.821922302246094, 31.229236602783203, 0.17009735107421875, 48.23224639892578, 18.797222137451172, 27.219635009765625, 26.280582427978516, 54.90937805175781, 89.14944458007812, 20.913002014160156, 59.967681884765625, 5.597755432128906, -23.055694580078125, -3.2757701873779297, 3.7581253051757812, -34.02738952636719, 69.55897521972656, 10.480148315429688, 49.73124694824219, 30.53960418701172, 31.933753967285156, -13.708114624023438, 9.237903594970703, 20.824813842773438, 42.75489044189453, 50.275657653808594, 21.30413055419922, 35.046409606933594, 3.089427947998047], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000083.npy"} +{"epoch": 0.12187958883994127, "step": 84, "batch_size": 64, "mean": 21.604463577270508, "std": 25.076475143432617, "min": -57.197784423828125, "p10": -2.4068988800048823, "median": 19.97245693206787, "p90": 50.47737197875976, "max": 103.350341796875, "pos_frac": 0.84375, "sample": [6.551826477050781, 20.896255493164062, 3.3240280151367188, 6.738958358764648, 56.077972412109375, 8.57171630859375, 6.7498016357421875, 24.656982421875, 19.63606071472168, 7.561004638671875, 34.45412063598633, 44.934669494628906, -25.515106201171875, 14.950881958007812, 23.527725219726562, 33.676544189453125, 12.909599304199219, 103.350341796875, 35.6243782043457, 1.746856689453125, 6.558994293212891, 32.27996826171875, 4.538291931152344, 7.490087509155273, -1.0818138122558594, 50.540367126464844, 31.503631591796875, 32.06683349609375, -57.197784423828125, -2.533559799194336, 60.2886962890625, 1.9804611206054688, 43.361724853515625, 16.733367919921875, 25.416427612304688, 14.92071533203125, 59.008811950683594, 50.33038330078125, 40.08868408203125, 20.308853149414062, 2.2348709106445312, 35.679710388183594, -6.076929092407227, 18.202219009399414, 32.89874267578125, 33.477394104003906, -2.111356735229492, -5.5517578125, 39.86515808105469, 37.49785614013672, -7.3883056640625, 48.62507629394531, 68.31959533691406, 21.74909210205078, 69.29110717773438, 18.537918090820312, 36.68080139160156, 10.6356201171875, -25.126060485839844, 35.06848907470703, 13.437507629394531, 23.008712768554688, 6.7725372314453125, -0.07015419006347656], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000084.npy"} +{"epoch": 0.12334801762114538, "step": 85, "batch_size": 64, "mean": 22.484390258789062, "std": 29.94058609008789, "min": -38.98689270019531, "p10": -8.953787231445311, "median": 19.825130462646484, "p90": 70.2500328063965, "max": 85.52902221679688, "pos_frac": 0.703125, "sample": [51.765625, 18.351760864257812, 32.58024597167969, 4.5932769775390625, -7.256317138671875, 24.57469940185547, 36.443145751953125, -0.031604766845703125, 73.21141815185547, 11.776065826416016, -2.772401809692383, 6.574531555175781, 81.17762756347656, 32.97198486328125, 28.74212074279785, 23.86243438720703, -6.795166015625, -25.855751037597656, 21.621192932128906, -0.25286102294921875, -38.98689270019531, 85.52902221679688, 23.83393096923828, -2.721078872680664, -9.6812744140625, 49.49163055419922, 6.84930419921875, -14.773635864257812, 20.534324645996094, 11.943374633789062, 57.31390380859375, -5.7531585693359375, -23.60449981689453, -1.8159980773925781, 14.904052734375, 5.288280487060547, 72.77816772460938, 19.78659439086914, 46.263916015625, -0.7870655059814453, -5.078523635864258, 78.51960754394531, -2.5060672760009766, 54.27931213378906, 73.23683166503906, 17.712993621826172, 64.3510513305664, 83.60348510742188, 13.224456787109375, 7.001371383666992, 40.403587341308594, 59.78339385986328, 0.1027374267578125, -10.152389526367188, 22.022781372070312, 21.45774269104004, 58.218048095703125, 25.061256408691406, 34.655982971191406, 43.99180221557617, -28.874786376953125, 19.863666534423828, -1.341806411743164, 47.78947448730469], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000085.npy"} +{"epoch": 0.12481644640234948, "step": 86, "batch_size": 64, "mean": 23.718734741210938, "std": 46.36717224121094, "min": -72.22360229492188, "p10": -18.273620986938475, "median": 16.88607406616211, "p90": 79.25022583007814, "max": 180.38143920898438, "pos_frac": 0.71875, "sample": [-16.180641174316406, 22.386337280273438, 81.008544921875, -26.826614379882812, 40.15000915527344, 36.276641845703125, 31.053834915161133, 0.3098602294921875, 126.6788330078125, 12.701362609863281, 7.74470329284668, -10.288986206054688, -0.5933609008789062, 26.287490844726562, -12.45135498046875, 23.26849365234375, 56.805084228515625, 20.23367691040039, -3.577594757080078, -70.80339050292969, 77.16116333007812, 26.451824188232422, 36.05656814575195, 12.559206008911133, 10.940498352050781, -19.170612335205078, -7.973121643066406, 80.14553833007812, -4.548892974853516, 9.91590690612793, 43.723575592041016, 19.954559326171875, 9.173545837402344, -72.22360229492188, 58.21636962890625, 62.618743896484375, 31.862442016601562, -53.93799591064453, 92.62675476074219, 2.6169261932373047, 180.38143920898438, 19.706119537353516, 24.1572265625, 14.066028594970703, -6.1184234619140625, 151.74676513671875, 7.567850112915039, 147.9290771484375, 56.8212890625, -12.409210205078125, -37.76129913330078, 44.447792053222656, 10.453601837158203, 38.12806701660156, 34.48316192626953, 37.66389083862305, 40.65949249267578, 7.1401214599609375, -0.8971786499023438, 3.4028377532958984, -4.638605117797852, -25.10157012939453, 2.117034912109375, 23.70108413696289], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000086.npy"} +{"epoch": 0.1262848751835536, "step": 87, "batch_size": 64, "mean": 28.232730865478516, "std": 40.88151168823242, "min": -100.774658203125, "p10": -12.64660949707031, "median": 23.05611801147461, "p90": 81.76258087158206, "max": 151.75340270996094, "pos_frac": 0.8125, "sample": [25.3569278717041, 42.272377014160156, 0.33016014099121094, 10.992790222167969, 41.003204345703125, -13.681594848632812, 39.86674499511719, -0.6620082855224609, 0.8436355590820312, -8.703964233398438, -31.278884887695312, 59.525238037109375, -7.680084228515625, 86.92083740234375, 40.271636962890625, 72.53364562988281, 61.28313446044922, 13.661787033081055, 106.96502685546875, 76.42056274414062, 22.64942169189453, 24.5108642578125, 8.154195785522461, 48.11000061035156, 47.10954284667969, 2.377431869506836, 45.880043029785156, 151.75340270996094, 13.986026763916016, 4.7352447509765625, 13.366952896118164, 88.88836669921875, 67.83920288085938, 12.125415802001953, 67.80474853515625, 31.274490356445312, 16.275297164916992, 45.889461517333984, 23.462814331054688, 95.66819763183594, 0.7195930480957031, 96.73246765136719, 9.768455505371094, 20.887943267822266, -100.774658203125, 14.093526840209961, 50.26702117919922, -38.76367950439453, 44.953582763671875, 84.05201721191406, -22.453964233398438, -22.702552795410156, 10.038932800292969, 22.442150115966797, -10.231643676757812, 69.01708984375, 31.171890258789062, 41.31163024902344, -46.634033203125, 31.884918212890625, 15.386466979980469, 0.4354438781738281, 63.99040222167969, -6.80047607421875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000087.npy"} +{"epoch": 0.1277533039647577, "step": 88, "batch_size": 64, "mean": 21.33620262145996, "std": 39.74388885498047, "min": -76.8094482421875, "p10": -16.79471435546875, "median": 14.08046817779541, "p90": 69.5438659667969, "max": 163.4503173828125, "pos_frac": 0.75, "sample": [10.836860656738281, 8.927383422851562, 74.50480651855469, 80.69085693359375, 39.415374755859375, 60.00276184082031, 4.864526748657227, 72.18829345703125, -17.960189819335938, 15.414159774780273, 44.022369384765625, 8.275367736816406, 1.23095703125, 56.931495666503906, -26.719009399414062, 39.31391906738281, -23.08313751220703, 15.925880432128906, -59.576171875, -76.8094482421875, -7.331718444824219, 92.4429931640625, 48.636322021484375, -4.8368072509765625, 47.97307586669922, 5.430999755859375, 39.044342041015625, -14.075271606445312, 26.865785598754883, 22.89354705810547, 46.432151794433594, 8.321250915527344, -2.7115955352783203, 31.385276794433594, 0.9193038940429688, 116.88800048828125, 4.309516906738281, 63.37353515625, 5.702110290527344, 163.4503173828125, 8.87448501586914, 54.72968292236328, 2.5046253204345703, 14.830644607543945, 13.9156494140625, 29.129119873046875, -1.076416015625, -4.377750396728516, 41.07923889160156, 94.9090576171875, 37.59712219238281, 18.44603729248047, -2.735065460205078, 11.862678527832031, -65.01255798339844, -6.172332763671875, 12.04163932800293, -1.4341754913330078, 14.24528694152832, 15.693466186523438, 20.788599014282227, -24.231353759765625, 7.0724639892578125, 49.32661437988281], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000088.npy"} +{"epoch": 0.12922173274596183, "step": 89, "batch_size": 64, "mean": 23.626157760620117, "std": 33.15934753417969, "min": -86.187744140625, "p10": -3.326249313354492, "median": 18.639235496520996, "p90": 58.628614807128926, "max": 144.7644500732422, "pos_frac": 0.84375, "sample": [10.633529663085938, 1.5667572021484375, 6.793567657470703, 23.09031867980957, -12.421092987060547, 60.370574951171875, 50.681396484375, 18.2030029296875, -13.064407348632812, 64.15760040283203, 67.11638641357422, 26.550960540771484, 24.815353393554688, 54.56404113769531, 44.480079650878906, 33.65053939819336, 12.128799438476562, 11.088798522949219, -3.0647106170654297, 22.63608169555664, 18.129627227783203, 4.33433723449707, 17.873170852661133, 13.647579193115234, -13.224319458007812, 0.4524097442626953, 2.465576171875, 11.351381301879883, 92.42561340332031, 21.94544219970703, 34.08384704589844, 100.60870361328125, 18.327228546142578, 50.93958282470703, 24.795230865478516, -1.4028778076171875, 12.468700408935547, 144.7644500732422, 6.782596588134766, -3.4383373260498047, 32.46953201293945, -3.6608505249023438, 27.096933364868164, 34.77696228027344, 17.143701553344727, 5.6800994873046875, 11.050559997558594, 4.780574798583984, 3.3602294921875, 43.04560089111328, 24.96692657470703, 49.86732482910156, -2.6479263305664062, 20.630558013916016, 53.49170684814453, -86.187744140625, -32.022735595703125, 31.673988342285156, 18.951242446899414, 28.466331481933594, 24.49755859375, 25.660606384277344, 108.64259338378906, 9.032821655273438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000089.npy"} +{"epoch": 0.13069016152716592, "step": 90, "batch_size": 64, "mean": 32.863868713378906, "std": 50.2245979309082, "min": -65.70082092285156, "p10": -16.687039184570306, "median": 21.897754669189453, "p90": 110.96323852539065, "max": 174.877197265625, "pos_frac": 0.796875, "sample": [84.03158569335938, -52.09703826904297, 31.13800048828125, 114.30307006835938, 23.015304565429688, 41.19220733642578, 0.6179618835449219, -6.308990478515625, 63.6414794921875, -2.3328685760498047, 51.107810974121094, 20.345779418945312, -2.819896697998047, 13.740341186523438, 46.62200927734375, 98.34619140625, -6.737331390380859, 174.877197265625, 65.24172973632812, 1.0122146606445312, 2.504484176635742, 12.761184692382812, 26.287925720214844, -19.945098876953125, 143.24517822265625, 17.43743133544922, 28.416786193847656, 145.38665771484375, 5.259183883666992, 25.591190338134766, 162.74981689453125, 26.992347717285156, -37.82386779785156, 13.993309020996094, -9.08489990234375, -65.70082092285156, 30.896656036376953, 9.442359924316406, 72.11172485351562, 6.706199645996094, 19.973297119140625, 14.508758544921875, -23.759483337402344, -6.335317611694336, 25.898574829101562, 105.00439453125, 118.71826171875, -42.913963317871094, -42.0589599609375, 12.680171966552734, 43.353271484375, 1.3761653900146484, 21.304187774658203, 46.97876739501953, 78.74725341796875, 39.55461120605469, 11.561487197875977, 12.748796463012695, 77.16592407226562, 22.491321563720703, 27.24700927734375, 21.2048282623291, 113.51702880859375, 48.156639099121094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000090.npy"} +{"epoch": 0.13215859030837004, "step": 91, "batch_size": 64, "mean": 26.079620361328125, "std": 41.07676315307617, "min": -42.13372802734375, "p10": -26.751273345947265, "median": 22.775060653686523, "p90": 82.39293670654303, "max": 150.8463134765625, "pos_frac": 0.734375, "sample": [20.556968688964844, 58.14714050292969, -42.13372802734375, 9.650577545166016, 87.71116638183594, 0.4379081726074219, -27.38469696044922, 48.899715423583984, 3.4820213317871094, 25.542892456054688, 49.885711669921875, 46.34662628173828, 50.48578643798828, 15.678733825683594, -39.20459747314453, 128.8889923095703, -12.376968383789062, 2.01763916015625, 9.0572509765625, 98.00125122070312, 9.1087646484375, 37.724727630615234, 150.8463134765625, -21.736526489257812, 29.004623413085938, 63.520103454589844, -25.77887725830078, 4.9347381591796875, 25.930648803710938, -7.69927978515625, -27.168014526367188, 24.129295349121094, 69.98373413085938, -27.659072875976562, 19.29821014404297, 24.630523681640625, 65.5171890258789, -16.404579162597656, 22.51632308959961, -5.874197006225586, 13.032791137695312, 16.9884033203125, 66.69448852539062, 3.28411865234375, -24.75078582763672, 43.8944091796875, 25.202789306640625, 89.29696655273438, 23.033798217773438, 51.01496887207031, 8.317756652832031, -30.312179565429688, -0.07192802429199219, 35.91868591308594, -2.5749778747558594, 43.622825622558594, -0.6966915130615234, 97.98469543457031, 51.20655822753906, 29.82274627685547, 64.37712097167969, -38.31928253173828, 55.41520690917969, 98.19820404052734], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000091.npy"} +{"epoch": 0.13362701908957417, "step": 92, "batch_size": 64, "mean": 26.81001091003418, "std": 39.241493225097656, "min": -54.578086853027344, "p10": -13.458887481689452, "median": 19.667003631591797, "p90": 69.27715454101563, "max": 150.1461181640625, "pos_frac": 0.71875, "sample": [-5.88006591796875, 6.4637451171875, 6.71685791015625, 58.079856872558594, -9.034027099609375, 6.811553955078125, -54.578086853027344, -18.624183654785156, 57.052452087402344, 25.942829132080078, 76.91366577148438, -0.06152534484863281, 51.48371887207031, 2.915384292602539, -10.355659484863281, 129.7789306640625, 60.55796813964844, 150.1461181640625, -50.463348388671875, -5.1152496337890625, 28.06447982788086, -12.384902954101562, 30.488235473632812, -10.620040893554688, -23.17707061767578, 67.6097412109375, -13.919166564941406, 78.0164794921875, 35.34803009033203, 4.197242736816406, 81.4659652709961, -3.0852584838867188, 38.66473388671875, 0.6964550018310547, 56.595489501953125, 10.420036315917969, 38.799713134765625, 127.78108215332031, 10.6414794921875, 65.87786865234375, 50.547569274902344, 69.99176025390625, -2.6136093139648438, -8.866256713867188, 10.179267883300781, 29.935871124267578, 12.835189819335938, 44.852447509765625, 39.441162109375, 12.62735366821289, -0.010662078857421875, -15.501848220825195, 55.949153900146484, 18.271862030029297, 21.062145233154297, 10.383064270019531, 39.45618438720703, 60.616241455078125, 49.19178771972656, 28.609107971191406, 13.621458053588867, -15.9954833984375, 62.571807861328125, 38.453590393066406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000092.npy"} +{"epoch": 0.13509544787077826, "step": 93, "batch_size": 64, "mean": 26.72509002685547, "std": 34.87247085571289, "min": -43.46434783935547, "p10": -8.615437126159664, "median": 18.748634338378906, "p90": 64.44110107421875, "max": 172.14144897460938, "pos_frac": 0.796875, "sample": [21.988502502441406, 9.54621696472168, 30.857986450195312, 70.93888092041016, 65.14358520507812, 62.801971435546875, 3.8614501953125, 25.009498596191406, 53.46778106689453, -19.91254425048828, 89.605224609375, 26.91053009033203, 60.080474853515625, 57.029701232910156, 172.14144897460938, 16.520404815673828, -11.025917053222656, 2.9899139404296875, -1.0452651977539062, 101.91869354248047, -10.080223083496094, 14.737096786499023, 25.308948516845703, 0.85205078125, 39.009246826171875, 9.932037353515625, 43.366371154785156, 5.9723663330078125, -0.9269886016845703, 11.424232482910156, -0.5890712738037109, 13.07232666015625, 10.417789459228516, 20.298675537109375, 36.646270751953125, 62.70289611816406, 76.1234359741211, -28.679489135742188, -5.197603225708008, 60.80180358886719, 54.94390106201172, 36.556251525878906, 47.46813201904297, 7.672210693359375, 10.461494445800781, 10.994491577148438, 34.04743194580078, 3.0554542541503906, 62.68921661376953, -0.982086181640625, -11.541709899902344, 66.23138427734375, 1.4419937133789062, 20.79534149169922, 6.731998443603516, 5.69854736328125, -20.63909912109375, 31.564002990722656, 52.42695617675781, 37.10826110839844, 56.21495819091797, 17.198593139648438, -43.46434783935547, -0.2882041931152344], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000093.npy"} +{"epoch": 0.13656387665198239, "step": 94, "batch_size": 64, "mean": 31.172901153564453, "std": 36.741024017333984, "min": -28.2353515625, "p10": -9.629676628112794, "median": 25.09614849090576, "p90": 78.29222793579103, "max": 134.626708984375, "pos_frac": 0.78125, "sample": [29.111682891845703, -21.713695526123047, 50.48509216308594, 22.337326049804688, 5.3143768310546875, -17.72014617919922, 11.983345031738281, 67.40437316894531, 5.682518005371094, -28.2353515625, 24.50492286682129, 12.1226806640625, 28.21495819091797, 8.5057373046875, 12.931713104248047, 0.17765045166015625, 68.41885375976562, 12.913265228271484, -7.869472503662109, 134.626708984375, 5.95482063293457, 36.87918472290039, 36.328216552734375, 60.54119873046875, 91.4234619140625, 37.09770202636719, 13.429489135742188, 33.420799255371094, 24.812366485595703, -3.5762863159179688, -5.674726486206055, 18.26804542541504, 42.88629913330078, 75.09136962890625, 48.11183547973633, 21.948009490966797, 40.09211730957031, 79.66402435302734, 36.07586669921875, 25.37993049621582, 5.834495544433594, 48.98846435546875, -6.851509094238281, -22.413414001464844, 75.09114074707031, 85.04000854492188, -9.673151016235352, 65.91702270507812, 133.80075073242188, -9.528236389160156, -25.793411254882812, 96.03578186035156, 38.98088073730469, 67.71458435058594, -3.5849075317382812, 22.344390869140625, -16.713912963867188, -6.105266571044922, 41.81883239746094, 18.622772216796875, 40.19585037231445, 65.72798156738281, 86.88446044921875, 65.38174438476562], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000094.npy"} +{"epoch": 0.13803230543318648, "step": 95, "batch_size": 64, "mean": 24.850439071655273, "std": 35.511837005615234, "min": -91.53901672363281, "p10": -10.34252243041992, "median": 17.96693229675293, "p90": 75.67559051513676, "max": 108.69161987304688, "pos_frac": 0.8125, "sample": [20.916061401367188, 51.155609130859375, 1.8759078979492188, 26.060302734375, 25.01823616027832, 80.953369140625, 9.168079376220703, 7.211565017700195, -9.694267272949219, 14.108230590820312, 19.3732967376709, 4.4944305419921875, 40.85764694213867, 96.93638610839844, 56.024627685546875, -10.620346069335938, 42.51641082763672, 3.5811214447021484, 18.295225143432617, 53.726470947265625, 29.472869873046875, 46.22661590576172, 20.90720558166504, 16.935728073120117, 10.680410385131836, -15.903213500976562, 11.731094360351562, 65.78963470458984, 22.465953826904297, 33.411651611328125, -13.911079406738281, -11.582374572753906, 60.51914978027344, 108.69161987304688, 85.220458984375, 66.53182983398438, 46.612762451171875, 1.1960906982421875, -1.2033538818359375, 82.4700927734375, 52.344932556152344, 96.53106689453125, 79.59434509277344, 3.475799560546875, 15.88192367553711, 9.682613372802734, -1.9159564971923828, 35.75981903076172, 39.63592529296875, -91.53901672363281, 2.921724319458008, 1.6713752746582031, -40.6605224609375, 56.98558044433594, 48.146568298339844, 12.396160125732422, 44.61474609375, 12.489400863647461, 17.638639450073242, 17.123153686523438, -6.428220748901367, -3.6177196502685547, 15.065666198730469, -45.591468811035156], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000095.npy"} +{"epoch": 0.1395007342143906, "step": 96, "batch_size": 64, "mean": 33.9572639465332, "std": 42.06351852416992, "min": -63.741455078125, "p10": -2.0989416122436513, "median": 19.098044395446777, "p90": 84.03550033569336, "max": 181.6194610595703, "pos_frac": 0.875, "sample": [74.71925354003906, 16.389724731445312, 43.4010009765625, 34.88957595825195, 18.446456909179688, 77.07781982421875, 9.185256958007812, 3.2206954956054688, 0.8697071075439453, 24.38055419921875, 129.1494903564453, 19.032691955566406, 6.794761657714844, -8.46493148803711, 3.0585670471191406, 23.646244049072266, 32.73845672607422, 49.537384033203125, 95.02440643310547, 104.77845764160156, 11.233802795410156, 50.23484802246094, 44.34406280517578, 181.6194610595703, 84.12781524658203, 13.867725372314453, 19.09698486328125, 7.148521423339844, 18.1114501953125, 15.970535278320312, 9.998260498046875, 160.53506469726562, 36.10143280029297, 30.986083984375, 7.131507873535156, 98.37396240234375, 69.16787719726562, 6.268047332763672, 41.42137908935547, 1.6588706970214844, 56.15303039550781, 27.3293514251709, 26.88958740234375, -13.726554870605469, 16.718618392944336, 0.7880516052246094, -4.637931823730469, 46.747047424316406, -2.453603744506836, 28.819480895996094, 13.150634765625, 70.18473815917969, -16.33980941772461, 12.125885009765625, 81.4921875, 13.968841552734375, 19.099103927612305, 78.96269226074219, 83.82009887695312, -3.7296676635742188, -1.2713966369628906, 37.230255126953125, -63.741455078125, 0.41234588623046875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000096.npy"} +{"epoch": 0.14096916299559473, "step": 97, "batch_size": 64, "mean": 25.998584747314453, "std": 32.727516174316406, "min": -70.4325942993164, "p10": -9.764253807067869, "median": 25.07027530670166, "p90": 68.28608398437501, "max": 123.23907470703125, "pos_frac": 0.75, "sample": [19.97246551513672, 59.52001953125, -6.1494293212890625, -0.8318557739257812, -5.671314239501953, -8.063343048095703, 17.792621612548828, 31.709829330444336, 2.9151268005371094, 52.39397430419922, 27.676799774169922, 47.257965087890625, -0.9081745147705078, 20.51289176940918, 87.01919555664062, 72.95559692382812, -27.911788940429688, 46.14903259277344, 0.9348068237304688, 15.328544616699219, 29.00188446044922, 5.469409942626953, 17.170988082885742, -25.916213989257812, 86.12615966796875, 20.67650032043457, 29.01983642578125, 34.868812561035156, 45.875732421875, 16.389118194580078, 31.064666748046875, -7.832727432250977, 19.090774536132812, -15.212305068969727, 57.19902801513672, 52.08154296875, 54.96440124511719, 33.62240219116211, -70.4325942993164, -11.428878784179688, 55.062599182128906, 33.301204681396484, -12.145233154296875, 8.974414825439453, 64.24076843261719, 39.043155670166016, 0.41347694396972656, 23.617603302001953, 26.522947311401367, 19.85657501220703, -5.314535140991211, 70.01979064941406, -10.493215560913086, 55.62527084350586, -5.515369415283203, 75.56553649902344, -3.2190399169921875, 43.45198059082031, 53.406951904296875, 71.61300659179688, 35.9700813293457, 11.97043228149414, 123.23907470703125, 34.30033874511719], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000097.npy"} +{"epoch": 0.14243759177679882, "step": 98, "batch_size": 64, "mean": 29.64310073852539, "std": 43.803680419921875, "min": -126.57986450195312, "p10": -17.94754524230957, "median": 32.39496612548828, "p90": 75.63382186889649, "max": 151.06771850585938, "pos_frac": 0.78125, "sample": [48.04505157470703, 5.35443115234375, 68.99435424804688, 52.037384033203125, 7.523200988769531, 23.374711990356445, 28.561874389648438, 29.611412048339844, 35.103668212890625, 8.230781555175781, 33.24999237060547, -17.532745361328125, 22.55074691772461, 31.34455108642578, 75.72537231445312, 57.010162353515625, -18.125316619873047, -1.4992523193359375, -2.2319793701171875, -27.26734161376953, 44.065059661865234, 38.832969665527344, -37.659523010253906, -52.37994384765625, 62.4898681640625, 59.53017807006836, 33.01239013671875, 75.42020416259766, 78.74015808105469, 27.68899154663086, 45.95625305175781, -4.8781585693359375, 71.97978973388672, 151.06771850585938, -126.57986450195312, 96.99867248535156, 67.17108154296875, 31.777542114257812, 55.4010009765625, 22.95246124267578, -11.663215637207031, 44.46221160888672, -69.87277221679688, 31.42986297607422, 41.733306884765625, 4.317283630371094, 43.39562225341797, 90.91798400878906, 68.55870056152344, 40.7768440246582, 18.49799346923828, 14.711997985839844, 91.24928283691406, 122.6829833984375, 38.92078399658203, 38.4468879699707, 34.19078063964844, 17.279977798461914, 8.03519058227539, -10.253631591796875, 61.80882263183594, -5.076353073120117, 3.2872257232666016, -22.327293395996094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000098.npy"} +{"epoch": 0.14390602055800295, "step": 99, "batch_size": 64, "mean": 36.968345642089844, "std": 48.10136032104492, "min": -40.229042053222656, "p10": -10.387909698486327, "median": 25.715450286865234, "p90": 99.46761779785157, "max": 209.16221618652344, "pos_frac": 0.796875, "sample": [-2.411081314086914, -11.134666442871094, 6.417184829711914, 23.31436538696289, -0.3130607604980469, 46.605587005615234, 33.65507507324219, 34.94685363769531, 124.60232543945312, 23.2490177154541, 28.28282928466797, 143.942138671875, 5.835109710693359, -35.40974426269531, 101.39152526855469, -8.645477294921875, 25.68120574951172, 42.567962646484375, -1.7815818786621094, 21.85100555419922, 94.3426513671875, 18.032798767089844, 13.317819595336914, 179.3038330078125, 11.736600875854492, 70.0352783203125, 0.52459716796875, -29.98743438720703, 12.298585891723633, -40.229042053222656, -12.117610931396484, 58.11981201171875, 43.353782653808594, 32.37839889526367, 74.46171569824219, 209.16221618652344, 82.44993591308594, 44.2597541809082, 17.503524780273438, 25.74969482421875, 6.716072082519531, 32.83940887451172, 44.235130310058594, 94.97850036621094, 111.42256927490234, 8.497871398925781, 13.356575012207031, 74.04542541503906, -6.5581512451171875, -0.2757129669189453, 37.00749969482422, 102.84906005859375, 34.0691032409668, 10.859485626220703, 67.57374572753906, -13.914169311523438, 46.70820999145508, 15.418846130371094, 76.67156982421875, 52.63701629638672, 22.933223724365234, 51.1033935546875, 5.0677337646484375, -29.6116943359375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000099.npy"} +{"epoch": 0.14537444933920704, "step": 100, "batch_size": 64, "mean": 20.597900390625, "std": 41.58925247192383, "min": -93.97457885742188, "p10": -23.245976829528804, "median": 19.528987884521484, "p90": 71.30773239135742, "max": 165.5408935546875, "pos_frac": 0.75, "sample": [58.835731506347656, 33.65278625488281, -7.67668342590332, 16.161453247070312, 26.611114501953125, -11.899948120117188, 35.953948974609375, 29.887672424316406, 6.759449005126953, 57.79931640625, 10.018966674804688, 1.1144027709960938, 42.56233215332031, 27.50658416748047, 5.271352767944336, 89.62388610839844, 21.114898681640625, -48.460906982421875, -41.525177001953125, -67.1458740234375, -17.708826065063477, -12.862434387207031, -0.5617828369140625, -74.39785766601562, 59.70353317260742, 17.943077087402344, 52.9906005859375, 23.856704711914062, 1.3651981353759766, 5.852655410766602, 48.29149627685547, -4.2493438720703125, 47.518592834472656, 6.810344696044922, 72.58351135253906, 32.792686462402344, 9.807138442993164, 11.778572082519531, 70.4780044555664, 29.20892333984375, 46.45284652709961, 17.809181213378906, 8.22894287109375, 0.18924903869628906, 34.29566192626953, 37.11619567871094, -14.551025390625, -29.330970764160156, -8.570785522460938, 74.65876007080078, 42.593894958496094, 77.0184097290039, 26.716501235961914, -93.97457885742188, 165.5408935546875, -14.104990005493164, 15.83099365234375, 92.48530578613281, -25.619041442871094, 71.663330078125, 50.93571472167969, 8.748863220214844, 43.00224304199219, 23.763988494873047], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000100.npy"} +{"epoch": 0.14684287812041116, "step": 101, "batch_size": 64, "mean": 25.647552490234375, "std": 46.42403793334961, "min": -63.479591369628906, "p10": -24.379985427856443, "median": 19.859527587890625, "p90": 97.74723358154304, "max": 141.24618530273438, "pos_frac": 0.65625, "sample": [113.01988220214844, 141.24618530273438, 33.149871826171875, 33.860748291015625, -19.090988159179688, 118.64705657958984, 119.90518188476562, 12.732955932617188, 105.79193115234375, 126.10090637207031, -41.625946044921875, 32.499908447265625, -43.83649444580078, 52.3408203125, 14.755733489990234, -12.229866027832031, -25.728759765625, 16.364328384399414, 71.6609878540039, 28.46509552001953, -18.6295166015625, 31.58245849609375, 57.97013854980469, 71.70679473876953, 19.702774047851562, 20.016281127929688, 11.498981475830078, 71.38365936279297, 138.32650756835938, 29.597763061523438, -11.278350830078125, -7.225130081176758, -14.90670394897461, 57.34470748901367, 18.340017318725586, 41.68339538574219, 78.97627258300781, 55.056549072265625, -7.2207183837890625, 5.967372894287109, -2.7191104888916016, -16.41360855102539, 22.28144073486328, -7.928382873535156, -5.532806396484375, -19.221860885620117, 41.107582092285156, 35.70046615600586, 9.44554328918457, 28.70246696472168, -63.479591369628906, 35.70277786254883, -2.4323577880859375, -37.11454391479492, 27.189319610595703, 19.366071701049805, -21.232845306396484, 42.99348068237305, 8.922721862792969, -1.649026870727539, 54.728973388671875, -32.226200103759766, 43.19628143310547, -45.86625671386719], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000101.npy"} +{"epoch": 0.14831130690161526, "step": 102, "batch_size": 64, "mean": 18.641904830932617, "std": 43.68214797973633, "min": -92.94400024414062, "p10": -25.50242004394531, "median": 12.56039810180664, "p90": 65.19984893798829, "max": 151.75143432617188, "pos_frac": 0.640625, "sample": [3.3209972381591797, 41.406105041503906, 66.2895278930664, 16.308120727539062, -0.96588134765625, 6.0704345703125, -11.699081420898438, -50.55335998535156, 24.24879264831543, -92.06085205078125, 31.432647705078125, 100.15414428710938, -14.807014465332031, 40.352447509765625, 32.74576950073242, -9.261632919311523, 8.812675476074219, 35.90472412109375, 57.11274719238281, 2.202342987060547, -20.45929718017578, -5.17515754699707, 54.90019226074219, -6.736883163452148, 66.70390319824219, 25.897659301757812, 41.52882385253906, 151.75143432617188, -4.6492767333984375, -0.38933563232421875, 123.12726593017578, -0.5426921844482422, 40.1449089050293, 44.28668975830078, 4.494588851928711, -19.806114196777344, 37.00074768066406, -92.94400024414062, 3.674072265625, -31.318771362304688, 1.0377445220947266, -53.71730041503906, -25.818084716796875, -6.343589782714844, 62.657264709472656, 37.8406982421875, 48.696861267089844, 89.00938415527344, 48.847198486328125, 3.4918365478515625, 77.5411148071289, -32.73393249511719, -7.612174987792969, 4.023445129394531, 57.43817901611328, -24.765869140625, 45.304168701171875, 37.44184875488281, 27.28892707824707, 38.95610046386719, -14.749000549316406, -18.735214233398438, 45.93805694580078, 53.541866302490234], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000102.npy"} +{"epoch": 0.14977973568281938, "step": 103, "batch_size": 64, "mean": 37.86910629272461, "std": 48.54381561279297, "min": -48.60595703125, "p10": -11.916370391845701, "median": 29.624174118041992, "p90": 91.51167449951173, "max": 209.5938720703125, "pos_frac": 0.796875, "sample": [-7.051242828369141, 8.680717468261719, 14.22816276550293, 38.700172424316406, -28.958511352539062, -30.87024688720703, 35.0538444519043, 41.54682922363281, 15.186412811279297, 19.117477416992188, 30.284725189208984, 46.936927795410156, 18.39904022216797, 19.14678955078125, -2.7930984497070312, 71.61184692382812, 123.16854858398438, 38.479530334472656, 54.72358703613281, 70.19963836669922, -44.64848327636719, -12.65631103515625, 55.56480407714844, 26.381072998046875, -48.60595703125, 80.65596771240234, 39.12720489501953, -17.356300354003906, 17.9637451171875, 19.29302978515625, 46.14717102050781, 191.63748168945312, 28.96131134033203, 18.271282196044922, 78.92628479003906, 77.50080108642578, -4.919288635253906, -10.189842224121094, 22.11374855041504, 17.975725173950195, -6.74383544921875, 44.68540954589844, 74.12979125976562, -37.902435302734375, 25.7275390625, 108.52643585205078, 89.07473754882812, 28.963623046875, 209.5938720703125, 111.34614562988281, 114.86725616455078, 92.55607604980469, 12.013015747070312, 45.82598114013672, 72.78822326660156, -1.4555740356445312, 33.734169006347656, 4.37986946105957, 2.981060028076172, 52.982765197753906, 88.17827606201172, 16.433544158935547, 35.394195556640625, 47.608055114746094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000103.npy"} +{"epoch": 0.1512481644640235, "step": 104, "batch_size": 64, "mean": 36.06795883178711, "std": 54.158199310302734, "min": -70.93998718261719, "p10": -14.529857063293456, "median": 19.50301742553711, "p90": 105.43356018066407, "max": 215.5347900390625, "pos_frac": 0.75, "sample": [107.32208251953125, -8.720657348632812, 14.953044891357422, 17.593917846679688, 68.71357727050781, -6.646034240722656, -34.374595642089844, 3.527029037475586, -8.037010192871094, 41.723548889160156, -14.768884658813477, 39.76799392700195, 163.30044555664062, 54.33380126953125, 97.78980255126953, -4.136173248291016, 101.02700805664062, 33.94397735595703, -21.935531616210938, 128.4456787109375, 47.50762939453125, 2.9557418823242188, 19.995960235595703, 7.805973052978516, 15.197797775268555, 2.362295150756836, 215.5347900390625, -47.812232971191406, 61.55718994140625, 148.51296997070312, 149.80990600585938, 35.74494934082031, 14.975391387939453, 76.38804626464844, 13.507043838500977, 16.557090759277344, -13.972126007080078, -26.239830017089844, 133.4586181640625, 17.682052612304688, 54.220298767089844, -12.625534057617188, 76.46511840820312, -7.751533508300781, 90.63444519042969, 40.08875274658203, 34.38115692138672, 54.21940994262695, 46.028411865234375, 33.46778869628906, 5.170106887817383, 100.74070739746094, 6.933357238769531, 32.50988006591797, 66.27146911621094, 76.13166046142578, -10.032754898071289, 28.74814796447754, -0.6965847015380859, 19.010074615478516, -29.91071319580078, 1.030975341796875, -70.93998718261719, 8.902408599853516], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000104.npy"} +{"epoch": 0.1527165932452276, "step": 105, "batch_size": 64, "mean": 50.99995422363281, "std": 47.099082946777344, "min": -83.65859985351562, "p10": 1.5526329040527382, "median": 50.609317779541016, "p90": 111.78676452636722, "max": 192.0529327392578, "pos_frac": 0.890625, "sample": [53.803497314453125, 33.97911071777344, 49.12702941894531, 28.453628540039062, 43.60395050048828, 52.09160614013672, 77.28811645507812, 118.9227523803711, 7.941198348999023, -83.65859985351562, -8.5469970703125, 87.62913513183594, 91.57192993164062, 13.597410202026367, 36.112762451171875, 53.95662307739258, -37.645484924316406, -0.067779541015625, -8.13372802734375, 120.95999908447266, 35.7647705078125, 118.78562927246094, 73.69590759277344, 22.16869354248047, 10.5645751953125, 88.07855987548828, 69.1311264038086, 47.87297821044922, 10.607873916625977, 38.25257110595703, 78.09828186035156, 190.29718017578125, 24.003463745117188, 55.68013000488281, 21.058334350585938, 22.52191925048828, 79.05459594726562, 101.27531433105469, 52.84687805175781, 14.745197296142578, 53.98686218261719, 101.72467041015625, 40.26933288574219, 42.516502380371094, 52.46685791015625, 56.701385498046875, -0.36231231689453125, 69.97056579589844, 76.21769714355469, 116.09909057617188, 43.356773376464844, 22.46673583984375, -23.284080505371094, 11.236259460449219, 72.39231872558594, 65.45671081542969, 60.355308532714844, 127.48809814453125, 67.52507019042969, 192.0529327392578, 29.83582305908203, 5.333595275878906, 13.657724380493164, 81.04316711425781], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000105.npy"} +{"epoch": 0.15418502202643172, "step": 106, "batch_size": 64, "mean": 31.94563865661621, "std": 40.35704803466797, "min": -59.787696838378906, "p10": -13.178890228271477, "median": 32.23625183105469, "p90": 85.96441345214846, "max": 144.98904418945312, "pos_frac": 0.828125, "sample": [31.126617431640625, 5.57899284362793, 10.529718399047852, -5.674402236938477, 18.634078979492188, 2.6973094940185547, 54.46752166748047, -7.111488342285156, 35.19452667236328, 71.61735534667969, 39.66373825073242, 38.87425994873047, 2.353178024291992, 81.00350952148438, 30.68933868408203, 105.40583801269531, 8.362951278686523, 11.979694366455078, 88.09051513671875, 1.6959724426269531, 69.0767822265625, 46.67191696166992, 0.6843338012695312, 94.27621459960938, 25.95121192932129, 49.39592742919922, 6.127174377441406, -15.779205322265625, 61.05292510986328, -45.03004455566406, 54.045799255371094, 16.290969848632812, 45.482852935791016, 43.47441482543945, -3.060037612915039, 3.9040145874023438, 25.132007598876953, -1.97314453125, 103.22750854492188, 67.6009750366211, 71.04557800292969, 26.952274322509766, -59.787696838378906, 6.023445129394531, 144.98904418945312, 44.8157958984375, 29.17209243774414, 33.34588623046875, 38.39878845214844, 11.0162353515625, 0.4004230499267578, 63.84324645996094, 112.84046173095703, 37.93254089355469, 61.905120849609375, 63.34974670410156, -27.989845275878906, 38.351966857910156, -26.283279418945312, 33.36817932128906, 95.21563720703125, -47.471885681152344, -29.877037048339844, 51.232330322265625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000106.npy"} +{"epoch": 0.15565345080763582, "step": 107, "batch_size": 64, "mean": 35.80072021484375, "std": 48.18320846557617, "min": -104.91288757324219, "p10": -13.63789234161376, "median": 24.258602142333984, "p90": 93.04911422729494, "max": 160.0502471923828, "pos_frac": 0.828125, "sample": [95.10063171386719, 121.494873046875, 50.428253173828125, 73.69470977783203, 26.686630249023438, 34.455772399902344, 58.44807434082031, -3.9688472747802734, -104.91288757324219, 10.333053588867188, 17.341022491455078, 84.72732543945312, -46.569801330566406, 27.975788116455078, 10.176984786987305, 7.84783935546875, 15.895349502563477, 89.01099395751953, 2.7902603149414062, -30.151695251464844, 120.2266616821289, 73.47699737548828, 88.595947265625, 5.976289749145508, -20.96600341796875, 2.631643295288086, -2.3523941040039062, 3.0490264892578125, 160.0502471923828, 82.00037384033203, 20.271427154541016, 17.70935821533203, 113.96392059326172, 48.884490966796875, 72.44732666015625, 94.42053985595703, 17.971050262451172, 28.783843994140625, 18.974546432495117, -17.781768798828125, 69.68766784667969, -53.24481201171875, 69.4071044921875, 79.22955322265625, 10.769126892089844, 89.84912109375, -1.26458740234375, 88.16473388671875, 52.50288391113281, 59.13755798339844, 28.342002868652344, 23.973814010620117, 2.1368255615234375, -31.278480529785156, -2.4809951782226562, 5.115386962890625, 13.405004501342773, 80.77825927734375, 6.647125244140625, 24.54339027404785, 75.24308776855469, 116.234619140625, 3.6843719482421875, 11.525375366210938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000107.npy"} +{"epoch": 0.15712187958883994, "step": 108, "batch_size": 64, "mean": 43.90226745605469, "std": 64.78881072998047, "min": -71.73118591308594, "p10": -27.29498653411865, "median": 30.298089027404785, "p90": 136.31152954101566, "max": 237.23138427734375, "pos_frac": 0.78125, "sample": [37.61382293701172, 154.37171936035156, 42.24169158935547, -52.20599365234375, -71.73118591308594, 119.1302490234375, 31.947450637817383, 10.553077697753906, 237.23138427734375, 1.0796890258789062, -13.597457885742188, 142.88961791992188, 9.708480834960938, 7.6373748779296875, -6.756019592285156, 84.06060028076172, -7.704784393310547, 110.50248718261719, 185.6071319580078, 115.78173065185547, 130.24407958984375, 90.26451110839844, 2.641347885131836, -24.444124221801758, 52.036407470703125, 11.368448257446289, 28.079010009765625, 16.176433563232422, 87.72109985351562, 5.5976409912109375, 28.648727416992188, 0.8896942138671875, 76.6373062133789, -28.51678466796875, 191.37039184570312, -46.24009704589844, 86.50270080566406, -32.80076217651367, -16.1060791015625, 4.396799087524414, 26.50225830078125, 8.257034301757812, 55.73170471191406, 177.0751953125, 39.40833282470703, 10.986661911010742, 98.57817840576172, 81.57344055175781, 34.26817321777344, -59.210113525390625, 17.228897094726562, 55.2867431640625, 138.911865234375, 68.49334716796875, -3.9522705078125, -0.6761608123779297, 40.96781921386719, 18.311656951904297, 77.88479614257812, -52.35990524291992, 32.428306579589844, 43.04367446899414, 73.65919494628906, 24.518356323242188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000108.npy"} +{"epoch": 0.15859030837004406, "step": 109, "batch_size": 64, "mean": 46.090152740478516, "std": 56.81373596191406, "min": -107.54232788085938, "p10": -9.733175659179686, "median": 38.9838752746582, "p90": 117.24542312622071, "max": 205.50827026367188, "pos_frac": 0.828125, "sample": [53.540626525878906, 8.473785400390625, 151.38465881347656, 96.00208282470703, 36.08074188232422, 57.80624008178711, -65.93440246582031, 10.898323059082031, 18.9774169921875, 52.536964416503906, 147.08843994140625, 45.58833694458008, 80.36978149414062, 64.67503356933594, 57.140541076660156, 5.828807830810547, 22.773609161376953, 39.20679473876953, -15.143077850341797, -5.384721755981445, -6.235496520996094, 205.50827026367188, 0.8534011840820312, -8.976287841796875, 25.59984016418457, 5.399711608886719, 136.6846466064453, 29.96930503845215, 38.03541564941406, -45.24103927612305, 69.99913024902344, 74.6739501953125, 9.63960075378418, 85.33929443359375, 59.67765808105469, 40.28509521484375, 114.27284240722656, 80.3409194946289, 118.5193862915039, 113.54767608642578, -2.1574478149414062, 14.633934020996094, -107.54232788085938, 30.97616195678711, 0.691192626953125, 77.37310028076172, 38.760955810546875, 35.3804817199707, 21.32891273498535, 88.4986572265625, -59.74688720703125, 34.42364501953125, 37.661739349365234, 123.92912292480469, 139.33567810058594, 41.50624084472656, 95.20015716552734, 40.72509765625, 99.52642822265625, 114.07796478271484, -10.05755615234375, -28.970809936523438, 1.9996585845947266, 112.4123306274414], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000109.npy"} +{"epoch": 0.16005873715124816, "step": 110, "batch_size": 64, "mean": 41.4376335144043, "std": 59.968360900878906, "min": -111.28544616699219, "p10": -15.72221794128418, "median": 33.758973121643066, "p90": 124.6193252563477, "max": 199.37872314453125, "pos_frac": 0.8125, "sample": [145.23992919921875, 60.3974609375, 22.538990020751953, 57.48936462402344, 65.0878677368164, 39.63280487060547, 54.311161041259766, -4.686676025390625, -111.28544616699219, 50.189414978027344, 72.72172546386719, 16.599578857421875, -76.09255981445312, 20.64079475402832, 12.321304321289062, 1.2718677520751953, 31.424959182739258, 183.76995849609375, 114.10636901855469, 29.66252899169922, 5.268424987792969, -10.066692352294922, 199.37872314453125, 151.75006103515625, 110.30348205566406, 82.32862854003906, 20.226058959960938, -24.99047088623047, -15.82479476928711, 63.17124938964844, -49.04338073730469, -15.482872009277344, 10.272893905639648, 19.19097137451172, 5.44713020324707, 171.1131591796875, 58.29386901855469, 90.54217529296875, 129.1248779296875, 5.05091667175293, 82.90143585205078, -8.421452522277832, 44.69573974609375, 36.092987060546875, 15.668621063232422, 9.195236206054688, 90.33646392822266, -62.8427734375, 83.84129333496094, -30.856796264648438, 42.47985076904297, 1.5558891296386719, 60.07262420654297, 40.24982833862305, 54.519229888916016, 49.131752014160156, 20.887554168701172, 20.903472900390625, 19.032522201538086, 48.99256134033203, 89.80223083496094, 2.0478553771972656, -6.314334869384766, 156.64083862304688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000110.npy"} +{"epoch": 0.16152716593245228, "step": 111, "batch_size": 64, "mean": 47.13909149169922, "std": 66.36956024169922, "min": -97.41952514648438, "p10": -38.36222229003906, "median": 37.03273391723633, "p90": 135.3053665161133, "max": 208.6326904296875, "pos_frac": 0.765625, "sample": [208.6326904296875, -60.787841796875, -95.2337646484375, 137.81256103515625, -6.574394226074219, 59.77693176269531, 16.96874237060547, 38.13915252685547, 28.685169219970703, 162.3643341064453, 34.92497253417969, 84.75848388671875, 45.67840576171875, 62.408355712890625, 29.548141479492188, 127.61306762695312, 118.81555938720703, -42.837860107421875, 109.51231384277344, 7.333078384399414, 85.46831512451172, -34.142547607421875, -25.28609848022461, -12.50152587890625, 73.24797058105469, 126.38414001464844, -8.636802673339844, 124.77915954589844, -40.170654296875, 13.46490478515625, -26.773406982421875, 33.83589172363281, 152.8846435546875, 24.60137939453125, 169.67910766601562, 61.00250244140625, 35.92631530761719, 59.664791107177734, 74.1257553100586, 138.1470947265625, 25.485252380371094, -49.52095031738281, 19.099319458007812, 35.41927719116211, 144.40325927734375, 77.59210205078125, 29.732711791992188, 70.48106384277344, 129.4552459716797, 2.776308059692383, 122.36732482910156, 40.58714294433594, 104.15486907958984, 73.99522399902344, 57.668678283691406, 14.896272659301758, -97.41952514648438, 33.87544250488281, 31.587133407592773, 43.7967529296875, -4.335630416870117, 104.46694946289062, -74.796875, -12.104660034179688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000111.npy"} +{"epoch": 0.16299559471365638, "step": 112, "batch_size": 64, "mean": 29.63661766052246, "std": 44.817562103271484, "min": -63.31299591064453, "p10": -28.408707427978516, "median": 27.71198081970215, "p90": 100.92364654541018, "max": 128.04928588867188, "pos_frac": 0.71875, "sample": [50.66946029663086, 128.04928588867188, 96.33317565917969, -6.801448822021484, -47.13163375854492, 37.635154724121094, 40.325218200683594, 39.00103759765625, 33.80677795410156, -4.197044372558594, 107.602783203125, 84.90237426757812, -2.143350601196289, -2.3532943725585938, 102.8909912109375, 65.10784912109375, 41.579532623291016, 59.283790588378906, 42.55302810668945, 43.191314697265625, 45.91926574707031, 26.943801879882812, 75.21026611328125, -19.855636596679688, 25.694610595703125, 24.896249771118164, 34.785865783691406, 35.61667251586914, -27.412887573242188, 17.031024932861328, 0.9572010040283203, -37.34114456176758, 35.65752410888672, 117.2877426147461, 122.95943450927734, -36.125244140625, 44.320709228515625, 16.62308120727539, 79.88186645507812, -1.0502452850341797, -63.31299591064453, 104.51496124267578, 28.480159759521484, 13.830894470214844, -17.836288452148438, 22.600406646728516, 69.14669799804688, -1.2603321075439453, 14.784461975097656, 107.67146301269531, 30.212623596191406, 10.003093719482422, 47.830162048339844, -51.05023193359375, 15.388296127319336, -28.835487365722656, -29.12523651123047, -13.856803894042969, 83.3482894897461, 0.8790664672851562, 25.50472640991211, 4.7689666748046875, -10.737594604492188, 41.48896026611328], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000112.npy"} +{"epoch": 0.1644640234948605, "step": 113, "batch_size": 64, "mean": 31.487873077392578, "std": 42.6778450012207, "min": -44.73192596435547, "p10": -19.05921459197998, "median": 30.038716316223145, "p90": 90.15596389770508, "max": 127.81893920898438, "pos_frac": 0.734375, "sample": [-36.145233154296875, 84.65826416015625, -18.611093521118164, 100.79046630859375, 6.335296630859375, 41.929466247558594, 13.789459228515625, 37.72505187988281, 2.6436519622802734, -13.430412292480469, 10.595726013183594, 94.68260192871094, 56.702415466308594, 127.37432861328125, 40.80237579345703, -20.499814987182617, -10.69510269165039, -12.389602661132812, 6.6068115234375, 67.14988708496094, -22.835662841796875, 54.48949432373047, 90.51806640625, -27.040496826171875, 59.1082763671875, -9.941658020019531, 2.7161922454833984, 125.49471282958984, 53.013328552246094, -44.73192596435547, 20.47802734375, 78.66190338134766, 37.94967269897461, -11.364578247070312, 37.21061706542969, 55.42070007324219, 16.06507110595703, 89.3110580444336, 18.200897216796875, -19.251266479492188, 56.548851013183594, 76.165771484375, -15.00990104675293, 2.202890396118164, -20.85882568359375, 45.632205963134766, 41.771339416503906, 43.407867431640625, -10.031707763671875, 16.785287857055664, 127.81893920898438, -4.464244842529297, 36.46552276611328, 30.088048934936523, 111.46393585205078, 88.16693115234375, 35.10961151123047, 65.40758514404297, -8.37135124206543, 11.586654663085938, 9.2010498046875, 13.029827117919922, 29.989383697509766, 49.63116455078125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000113.npy"} +{"epoch": 0.16593245227606462, "step": 114, "batch_size": 64, "mean": 50.390052795410156, "std": 70.8586196899414, "min": -46.27106475830078, "p10": -32.68094825744629, "median": 43.28854179382324, "p90": 121.02878112792972, "max": 381.62713623046875, "pos_frac": 0.765625, "sample": [200.41275024414062, 96.30411529541016, 57.0955810546875, 49.53711700439453, 28.956886291503906, 124.44052124023438, 22.411226272583008, 49.24652862548828, -45.04539489746094, 72.69216918945312, -34.04456329345703, 80.29473876953125, 43.77106475830078, 23.599639892578125, -41.144325256347656, 94.69906616210938, 14.032024383544922, -39.33409881591797, 36.59553527832031, 135.1352081298828, 11.46672248840332, 59.607505798339844, -5.5931396484375, 128.92208862304688, 42.31832504272461, 62.76789855957031, 38.660926818847656, 25.306568145751953, 41.98724365234375, 64.40769958496094, 69.26647186279297, -6.122833251953125, -8.5169677734375, 57.41461181640625, -32.74180221557617, 17.53815269470215, 54.13634490966797, 77.91732788085938, 38.16612243652344, 113.06805419921875, 63.487369537353516, 91.44941711425781, 6.844076156616211, 52.498817443847656, 42.8060188293457, -16.118919372558594, 249.96531677246094, 7.600603103637695, 45.59898376464844, 152.64254760742188, -11.72343635559082, 77.67931365966797, -46.27106475830078, -32.53895568847656, 381.62713623046875, 57.81449508666992, -4.276782989501953, 98.09698486328125, 35.62525939941406, -35.54317092895508, 30.23345184326172, 96.56330871582031, -20.611499786376953, 81.88102722167969], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000114.npy"} +{"epoch": 0.16740088105726872, "step": 115, "batch_size": 64, "mean": 34.20935821533203, "std": 71.51499938964844, "min": -163.87503051757812, "p10": -33.770948028564455, "median": 20.425628662109375, "p90": 132.69615936279303, "max": 251.01077270507812, "pos_frac": 0.71875, "sample": [-1.5799827575683594, -2.0958728790283203, 137.33241271972656, 66.852294921875, -62.274505615234375, -1.91461181640625, -54.47697448730469, 9.439613342285156, 13.34029769897461, 16.687042236328125, 27.61742401123047, -4.8198089599609375, 47.3109016418457, 57.914894104003906, 8.652429580688477, 184.13905334472656, 22.354433059692383, -10.98651123046875, 251.01077270507812, -12.149896621704102, 81.08568572998047, 8.813697814941406, 153.05532836914062, 75.798583984375, 184.05889892578125, -34.46675109863281, 22.8931884765625, 26.723995208740234, 8.451372146606445, 106.28822326660156, 189.24656677246094, 107.8931884765625, -2.2650909423828125, -135.05319213867188, 11.704044342041016, 35.27190399169922, 65.55183410644531, 16.878768920898438, 7.8694915771484375, 49.099220275878906, 27.751953125, 57.50001525878906, 27.248077392578125, 33.05615234375, 153.15121459960938, -33.74803161621094, 23.009933471679688, 20.467369079589844, -9.273750305175781, 13.138435363769531, 20.383888244628906, 86.63014221191406, 4.661674499511719, 9.161739349365234, -21.775062561035156, 117.90389251708984, 68.42558288574219, -19.018375396728516, 67.46876525878906, -163.87503051757812, 121.87823486328125, -56.086509704589844, 3.867055892944336, -33.78076934814453], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000115.npy"} +{"epoch": 0.16886930983847284, "step": 116, "batch_size": 64, "mean": 38.464725494384766, "std": 50.134273529052734, "min": -40.32250213623047, "p10": -19.03753147125244, "median": 33.84121322631836, "p90": 96.15133819580079, "max": 225.62574768066406, "pos_frac": 0.78125, "sample": [7.804109573364258, 6.923542022705078, -37.439998626708984, -35.54419708251953, 25.95863914489746, 47.39094543457031, 69.68875122070312, 24.18604278564453, 51.516448974609375, 62.944313049316406, 33.782379150390625, -13.992311477661133, 46.58100128173828, 27.83068084716797, 11.550445556640625, -6.567604064941406, 56.6900634765625, -40.32250213623047, 89.4190673828125, 95.53681945800781, 43.05665588378906, -20.209091186523438, 26.681381225585938, 18.13268280029297, 55.03129577636719, 26.01331329345703, 88.10455322265625, 112.80979919433594, 33.900047302246094, -32.62091827392578, 135.44580078125, 35.96205139160156, 23.57857894897461, 47.92979431152344, -16.303892135620117, -31.018165588378906, 2.297161102294922, 7.366691589355469, -4.727411270141602, 110.51516723632812, 5.406137466430664, 148.49208068847656, 34.43682861328125, 92.33782958984375, 87.72547912597656, 38.53761672973633, 57.67591857910156, -36.94580078125, -9.537296295166016, 87.8531265258789, 22.643577575683594, -6.209209442138672, 225.62574768066406, 71.22118377685547, 12.061836242675781, 53.506004333496094, 73.81207275390625, 70.28227233886719, -11.70970344543457, 15.31060791015625, 0.8757476806640625, 42.02711486816406, 104.01641845703125, 96.41470336914062], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000116.npy"} +{"epoch": 0.17033773861967694, "step": 117, "batch_size": 64, "mean": 31.281452178955078, "std": 54.881553649902344, "min": -107.49905395507812, "p10": -29.518272781372062, "median": 25.020426750183105, "p90": 93.51421737670901, "max": 203.72579956054688, "pos_frac": 0.765625, "sample": [11.826824188232422, 55.434326171875, 52.85899353027344, -19.00515365600586, 23.88667106628418, -4.3619537353515625, 19.241666793823242, 42.327857971191406, 22.00812530517578, 42.378517150878906, 44.4186897277832, 72.83011627197266, 146.03570556640625, 118.99197387695312, 16.798683166503906, 49.47782897949219, -57.06529235839844, 97.19248962402344, -0.2861328125, -97.63095092773438, -34.023895263671875, 7.33880615234375, 16.053550720214844, 68.82655334472656, 46.8229866027832, -18.65509033203125, -42.36363983154297, 3.912137985229492, 26.15418243408203, -17.77371597290039, 41.870819091796875, 126.24861145019531, 0.42656707763671875, 8.9365234375, 162.14959716796875, 13.428565979003906, 203.72579956054688, 17.751419067382812, 32.10350799560547, 70.7940902709961, 17.478343963623047, 11.452770233154297, 8.835365295410156, -9.742324829101562, -107.49905395507812, 31.97864532470703, 85.83551025390625, 33.65387725830078, 96.80509185791016, 42.02934265136719, -47.71513366699219, -11.713506698608398, 7.440608978271484, 58.75918197631836, 82.97969055175781, 82.68714904785156, 5.18975830078125, 52.76874542236328, 80.95769500732422, 27.511192321777344, 46.9898681640625, -3.6573104858398438, -40.50090026855469, 78.40198516845703], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000117.npy"} +{"epoch": 0.17180616740088106, "step": 118, "batch_size": 64, "mean": 37.86691665649414, "std": 49.817588806152344, "min": -57.64482116699219, "p10": -15.451903152465814, "median": 25.51577377319336, "p90": 116.64057769775394, "max": 165.98895263671875, "pos_frac": 0.75, "sample": [-7.273529052734375, 20.945220947265625, -1.632345199584961, -26.909133911132812, 20.34058952331543, 15.768421173095703, 62.05925369262695, 44.867897033691406, 47.97145080566406, 122.79985046386719, 93.75337219238281, 23.42828369140625, 135.36651611328125, 32.357948303222656, -9.179019927978516, 57.523704528808594, -23.23654556274414, -18.346755981445312, 36.94845962524414, 165.98895263671875, 122.24073791503906, -8.499401092529297, -57.64482116699219, 2.4506607055664062, -42.74028015136719, 21.20359230041504, 2.630491256713867, 35.56067657470703, 16.786861419677734, 18.907333374023438, 10.07537841796875, 80.70929718017578, 3.88970947265625, 81.15027618408203, 25.272193908691406, 50.63047790527344, 24.58013343811035, 44.9920654296875, 159.769287109375, -4.477603912353516, 32.73426055908203, -18.140281677246094, -2.1280364990234375, 41.245323181152344, 157.23446655273438, 92.43828582763672, 120.83042907714844, 45.30912780761719, 81.27667999267578, 14.452184677124023, -1.970062255859375, 73.04637908935547, 106.8642578125, 71.21038818359375, 55.08833312988281, 27.474349975585938, 40.13676452636719, 17.152435302734375, -1.1233539581298828, 25.759353637695312, -24.118942260742188, 88.2297134399414, 5.1724853515625, -5.721523284912109], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000118.npy"} +{"epoch": 0.17327459618208516, "step": 119, "batch_size": 64, "mean": 37.58320236206055, "std": 61.240882873535156, "min": -74.95347595214844, "p10": -33.45987319946289, "median": 37.16095542907715, "p90": 104.9494400024414, "max": 227.6328125, "pos_frac": 0.671875, "sample": [14.323732376098633, 67.11457824707031, 42.72499084472656, 165.82626342773438, 38.39923095703125, -37.24656677246094, -5.662567138671875, 52.22822570800781, -4.982784271240234, 52.32080841064453, -12.894721984863281, -13.550254821777344, 227.6328125, 51.95203399658203, 21.26418685913086, 130.04812622070312, 20.75790786743164, 12.927326202392578, 195.00169372558594, 50.311187744140625, 103.75277709960938, 80.41850280761719, 80.57367706298828, -9.75888442993164, 33.56792449951172, 35.94898223876953, -36.72149658203125, 19.706886291503906, 60.200408935546875, 61.12617492675781, -1.5559005737304688, -3.512420654296875, 105.46229553222656, 38.372928619384766, 181.63720703125, -1.9754257202148438, 67.59406280517578, 90.31137084960938, -2.4585742950439453, 28.27356719970703, -63.63340759277344, 45.754737854003906, -66.92573547363281, 135.01727294921875, 52.59100341796875, 68.88798522949219, -28.352340698242188, -34.71278381347656, 92.16869354248047, 39.10921859741211, -30.536415100097656, -74.95347595214844, -3.424041748046875, 96.58969116210938, 15.58734130859375, 61.46340560913086, 55.826377868652344, 10.849893569946289, 64.10298156738281, -7.105680465698242, -44.312095642089844, 1.4601287841796875, 43.95176696777344, -23.539756774902344], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000119.npy"} +{"epoch": 0.17474302496328928, "step": 120, "batch_size": 64, "mean": 52.281673431396484, "std": 64.28251647949219, "min": -92.73348999023438, "p10": -21.86342582702636, "median": 41.81755065917969, "p90": 142.95119476318362, "max": 209.07839965820312, "pos_frac": 0.828125, "sample": [160.5682830810547, 93.5509033203125, 34.139312744140625, 116.91102600097656, 38.419620513916016, 7.614934921264648, -61.9833984375, 106.58583068847656, 21.190824508666992, 74.20589447021484, 54.30371856689453, 176.22134399414062, 86.35250091552734, 9.938133239746094, 18.654767990112305, 30.156875610351562, 145.87857055664062, 65.43948364257812, 73.86974334716797, -40.36479949951172, -39.98509216308594, 136.1206512451172, 45.803985595703125, 33.590675354003906, 46.067893981933594, 38.32371520996094, 49.62806701660156, 7.093788146972656, 180.11672973632812, -92.73348999023438, -25.898338317871094, 39.69337463378906, 28.69666290283203, 3.707305908203125, 115.02613830566406, 11.690057754516602, 209.07839965820312, 56.66926956176758, 76.1190185546875, 3.489542007446289, 13.10018539428711, -9.574804306030273, 57.61405944824219, 204.27685546875, 37.99748229980469, 42.83099365234375, 135.6982879638672, 180.19039916992188, 42.70625305175781, 51.241180419921875, -0.4343109130859375, 95.09070587158203, -49.00846862792969, 21.348617553710938, 66.34900665283203, 40.92884826660156, -2.6330223083496094, 121.96688842773438, 82.44114685058594, -15.294124603271484, 79.44930267333984, -24.67884063720703, 38.117095947265625, 2.351409912109375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000120.npy"} +{"epoch": 0.1762114537444934, "step": 121, "batch_size": 64, "mean": 57.87868881225586, "std": 75.07471466064453, "min": -158.22695922851562, "p10": -10.547252655029295, "median": 51.813438415527344, "p90": 163.02898254394535, "max": 232.1199188232422, "pos_frac": 0.765625, "sample": [9.726518630981445, -8.371635437011719, -2.08489990234375, 232.1199188232422, -6.405008316040039, 12.690681457519531, 103.58038330078125, -16.846942901611328, 81.6660385131836, 85.24349212646484, -4.56732177734375, 68.09366607666016, 150.78372192382812, 113.94551849365234, 97.91911315917969, -0.7735061645507812, 93.24098205566406, 69.35195922851562, 57.05094909667969, -46.15734100341797, 40.457557678222656, 118.09750366210938, -64.2402114868164, 166.06455993652344, 180.6905975341797, 70.30876159667969, 38.777381896972656, 17.778629302978516, 120.02944946289062, 4.628438949584961, 80.92721557617188, -22.219139099121094, 48.018646240234375, 92.93751525878906, 2.742429733276367, 211.1304931640625, 4.559806823730469, 197.34780883789062, 46.683319091796875, 219.62664794921875, 10.534568786621094, 134.20379638671875, -11.479660034179688, 19.256179809570312, -17.468841552734375, 9.179222106933594, 78.93074035644531, 25.58440399169922, -6.609962463378906, 59.673309326171875, 6.864414215087891, 19.59484100341797, 62.21063232421875, 144.38580322265625, 155.9459686279297, 75.0268783569336, -158.22695922851562, 57.876556396484375, -4.223297119140625, 82.35722351074219, 225.26873779296875, 18.55594253540039, -3.3666610717773438, 55.60823059082031], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000121.npy"} +{"epoch": 0.1776798825256975, "step": 122, "batch_size": 64, "mean": 71.17927551269531, "std": 71.06498718261719, "min": -96.66424560546875, "p10": -0.4476539611816396, "median": 52.25942611694336, "p90": 172.8968276977539, "max": 250.2943878173828, "pos_frac": 0.890625, "sample": [161.8503875732422, 24.414352416992188, 80.34955596923828, 88.89801025390625, 19.653154373168945, 26.863731384277344, 49.96965026855469, 104.1210708618164, 24.84038543701172, 120.30625915527344, 15.625869750976562, 137.88339233398438, 131.42813110351562, 244.41717529296875, 108.16509246826172, 38.35944366455078, 17.01726531982422, 55.07195281982422, 173.3113555908203, 50.135986328125, 22.878931045532227, 144.68600463867188, 38.275211334228516, -8.975860595703125, 92.97198486328125, 13.578369140625, 67.41463470458984, 143.75241088867188, 74.20201873779297, 69.49480438232422, 82.2625503540039, -28.974319458007812, 183.31744384765625, -7.631967544555664, 54.38286590576172, 179.14166259765625, 64.06063842773438, 24.45331573486328, 193.86016845703125, 167.33595275878906, 8.777872085571289, 48.08732604980469, -35.84100341796875, 250.2943878173828, 18.43720245361328, 40.45881271362305, 34.42432403564453, 44.81875991821289, 22.64459991455078, -96.66424560546875, -8.759849548339844, 29.883697509765625, 35.523406982421875, 67.252197265625, 215.54327392578125, -0.8865394592285156, 124.3668212890625, 56.76055145263672, 0.5764122009277344, 16.43811798095703, 171.92959594726562, 82.53816223144531, 20.904897689819336, 164.7962646484375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000122.npy"} +{"epoch": 0.17914831130690162, "step": 123, "batch_size": 64, "mean": 46.348785400390625, "std": 77.91118621826172, "min": -154.49710083007812, "p10": -29.91035099029541, "median": 23.360222816467285, "p90": 151.45179901123052, "max": 248.39263916015625, "pos_frac": 0.65625, "sample": [89.27766418457031, 85.07264709472656, -6.103706359863281, 9.392070770263672, 94.37351989746094, 83.44441223144531, 137.63238525390625, 125.56961059570312, 84.05207061767578, 51.119415283203125, -0.6376171112060547, -21.755859375, 8.04873275756836, -46.700172424316406, 157.37440490722656, 94.80393981933594, 16.307735443115234, -13.293426513671875, -2.137115478515625, 71.5555419921875, -6.0002593994140625, 127.6580810546875, 69.30827331542969, -80.80703735351562, 19.922607421875, 101.40875244140625, 215.04214477539062, 24.259300231933594, 165.00979614257812, 170.23959350585938, -28.801015853881836, 61.76226806640625, 21.55893325805664, 8.23553466796875, 88.61272430419922, 129.69595336914062, 16.533241271972656, 48.90043640136719, 186.2870635986328, -20.362899780273438, 99.62149047851562, 91.86576843261719, -43.68674850463867, -75.15015411376953, 80.09684753417969, -9.290283203125, -46.24589157104492, -3.819334030151367, -18.97931671142578, 248.39263916015625, -30.385780334472656, 71.55232238769531, 58.36656951904297, 224.57989501953125, -13.308403015136719, 22.461145401000977, 127.62986755371094, 34.6065673828125, -154.49710083007812, -26.010231018066406, -1.1549301147460938, -9.020954132080078, 0.4933357238769531, 2.3450794219970703], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000123.npy"} +{"epoch": 0.18061674008810572, "step": 124, "batch_size": 64, "mean": 69.19841003417969, "std": 100.72116088867188, "min": -132.19422912597656, "p10": -27.68361206054687, "median": 46.14408302307129, "p90": 190.0059753417969, "max": 391.3918762207031, "pos_frac": 0.796875, "sample": [163.86907958984375, -25.26885986328125, 43.19646453857422, 34.689117431640625, 29.869853973388672, 155.18312072753906, -104.33534240722656, -9.151416778564453, 176.09130859375, 194.27658081054688, -4.961738586425781, 32.51300048828125, 4.083702087402344, 74.47221374511719, 87.27239227294922, 235.06304931640625, 169.19613647460938, 52.28849411010742, -21.792327880859375, 55.31804656982422, -132.19422912597656, -43.828514099121094, 150.0438232421875, 0.2898597717285156, 217.36248779296875, 141.67547607421875, 87.97833251953125, -4.918243408203125, 48.92365264892578, 125.9581298828125, -28.718505859375, 33.598697662353516, 95.41864013671875, 10.681161880493164, 13.22479248046875, 364.6168212890625, 109.40412902832031, 191.67831420898438, 33.32245635986328, 186.10385131835938, 114.65021514892578, -59.12897491455078, 17.755268096923828, -19.81817626953125, 40.999664306640625, 132.62945556640625, -106.509765625, 108.67584228515625, 21.323286056518555, 53.1732177734375, 43.3645133972168, 88.29961395263672, -69.73355102539062, 273.2192077636719, 24.551361083984375, 391.3918762207031, 54.1131591796875, 177.29183959960938, 1.193674087524414, 34.389259338378906, 52.04570007324219, 86.09070587158203, 15.405742645263672, 10.831501007080078], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000124.npy"} +{"epoch": 0.18208516886930984, "step": 125, "batch_size": 64, "mean": 60.9357795715332, "std": 83.69886016845703, "min": -105.4774169921875, "p10": -31.067874908447266, "median": 35.77558898925781, "p90": 178.9188888549805, "max": 263.3975524902344, "pos_frac": 0.765625, "sample": [-3.4405155181884766, 159.5470428466797, 96.30891418457031, 23.866470336914062, -19.11078643798828, 9.286705017089844, 122.72663879394531, -29.761062622070312, 150.37472534179688, 14.62122917175293, 11.048641204833984, -31.62793731689453, 23.045251846313477, -2.88946533203125, 105.5779037475586, 119.42398071289062, 98.26881408691406, 51.452964782714844, 21.160356521606445, 3.2538108825683594, 107.16743469238281, 19.508560180664062, -60.10050964355469, -12.9200439453125, 98.14225006103516, 263.3975524902344, -32.7435302734375, 31.800710678100586, 113.09523010253906, 38.81571960449219, -5.8401031494140625, -86.35717010498047, 36.392578125, 17.254016876220703, 35.158599853515625, 228.39537048339844, 7.694530487060547, 227.92880249023438, 123.94500732421875, -26.50762176513672, 12.13162612915039, 103.81503295898438, 195.74444580078125, 96.58789825439453, 243.31759643554688, 99.27638244628906, 55.50010681152344, 49.596336364746094, 183.6087646484375, 51.49723815917969, 16.219894409179688, 138.59561157226562, 44.42835235595703, -20.630277633666992, 130.7296905517578, 27.907146453857422, -49.09535217285156, 167.97584533691406, 32.699867248535156, -105.4774169921875, 220.45518493652344, 164.74676513671875, -36.23847198486328, 29.136621475219727], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000125.npy"} +{"epoch": 0.18355359765051396, "step": 126, "batch_size": 64, "mean": 56.966033935546875, "std": 77.21730041503906, "min": -79.9509048461914, "p10": -23.679612350463866, "median": 38.51036262512207, "p90": 164.61319732666018, "max": 280.65911865234375, "pos_frac": 0.828125, "sample": [135.0428466796875, 66.53041076660156, 11.084054946899414, 91.43519592285156, 12.434783935546875, 56.966270446777344, -39.386474609375, 2.116790771484375, 65.2943115234375, 158.68798828125, -59.49668884277344, 0.029827117919921875, 280.65911865234375, 16.83545684814453, 90.43256378173828, 2.12042236328125, 138.65155029296875, 44.717079162597656, 138.4107666015625, 71.98880004882812, 47.60149002075195, 55.0018196105957, -8.201168060302734, 39.75369644165039, 70.72908782958984, 200.15505981445312, -24.682479858398438, 167.15257263183594, 33.62501907348633, 49.58232879638672, 20.69137954711914, 26.65505599975586, 77.18319702148438, 15.259971618652344, 192.16671752929688, 37.26702880859375, 60.549842834472656, -79.87861633300781, 156.87164306640625, -21.339588165283203, -70.80805969238281, -39.64649963378906, 252.46926879882812, -3.649087905883789, 29.802946090698242, 10.76315689086914, 2.1369056701660156, 21.516788482666016, 134.91888427734375, 126.55491638183594, -79.9509048461914, 195.7212371826172, 91.16242980957031, 25.530731201171875, 35.79692077636719, 101.90821838378906, 34.78680419921875, 65.90397644042969, 17.362812042236328, -19.551536560058594, 14.68316650390625, 96.42990112304688, 13.644973754882812, 187.63906860351562], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000126.npy"} +{"epoch": 0.18502202643171806, "step": 127, "batch_size": 64, "mean": 41.24645233154297, "std": 66.36114501953125, "min": -135.19232177734375, "p10": -36.63717346191406, "median": 43.674495697021484, "p90": 121.99184341430664, "max": 195.6309356689453, "pos_frac": 0.78125, "sample": [55.91325378417969, 83.63428497314453, -12.183893203735352, 42.58184051513672, 3.105987548828125, -102.87004089355469, 157.0833740234375, 11.613311767578125, 100.98461151123047, -135.19232177734375, 7.164533615112305, 65.66780853271484, 79.24519348144531, -33.41206359863281, 18.40265464782715, 38.7073974609375, 97.79293060302734, 11.463376998901367, -130.40989685058594, 80.94920349121094, 25.00865936279297, 75.57449340820312, 135.12220764160156, 23.202194213867188, 68.96798706054688, 178.91510009765625, 23.002269744873047, 90.3236083984375, -29.22748565673828, 14.598800659179688, 125.56493377685547, 82.07714080810547, 113.31332397460938, 25.85698699951172, 57.22673034667969, 158.35983276367188, -2.021076202392578, -18.155540466308594, 20.451200485229492, -2.6363754272460938, 18.072574615478516, 119.59276580810547, 8.923255920410156, 81.04584503173828, 51.20841979980469, 66.779052734375, 45.747642517089844, 123.02001953125, 113.36934661865234, 52.10114288330078, -49.133262634277344, 51.29206848144531, 20.460895538330078, 195.6309356689453, -62.058067321777344, 64.10504150390625, 49.619972229003906, 95.09568786621094, 44.76715087890625, 14.46591567993164, 31.771690368652344, -25.811614990234375, -38.01936340332031, -38.0406494140625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000127.npy"} +{"epoch": 0.18649045521292218, "step": 128, "batch_size": 64, "mean": 43.582611083984375, "std": 77.67715454101562, "min": -175.9078369140625, "p10": -47.369808959960935, "median": 38.19831848144531, "p90": 153.11715393066407, "max": 263.57025146484375, "pos_frac": 0.6875, "sample": [48.85409927368164, 45.063629150390625, 139.9529266357422, -13.969337463378906, 94.18435668945312, -63.9881477355957, 104.77052307128906, 150.36361694335938, 37.565032958984375, 44.232444763183594, 176.32742309570312, 53.423377990722656, 52.70399475097656, 36.06752014160156, -95.34496307373047, 103.48017883300781, 163.302001953125, 4.440301895141602, -0.025003433227539062, 76.25875854492188, -73.07354736328125, -2.9998817443847656, 263.57025146484375, -175.9078369140625, 152.51480102539062, 52.03269958496094, 167.70477294921875, 153.6025390625, 76.4138412475586, 50.18840789794922, -10.198368072509766, -58.841400146484375, 109.00659942626953, -6.215305328369141, -19.449073791503906, 18.30803680419922, -84.52349853515625, -1.0507698059082031, 34.17724609375, 45.016239166259766, 91.77318572998047, -41.02056884765625, 55.63665771484375, -26.387969970703125, -12.278207778930664, 155.22227478027344, 33.32809829711914, 38.83160400390625, 22.819063186645508, 54.1390266418457, 18.460853576660156, -10.574350357055664, 145.391845703125, -1.2494659423828125, 12.490457534790039, 3.44580078125, -11.682701110839844, 55.90113067626953, 109.27212524414062, 10.799909591674805, -50.090911865234375, 132.69369506835938, 153.37530517578125, 1.051858901977539], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000128.npy"} +{"epoch": 0.18795888399412627, "step": 129, "batch_size": 64, "mean": 45.94677734375, "std": 89.3432388305664, "min": -156.84246826171875, "p10": -44.7892707824707, "median": 33.968496322631836, "p90": 153.31606750488282, "max": 392.49688720703125, "pos_frac": 0.671875, "sample": [2.5887317657470703, 33.332576751708984, -48.4259147644043, 116.58079528808594, -51.08686828613281, 129.93014526367188, 7.961082458496094, -44.36705017089844, 175.55105590820312, 28.20514678955078, -12.149642944335938, 113.3267822265625, 26.758148193359375, 45.560943603515625, 58.49648666381836, 76.99169921875, 155.48008728027344, -42.24644470214844, -32.75138854980469, 54.95143127441406, 229.11572265625, 88.03630065917969, 232.56942749023438, -99.73702239990234, 91.02740478515625, -98.05462646484375, -44.97022247314453, 42.16769027709961, 392.49688720703125, 146.36453247070312, 28.8901424407959, 22.115447998046875, 1.6694183349609375, 78.9832534790039, -11.7662353515625, -18.725330352783203, 34.60441589355469, 125.17109680175781, 32.909385681152344, -14.013946533203125, -76.8805160522461, -9.293317794799805, 53.09027099609375, 82.38497924804688, 151.5872802734375, -156.84246826171875, 17.08245277404785, 20.803804397583008, -7.815122604370117, -6.9134979248046875, 145.04542541503906, 181.69215393066406, 40.975521087646484, 50.55229187011719, 74.78071594238281, -18.662918090820312, 45.21812438964844, 54.578773498535156, -40.007789611816406, 87.5043716430664, 154.05697631835938, -30.66558074951172, 75.80155181884766, -1.021230697631836], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000129.npy"} +{"epoch": 0.1894273127753304, "step": 130, "batch_size": 64, "mean": 69.05598449707031, "std": 88.622802734375, "min": -113.54984283447266, "p10": -21.317277526855463, "median": 46.96364974975586, "p90": 203.77444458007815, "max": 338.5977783203125, "pos_frac": 0.78125, "sample": [75.76742553710938, 40.55814743041992, 33.076515197753906, 194.17001342773438, -14.394309997558594, 11.548965454101562, 213.41567993164062, 82.84126281738281, -59.633033752441406, -6.765851974487305, 16.7493953704834, 60.62871170043945, 32.721187591552734, 198.83526611328125, 212.19137573242188, 24.68224334716797, 97.72811889648438, -7.406761169433594, 37.592613220214844, 74.31580352783203, -12.86380386352539, 32.59661865234375, 9.808967590332031, 89.38777160644531, 94.96923828125, -3.2255630493164062, 32.264801025390625, 96.75104522705078, 59.639198303222656, 183.79010009765625, 145.27584838867188, 225.6162109375, -35.54612731933594, 30.356796264648438, 156.0208282470703, -110.24581909179688, 33.95576477050781, 30.997230529785156, 35.82157516479492, -24.284263610839844, 62.40985870361328, 52.5485725402832, 78.05876922607422, 253.6818389892578, 161.1556396484375, -7.846193313598633, 338.5977783203125, -42.78282928466797, -4.058109283447266, 89.67459869384766, 33.37621307373047, 41.378726959228516, 205.8912353515625, -34.430931091308594, 149.59034729003906, -113.54984283447266, 137.6530303955078, 1.8567962646484375, 130.1422119140625, 119.54891967773438, 223.22271728515625, 57.381797790527344, 31.867420196533203, 64.50514221191406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000130.npy"} +{"epoch": 0.19089574155653452, "step": 131, "batch_size": 64, "mean": 69.1473159790039, "std": 76.66849517822266, "min": -161.4339599609375, "p10": -0.4982250213623047, "median": 52.58813667297363, "p90": 166.27880859375003, "max": 280.7750549316406, "pos_frac": 0.875, "sample": [53.41594314575195, 50.337127685546875, 53.93305206298828, 30.715190887451172, 280.7750549316406, -161.4339599609375, 61.08622360229492, 43.673805236816406, 31.354774475097656, 109.7914047241211, 116.74981689453125, 21.124114990234375, 63.26506042480469, 13.272863388061523, 103.40740966796875, 36.06501770019531, 41.75788116455078, -1.816680908203125, 147.1486358642578, 77.46859741210938, 45.94609832763672, 162.49212646484375, -13.76715087890625, -11.832290649414062, 51.76033020019531, 44.07914733886719, 86.60504150390625, 6.860622406005859, 208.99008178710938, 40.387332916259766, 41.71248245239258, -83.36083984375, 167.90167236328125, 10.381942749023438, 229.45831298828125, 4.231287002563477, 106.45439910888672, 31.06566619873047, 141.3967742919922, 47.0272102355957, 130.8780059814453, 32.82342529296875, 212.42376708984375, 26.074420928955078, -95.78045654296875, 130.93231201171875, 2.9556427001953125, 107.84087371826172, -0.5135612487792969, 25.892620086669922, -0.46244049072265625, 140.47520446777344, 8.584388732910156, 182.0531768798828, 160.07748413085938, 176.90328979492188, 91.82722473144531, 67.29005432128906, 29.885211944580078, 81.68644714355469, 136.65423583984375, 97.16040802001953, 101.45704650878906, 88.427734375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000131.npy"} +{"epoch": 0.19236417033773862, "step": 132, "batch_size": 64, "mean": 56.553627014160156, "std": 90.65254974365234, "min": -162.45697021484375, "p10": -18.665310859680172, "median": 42.30354881286621, "p90": 147.8352996826172, "max": 390.89886474609375, "pos_frac": 0.8125, "sample": [15.658210754394531, 11.922657012939453, 155.13002014160156, -63.68946075439453, 70.36965942382812, 36.909645080566406, 67.44886779785156, 294.9065246582031, 325.71197509765625, 8.619941711425781, 73.68795776367188, 112.31543731689453, 21.77916145324707, 80.82080078125, -162.45697021484375, 2.4982452392578125, 229.4298095703125, -38.858924865722656, 71.28279876708984, -16.540021896362305, 2.2718734741210938, -13.950965881347656, 130.67257690429688, 71.0970458984375, 21.533226013183594, -19.576148986816406, 0.9223670959472656, 118.0173110961914, 8.43243408203125, 2.6602249145507812, -13.450439453125, -15.302490234375, 19.70859718322754, 96.64403533935547, 81.36332702636719, 149.13290405273438, 54.77276611328125, 67.55460357666016, 79.899658203125, 50.45127487182617, 55.517845153808594, 34.26069259643555, 34.843196868896484, 144.80755615234375, -106.65583801269531, 47.697452545166016, 123.63045501708984, -72.01703643798828, 13.21725082397461, 98.20851135253906, -21.998003005981445, 63.160865783691406, 107.41592407226562, 19.497413635253906, 62.15802001953125, -12.845672607421875, 34.133262634277344, 106.29292297363281, 26.840896606445312, 12.382844924926758, 183.45733642578125, 29.859241485595703, 54.86779022216797, 390.89886474609375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000132.npy"} +{"epoch": 0.19383259911894274, "step": 133, "batch_size": 64, "mean": 49.97242736816406, "std": 79.4012680053711, "min": -112.15570068359375, "p10": -25.164176940917958, "median": 38.653621673583984, "p90": 152.75746154785162, "max": 260.2508544921875, "pos_frac": 0.765625, "sample": [11.077720642089844, 69.59519958496094, -10.242630004882812, 116.6678466796875, 7.004718780517578, -43.44203186035156, -7.639739990234375, 37.879364013671875, -76.35545349121094, 260.2508544921875, 56.440887451171875, 74.1345443725586, -6.7870941162109375, 105.87743377685547, -84.24116516113281, 20.500473022460938, 99.72074890136719, -5.97802734375, 224.6736297607422, 5.228532791137695, 67.23294830322266, 86.59701538085938, -12.375312805175781, 136.19451904296875, 0.32292938232421875, 112.07061767578125, 49.88287353515625, 80.77952575683594, 89.99964141845703, 72.0517578125, 11.061767578125, 18.72632598876953, 25.789236068725586, 253.11587524414062, -2.46173095703125, 49.522071838378906, 75.13907623291016, -75.48773193359375, 216.88082885742188, 39.41789245605469, 41.39227294921875, 37.88935089111328, 54.70709991455078, 247.26229858398438, -112.15570068359375, -15.65618896484375, 69.19493103027344, 180.81640625, 19.554580688476562, 51.77693557739258, 6.9809417724609375, 129.0786590576172, -59.6890869140625, 159.85586547851562, 107.1521224975586, -29.239028930664062, 65.90350341796875, 49.60691833496094, -8.173629760742188, 20.58131980895996, 5.7063446044921875, 0.16637039184570312, 10.183540344238281, 16.513511657714844], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000133.npy"} +{"epoch": 0.19530102790014683, "step": 134, "batch_size": 64, "mean": 53.36433410644531, "std": 70.24101257324219, "min": -44.03272247314453, "p10": -33.61764583587646, "median": 44.8273811340332, "p90": 137.59532775878907, "max": 346.4635314941406, "pos_frac": 0.828125, "sample": [18.06005859375, -44.03272247314453, -40.642608642578125, 118.86305236816406, 205.79092407226562, 35.43647766113281, 19.52096176147461, 43.79787063598633, -35.94420623779297, 50.270713806152344, 125.89680480957031, 26.015003204345703, 12.389997482299805, 4.786857604980469, 51.341407775878906, 44.88453674316406, -28.18900489807129, 50.40159606933594, 79.17796325683594, 26.555130004882812, 5.8680572509765625, 12.347305297851562, 102.98033905029297, 17.40631103515625, 168.8269500732422, 1.586456298828125, 117.69692993164062, 98.62100219726562, 58.121944427490234, 18.785221099853516, 48.57359313964844, 139.00534057617188, 2.2120113372802734, 50.952110290527344, 173.22250366210938, 115.03704071044922, 134.3052978515625, -43.6602783203125, 89.79246520996094, -20.768890380859375, 64.36444091796875, -36.57905578613281, 68.57905578613281, 8.705984115600586, 346.4635314941406, 11.89168930053711, 81.86598205566406, 41.10902404785156, 77.18360900878906, -5.213785171508789, 66.62150573730469, -24.180450439453125, -36.21959686279297, 183.80801391601562, 58.970008850097656, 0.9443359375, 61.07914733886719, 39.34065246582031, 44.770225524902344, 72.84998321533203, -40.939735412597656, 83.72807312011719, 32.95450210571289, 157.9276123046875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000134.npy"} +{"epoch": 0.19676945668135096, "step": 135, "batch_size": 64, "mean": 77.52986907958984, "std": 99.67030334472656, "min": -94.69509887695312, "p10": -35.12706146240234, "median": 54.2434196472168, "p90": 227.62265472412113, "max": 376.303955078125, "pos_frac": 0.78125, "sample": [29.752487182617188, 145.605712890625, 34.28097152709961, 250.93917846679688, 136.32879638671875, 78.68243408203125, 257.102783203125, 39.28438949584961, 260.0885314941406, 231.4691925048828, 139.57826232910156, 96.66163635253906, 202.55157470703125, 132.739501953125, 285.97088623046875, 124.117919921875, 31.76318359375, 213.34136962890625, 12.735275268554688, -45.0931396484375, -0.2227191925048828, 115.43988037109375, 165.12158203125, 78.034423828125, 41.30731201171875, 99.34907531738281, -0.9886932373046875, 376.303955078125, -23.93813705444336, 122.06590270996094, -64.98869323730469, 213.732666015625, 218.64739990234375, -77.28108215332031, 43.74288558959961, -17.244754791259766, 27.58115005493164, 58.94208526611328, 137.68865966796875, -5.2671356201171875, -61.15635681152344, 56.05900573730469, 89.01742553710938, 62.08018493652344, 0.8783302307128906, 52.427833557128906, 2.8913307189941406, -17.079849243164062, 48.69635772705078, 15.806182861328125, 35.16450119018555, 115.53987121582031, 97.958740234375, 91.96939849853516, 7.831428527832031, 107.99325561523438, 46.06524658203125, 240.89834594726562, 12.941932678222656, -94.69509887695312, -32.45118713378906, -36.27386474609375, -49.16944885253906, 2.5914993286132812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000135.npy"} +{"epoch": 0.19823788546255505, "step": 136, "batch_size": 64, "mean": 51.88257598876953, "std": 98.84121704101562, "min": -87.00767517089844, "p10": -49.80644073486328, "median": 28.721330642700195, "p90": 129.00509033203124, "max": 435.5111083984375, "pos_frac": 0.6875, "sample": [66.42117309570312, 86.73088073730469, 108.84005737304688, 105.66755676269531, -7.783851623535156, -51.06199264526367, 311.732666015625, 139.67425537109375, -17.469039916992188, 67.20761108398438, 105.96696472167969, 69.93946838378906, -30.37933349609375, 15.827701568603516, 77.09686279296875, 59.94677734375, 83.9630126953125, 34.946998596191406, 118.96003723144531, 349.1372985839844, -1.49749755859375, 69.52928161621094, 435.5111083984375, 0.45534515380859375, 118.25070190429688, 2.2437362670898438, 237.87274169921875, -10.803638458251953, 278.30706787109375, 25.31787872314453, 9.338005065917969, 0.8638954162597656, 60.84746551513672, -81.7896728515625, 58.75627136230469, -69.41531372070312, 21.554244995117188, -26.98979949951172, 94.49205017089844, 92.43696594238281, 10.361188888549805, -15.74618911743164, -15.964202880859375, 67.33123779296875, 11.257837295532227, 2.893972396850586, -27.068172454833984, 32.12478256225586, 96.24624633789062, 129.65231323242188, -51.100074768066406, -65.38021087646484, 37.49005126953125, 84.61458587646484, -87.00767517089844, -0.1396026611328125, 81.51896667480469, -23.97641372680664, -46.8768196105957, 24.64362335205078, -1.9053230285644531, 127.49490356445312, 3.5163345336914062, -64.14238739013672], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000136.npy"} +{"epoch": 0.19970631424375918, "step": 137, "batch_size": 64, "mean": 81.0499267578125, "std": 119.945068359375, "min": -173.76858520507812, "p10": -28.84021301269531, "median": 52.65203857421875, "p90": 265.20419921875026, "max": 412.11871337890625, "pos_frac": 0.84375, "sample": [-44.68159484863281, 8.037403106689453, 83.11199951171875, 127.76879119873047, 157.86251831054688, 34.349220275878906, 50.48529052734375, 93.32960510253906, -23.64352798461914, 106.64675903320312, 4.830097198486328, 99.52067565917969, 402.27862548828125, 18.98158836364746, 39.76898193359375, -116.87445068359375, 116.6954345703125, 9.174312591552734, 38.31415557861328, 165.5769500732422, 162.535400390625, 84.83380889892578, 3.2404441833496094, 412.11871337890625, 114.24101257324219, 342.37164306640625, 200.96591186523438, 136.84877014160156, 18.0384578704834, 175.130859375, -101.40361022949219, 7.162200927734375, 73.10823822021484, 11.881980895996094, 137.18563842773438, 9.407646179199219, 132.41700744628906, 52.12196350097656, 6.182960510253906, 53.18211364746094, 65.34381866455078, 43.026527404785156, -5.684730529785156, -173.76858520507812, -47.49982452392578, -76.03604888916016, 387.691162109375, -31.067363739013672, 40.36675262451172, 315.11456298828125, 57.50788879394531, 135.22512817382812, 3.4281883239746094, 97.24113464355469, -20.796438217163086, 27.90950584411621, 56.36712646484375, 8.808269500732422, 0.2129344940185547, 110.4927749633789, 326.14837646484375, 41.53056335449219, 129.79428100585938, 292.7348937988281], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000137.npy"} +{"epoch": 0.2011747430249633, "step": 138, "batch_size": 64, "mean": 61.40470886230469, "std": 75.8693618774414, "min": -118.90811157226562, "p10": -19.49754943847656, "median": 43.93663215637207, "p90": 156.22514648437502, "max": 288.6038818359375, "pos_frac": 0.78125, "sample": [166.87646484375, -11.441162109375, 75.60459899902344, 140.81796264648438, 6.8190460205078125, 120.66890716552734, -19.867477416992188, -0.8898448944091797, 8.510833740234375, 181.78102111816406, 33.6849250793457, 23.659927368164062, 42.23554992675781, 11.073616027832031, 29.53516960144043, -118.90811157226562, 16.889070510864258, -24.607051849365234, -1.8302803039550781, -11.785514831542969, 200.1766357421875, 184.67669677734375, 15.253414154052734, 11.325714111328125, 124.0882568359375, 134.10865783691406, 140.27789306640625, -4.700920104980469, 97.5270767211914, 151.12205505371094, 60.665740966796875, 29.254079818725586, 130.0406951904297, 7.575653076171875, 146.90432739257812, 182.2411651611328, 80.83905029296875, 45.63771438598633, -72.97576904296875, 64.3413314819336, 69.87199401855469, 27.969764709472656, 94.95060729980469, 0.2333812713623047, -5.3428192138671875, -58.03425598144531, 101.57921600341797, 145.59686279296875, 13.413341522216797, 38.608299255371094, 115.92623901367188, 35.114707946777344, 140.5482635498047, -35.90515899658203, -21.721845626831055, 64.69451904296875, -18.634384155273438, 73.49417114257812, 23.441421508789062, 288.6038818359375, 108.71324920654297, 60.25843811035156, 110.90227508544922, 158.4121856689453], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000138.npy"} +{"epoch": 0.2026431718061674, "step": 139, "batch_size": 64, "mean": 72.78312683105469, "std": 90.80484008789062, "min": -112.64376831054688, "p10": -34.88429794311523, "median": 61.27261924743652, "p90": 169.93342437744144, "max": 393.2294616699219, "pos_frac": 0.78125, "sample": [-22.256698608398438, 162.35635375976562, 101.4527816772461, 179.5975799560547, -38.138084411621094, 69.58085632324219, 122.57938385009766, -13.537300109863281, 87.36468505859375, 151.4873046875, -112.64376831054688, 59.78614807128906, -47.94722366333008, 283.7507019042969, 145.49652099609375, 49.486656188964844, 90.9393310546875, 63.34137725830078, 35.19286346435547, 62.759090423583984, 151.8680419921875, 73.12003326416016, -73.01197052001953, 127.06110382080078, 50.71150207519531, -0.21639633178710938, 161.0830841064453, 86.23143005371094, 18.362916946411133, 39.19745635986328, 160.15997314453125, 30.4392032623291, 129.5590057373047, 122.0112533569336, 44.50518798828125, 100.22715759277344, -21.76781463623047, -1.6123371124267578, 10.067602157592773, 20.08769989013672, 85.06095123291016, -0.1231536865234375, 18.88011932373047, 155.7030029296875, 22.703716278076172, 280.1549987792969, 99.26920318603516, -27.292129516601562, 78.85881805419922, -48.275917053222656, 86.83385467529297, -49.70713806152344, 32.77092742919922, 88.29924011230469, 393.2294616699219, 37.88893508911133, 250.91590881347656, 40.213287353515625, 52.85149383544922, -44.50917053222656, 197.400634765625, 31.587051391601562, 43.49254608154297, 173.1807403564453], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000139.npy"} +{"epoch": 0.20411160058737152, "step": 140, "batch_size": 64, "mean": 66.00921630859375, "std": 93.3653793334961, "min": -136.47218322753906, "p10": -51.81831588745117, "median": 66.7362289428711, "p90": 180.64816284179693, "max": 359.67889404296875, "pos_frac": 0.734375, "sample": [-26.886341094970703, 127.03927612304688, 359.67889404296875, -85.08338165283203, -14.954734802246094, 128.5057373046875, 86.62035369873047, 149.9619598388672, 61.06322479248047, 169.78575134277344, 124.06465148925781, 76.48250579833984, 117.87435913085938, 42.768646240234375, 26.32202911376953, 89.11766052246094, 281.1154479980469, 13.331493377685547, 4.599597930908203, 77.55548858642578, 19.795562744140625, 67.77191162109375, 91.99246215820312, 139.00241088867188, -2.904998779296875, -58.858795166015625, 262.79962158203125, -53.26123046875, 38.01533508300781, 86.43827819824219, 26.10225486755371, 63.99138641357422, 10.010885238647461, 121.34683227539062, -33.81976318359375, 63.664581298828125, 95.56692504882812, 67.89911651611328, -10.848052978515625, 165.65118408203125, 136.89447021484375, -35.600685119628906, 185.30348205566406, -136.47218322753906, -2.4346771240234375, 65.70054626464844, 65.08757019042969, 95.31629180908203, 195.50152587890625, 24.83666229248047, -9.189859390258789, 165.5109100341797, -15.012672424316406, 21.694015502929688, -92.09754943847656, -53.49674606323242, -75.8953628540039, 69.03462219238281, 194.31011962890625, 186.10226440429688, -48.451515197753906, 89.34929656982422, 87.24581909179688, 142.03533935546875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000140.npy"} +{"epoch": 0.2055800293685756, "step": 141, "batch_size": 64, "mean": 62.32007598876953, "std": 98.57848358154297, "min": -247.82540893554688, "p10": -47.23284606933592, "median": 61.37077522277832, "p90": 188.488363647461, "max": 305.5462646484375, "pos_frac": 0.78125, "sample": [67.94387817382812, 40.29585647583008, -16.954158782958984, 104.4093246459961, 80.74479675292969, -32.134944915771484, 116.3924560546875, 68.77058410644531, 1.2353591918945312, -8.503669738769531, 59.29652786254883, 269.6689453125, -247.82540893554688, 50.36236572265625, -6.387580871582031, 34.49250793457031, 0.6486663818359375, -54.886962890625, 271.71417236328125, -61.80979919433594, -32.383277893066406, 206.7161407470703, 45.55665969848633, 88.02459716796875, 153.7220458984375, 83.94467163085938, 111.67561340332031, 4.275611877441406, -61.648162841796875, 74.29296875, 112.74330139160156, 79.463623046875, 34.101829528808594, 171.5723876953125, 263.8445129394531, 27.800264358520508, 117.43500518798828, 24.779489517211914, -18.177762985229492, 19.524574279785156, -28.56078338623047, 49.18769073486328, -53.596946716308594, 195.73806762695312, 4.75947380065918, 143.10888671875, 201.14520263671875, 124.46534729003906, 12.028791427612305, -169.11813354492188, 19.15966796875, 100.76296997070312, 63.44502258300781, 91.48243713378906, 118.5184326171875, 136.0767822265625, 305.5462646484375, 131.19711303710938, 1.1105213165283203, 168.27862548828125, 72.51527404785156, 106.45523071289062, -56.86408233642578, 6.9058685302734375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000141.npy"} +{"epoch": 0.20704845814977973, "step": 142, "batch_size": 64, "mean": 71.19023895263672, "std": 110.82365417480469, "min": -218.37850952148438, "p10": -43.39213256835936, "median": 38.2060661315918, "p90": 247.4781219482422, "max": 299.0489501953125, "pos_frac": 0.765625, "sample": [-22.67481231689453, 272.2423400878906, 34.63648223876953, 43.54278564453125, -30.38111114501953, 248.6676483154297, -25.397624969482422, -23.00838279724121, -91.8575439453125, 32.30356979370117, 137.403564453125, 95.85250854492188, 157.54861450195312, 28.04534339904785, 41.37452697753906, 226.76539611816406, 14.791370391845703, -218.37850952148438, -157.16781616210938, 77.63187408447266, 24.291175842285156, 34.63642883300781, 16.475261688232422, 299.0489501953125, 173.22731018066406, 35.748023986816406, 148.02914428710938, 23.286577224731445, 152.40005493164062, -24.4666805267334, 112.33837890625, 128.03536987304688, 254.1288299560547, 86.73065185546875, -3.004179000854492, 215.5666961669922, 222.1291961669922, 15.951435089111328, 19.52735710144043, 40.70075225830078, -62.00782775878906, -53.0884895324707, 3.77569580078125, 244.7025604248047, 70.67900848388672, 253.1916961669922, 15.787395477294922, 22.669227600097656, 74.27934265136719, -48.968284606933594, 60.51051712036133, -20.43072509765625, 6.006067276000977, -83.08331298828125, 197.59727478027344, 259.70062255859375, 26.942794799804688, 202.43417358398438, 26.944244384765625, -10.387212753295898, 127.11211395263672, 260.9122619628906, 40.66410827636719, 123.51090240478516], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000142.npy"} +{"epoch": 0.20851688693098386, "step": 143, "batch_size": 64, "mean": 45.91241455078125, "std": 97.37619018554688, "min": -163.76486206054688, "p10": -56.2401496887207, "median": 29.483315467834473, "p90": 149.99274597167968, "max": 347.6064758300781, "pos_frac": 0.65625, "sample": [85.74093627929688, -7.9386444091796875, 54.15599822998047, 26.015331268310547, 290.0459289550781, 93.86612701416016, -60.07464599609375, 107.8326187133789, 108.39164733886719, 23.904287338256836, 150.78128051757812, -83.58296203613281, -56.74549102783203, 141.93081665039062, 84.90312957763672, -130.5602569580078, -50.91284942626953, -3.2862071990966797, 108.18836975097656, -3.0556488037109375, 36.526763916015625, 264.3170166015625, -66.98294830322266, 101.08258056640625, 18.425872802734375, -18.872112274169922, 43.68878173828125, 148.15283203125, 34.89579772949219, 18.38418960571289, -10.309577941894531, -48.428497314453125, -55.06101989746094, 5.26286506652832, 140.89295959472656, 6.736747741699219, 47.58666229248047, 70.06349182128906, 28.992034912109375, 347.6064758300781, -35.06896209716797, -13.173511505126953, 35.33441925048828, 85.28277587890625, -2.9017410278320312, 56.2901611328125, 157.39447021484375, 143.99124145507812, -5.477375030517578, -50.54541015625, -91.50323486328125, 300.15069580078125, -38.00336456298828, 84.6692886352539, 70.70515441894531, 29.97459602355957, 115.14375305175781, 25.85742950439453, 187.43942260742188, -163.76486206054688, -12.124412536621094, 42.281341552734375, 6.487174987792969, 17.394622802734375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000143.npy"} +{"epoch": 0.20998531571218795, "step": 144, "batch_size": 64, "mean": 55.82590866088867, "std": 82.75516510009766, "min": -115.34902954101562, "p10": -36.40312271118164, "median": 48.041629791259766, "p90": 180.8495880126953, "max": 252.61734008789062, "pos_frac": 0.734375, "sample": [-34.30360412597656, 113.18157196044922, -9.209440231323242, 178.9158935546875, -103.2614517211914, -21.09521484375, 68.81564331054688, 121.31598663330078, 48.94072723388672, 7.081798553466797, 66.96131134033203, -38.43059158325195, 78.28915405273438, 108.38229370117188, 60.14391326904297, 181.67831420898438, 52.93071746826172, 218.03994750976562, 106.18260955810547, 148.47769165039062, 185.0574951171875, 198.28536987304688, -27.43638801574707, -115.34902954101562, 109.57764434814453, -24.590972900390625, 49.04292297363281, -3.7415084838867188, 63.76624298095703, 23.00470542907715, -57.650779724121094, 72.69261169433594, 70.47142791748047, 20.133392333984375, -50.695716857910156, 143.75726318359375, 47.95501708984375, 27.96975326538086, 22.971330642700195, 96.59005737304688, 29.187118530273438, 96.81842803955078, 252.61734008789062, 26.547807693481445, 28.423309326171875, 77.39251708984375, -34.697166442871094, 23.574172973632812, 19.855117797851562, 105.50369262695312, 123.91901397705078, 242.8457489013672, -1.561187744140625, -37.134246826171875, -20.143341064453125, -22.503822326660156, 235.35557556152344, 15.425189971923828, 24.171934127807617, 2.1337203979492188, 48.12824249267578, 24.735191345214844, 155.62034606933594, -48.20471954345703], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000144.npy"} +{"epoch": 0.21145374449339208, "step": 145, "batch_size": 64, "mean": 60.55756759643555, "std": 85.36116027832031, "min": -127.57621765136719, "p10": -43.815913772582995, "median": 44.425132751464844, "p90": 161.07529602050784, "max": 273.55609130859375, "pos_frac": 0.75, "sample": [24.05512237548828, -51.70625305175781, -103.31294250488281, 273.55609130859375, 158.39541625976562, 123.18292999267578, 258.9437561035156, 1.7953681945800781, -127.57621765136719, 55.90131759643555, 5.173736572265625, 76.17356872558594, 152.00807189941406, -56.9081916809082, 10.79384994506836, 129.12930297851562, 56.91791534423828, 45.106407165527344, 125.9935302734375, -48.279998779296875, -13.253862380981445, 40.830841064453125, 43.34853744506836, 144.05258178710938, 93.22689819335938, -67.61688232421875, -13.123092651367188, 32.56778335571289, -7.8753509521484375, 193.51498413085938, 24.453832626342773, 15.442718505859375, 121.44558715820312, -33.399715423583984, -56.86785888671875, 125.20094299316406, 18.831626892089844, 22.85285758972168, 84.7015151977539, -10.984130859375, 69.57504272460938, -5.987064361572266, 123.36524200439453, -21.24026870727539, 122.09622192382812, 196.1672821044922, 107.41212463378906, -23.574283599853516, 46.470157623291016, 126.97713470458984, 30.003597259521484, 43.743858337402344, 147.84767150878906, 165.47222900390625, 60.78489303588867, 19.429981231689453, -1.9724349975585938, 67.89201354980469, 142.59706115722656, 124.2716293334961, 252.45016479492188, 43.47248458862305, 9.513130187988281, 162.22381591796875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000145.npy"} +{"epoch": 0.21292217327459617, "step": 146, "batch_size": 64, "mean": 46.574520111083984, "std": 92.40666961669922, "min": -247.99734497070312, "p10": -74.03463363647461, "median": 59.325754165649414, "p90": 164.27790985107427, "max": 252.9536895751953, "pos_frac": 0.671875, "sample": [33.86376953125, -38.127418518066406, 108.23345947265625, -68.34326171875, 20.261064529418945, 1.9740524291992188, 78.73778533935547, 65.97732543945312, 81.177978515625, 68.87301635742188, 214.28973388671875, 107.3388900756836, 46.31507873535156, -16.354446411132812, 26.421058654785156, -9.479728698730469, 97.4687728881836, -5.609375, 41.85419464111328, -20.67301368713379, -5.638193130493164, 32.10807800292969, 64.4487075805664, 124.05000305175781, -107.18476867675781, 69.2136001586914, 154.37452697753906, -247.99734497070312, -76.47379302978516, 219.26785278320312, -81.6701889038086, 75.12724304199219, -6.024881362915039, 74.6865234375, 198.22276306152344, -80.02147674560547, 52.6746826171875, 63.16206359863281, 123.5672836303711, 252.9536895751953, 4.89306640625, -11.503097534179688, 57.2532844543457, 85.9728012084961, 61.47486877441406, 104.15926361083984, 105.02428436279297, -101.35304260253906, 61.398223876953125, -79.7635498046875, 83.57267761230469, 144.8099365234375, 168.522216796875, 76.11626434326172, -2.8543930053710938, 111.56348419189453, 39.27488708496094, -39.86784362792969, 91.13525390625, -56.20830535888672, 235.06895446777344, -36.748748779296875, 200.51419067382812, -54.73065185546875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000146.npy"} +{"epoch": 0.2143906020558003, "step": 147, "batch_size": 64, "mean": 63.08330535888672, "std": 95.2734603881836, "min": -157.0152587890625, "p10": -33.3446159362793, "median": 48.224708557128906, "p90": 202.4678405761719, "max": 299.9554443359375, "pos_frac": 0.765625, "sample": [108.90664672851562, -71.52485656738281, 41.165191650390625, 243.9844970703125, -30.17742156982422, -9.355369567871094, 100.4930419921875, 102.44153594970703, 51.865203857421875, -42.202491760253906, 30.89052391052246, 81.31110382080078, 63.273399353027344, 131.57830810546875, 28.256732940673828, 206.83148193359375, 118.30810546875, 18.68231964111328, 1.401702880859375, 185.13055419921875, 164.10035705566406, -27.898658752441406, 179.85281372070312, 26.226259231567383, 294.61151123046875, 48.513710021972656, 213.30007934570312, 113.7752685546875, -14.033170700073242, 47.935707092285156, 33.982845306396484, -146.76095581054688, 5.531597137451172, 119.65509033203125, 111.21658325195312, -30.945175170898438, 64.69483184814453, 299.9554443359375, 80.89093780517578, 24.318866729736328, 192.2860107421875, 15.505767822265625, -16.15079689025879, 15.470237731933594, 34.47615051269531, 61.35871887207031, -15.763076782226562, 84.66180419921875, 67.75328063964844, -34.372947692871094, 11.33050537109375, 55.36483383178711, 17.11068344116211, 181.4574737548828, -26.82402801513672, 58.820072174072266, 253.32281494140625, -36.66090393066406, 34.5462646484375, -157.0152587890625, 223.30703735351562, -62.55274963378906, 64.22980499267578, 45.485801696777344], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000147.npy"} +{"epoch": 0.21585903083700442, "step": 148, "batch_size": 64, "mean": 54.566932678222656, "std": 87.42639923095703, "min": -107.55546569824219, "p10": -31.769382476806637, "median": 36.339359283447266, "p90": 152.84022521972656, "max": 361.33697509765625, "pos_frac": 0.796875, "sample": [-5.433704376220703, 53.887245178222656, 4.752349853515625, 87.83721923828125, -66.10277557373047, 110.47765350341797, 77.75643920898438, 40.93327713012695, -42.20099639892578, 208.07679748535156, 21.80272674560547, 151.16680908203125, 21.38112449645996, -8.343086242675781, 156.62232971191406, 259.7926025390625, 4.894569396972656, 21.141143798828125, -43.53929901123047, -74.44625854492188, 138.48362731933594, 136.24569702148438, -12.192209243774414, 95.71640014648438, 100.27582550048828, 45.08098602294922, 65.39214324951172, 51.434513092041016, 111.9713134765625, 1.7123546600341797, 153.55740356445312, 31.13275146484375, 36.43778991699219, 76.0888671875, -32.85205078125, 49.522891998291016, 36.05180358886719, 18.607711791992188, 117.5441665649414, 62.25252914428711, 132.1658935546875, 35.27580261230469, -89.56123352050781, 47.76038360595703, 39.987815856933594, 22.180038452148438, 195.57696533203125, 50.43218231201172, -5.313056945800781, -29.24315643310547, -107.55546569824219, 36.90187072753906, 9.977331161499023, 6.43916130065918, 36.240928649902344, 7.091398239135742, 35.59453582763672, 361.33697509765625, 339.2474365234375, 8.963516235351562, -0.9883365631103516, 0.37783050537109375, 92.3973388671875, 4.076940536499023], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000148.npy"} +{"epoch": 0.2173274596182085, "step": 149, "batch_size": 64, "mean": 67.35143280029297, "std": 85.50799560546875, "min": -194.53404235839844, "p10": -9.726575279235838, "median": 60.090030670166016, "p90": 157.63550262451173, "max": 319.2630615234375, "pos_frac": 0.828125, "sample": [123.09984588623047, 70.4764404296875, 214.16595458984375, 60.253604888916016, 109.98206329345703, 74.39825439453125, 73.55451202392578, -8.747579574584961, -20.116439819335938, -10.14614486694336, 22.923574447631836, -10.19952392578125, 59.926456451416016, 44.7042236328125, 61.73780822753906, -67.28341674804688, 91.8599853515625, 95.20062255859375, 45.52703857421875, 139.52456665039062, 118.53422546386719, 178.75421142578125, 112.07221221923828, 30.730932235717773, 14.873197555541992, 100.57438659667969, 50.50696563720703, 130.66412353515625, 78.25125122070312, 137.99432373046875, 69.6479721069336, -1.2664852142333984, 319.2630615234375, 42.762847900390625, 157.72824096679688, 25.7001953125, 232.09036254882812, 64.68006134033203, 30.213973999023438, 113.87899017333984, 24.73959732055664, 73.60260009765625, 9.824686050415039, 45.459510803222656, -1.8545150756835938, 57.08903503417969, 13.369709014892578, 60.636741638183594, -17.075897216796875, -194.53404235839844, 100.70941162109375, 10.560218811035156, 3.935028076171875, 157.4191131591797, 11.072187423706055, 293.01910400390625, 17.542186737060547, 41.714019775390625, 144.8204345703125, 276.2109680175781, 8.118141174316406, -7.271488189697266, 93.06881713867188, -90.18059539794922], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000149.npy"} +{"epoch": 0.21879588839941264, "step": 150, "batch_size": 64, "mean": 65.94608306884766, "std": 97.08871459960938, "min": -86.69010925292969, "p10": -21.088378906249993, "median": 39.98025703430176, "p90": 175.79598846435547, "max": 437.87469482421875, "pos_frac": 0.828125, "sample": [300.39593505859375, 320.2662353515625, 24.49669647216797, 31.17913818359375, 61.73631286621094, -40.403236389160156, 191.346435546875, 39.48167419433594, -75.66992950439453, 72.94316864013672, 437.87469482421875, -4.891759872436523, -15.336738586425781, 19.6253662109375, -10.935714721679688, 140.3769989013672, 22.534927368164062, 49.76929473876953, 49.01213073730469, 78.89131927490234, 40.47883987426758, 64.46428680419922, 150.2998809814453, 29.54949951171875, 110.73699951171875, 89.17522430419922, 199.00193786621094, 137.38243103027344, 16.62683868408203, 68.07444763183594, 15.403831481933594, -52.87432861328125, 84.00818634033203, 177.26246643066406, 72.38095092773438, 8.173006057739258, 172.37420654296875, 23.2509822845459, 150.20556640625, -23.553367614746094, 96.80476379394531, -40.25267791748047, -44.915321350097656, 5.468696594238281, 125.77806854248047, 6.432727813720703, 31.29535675048828, 64.58943939208984, 18.844947814941406, 40.73210906982422, 61.61631774902344, 71.94883728027344, 66.24140167236328, 20.62823486328125, 30.914152145385742, 353.26409912109375, 14.866079330444336, 17.473018646240234, 12.98320198059082, 32.34814453125, 103.60759735107422, -10.239799499511719, -86.69010925292969, 1.6951789855957031], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000150.npy"} +{"epoch": 0.22026431718061673, "step": 151, "batch_size": 64, "mean": 66.66267395019531, "std": 83.35469818115234, "min": -147.91064453125, "p10": -29.505813217163084, "median": 57.627899169921875, "p90": 189.84090576171877, "max": 259.30047607421875, "pos_frac": 0.84375, "sample": [-147.91064453125, 228.41429138183594, 49.84776306152344, 0.7941665649414062, 45.255157470703125, 59.327972412109375, 66.62532806396484, 118.3583984375, 71.00303649902344, 28.463699340820312, 184.4786376953125, -45.979759216308594, -30.92138671875, 108.93614196777344, 58.42864227294922, -46.50104904174805, -4.815372467041016, 44.46514892578125, 60.144927978515625, 48.980812072753906, 243.04803466796875, 55.87869644165039, 149.2376708984375, 56.94691467285156, -57.94629669189453, 183.87747192382812, 27.332061767578125, 11.042022705078125, -26.202808380126953, 21.198654174804688, 34.648460388183594, 6.882299423217773, 20.16427230834961, 9.229372024536133, 115.04318237304688, 186.31838989257812, -65.51068115234375, 63.49982452392578, 110.47045135498047, 105.62887573242188, 8.232458114624023, 60.687767028808594, 104.410400390625, 82.928466796875, 10.58917236328125, 202.81442260742188, 109.03323364257812, 58.30888366699219, 126.45303344726562, -7.581489562988281, 222.80003356933594, 8.184410095214844, 3.893951416015625, 145.56890869140625, 15.408767700195312, 15.345804214477539, 210.06478881835938, 118.45421600341797, 259.30047607421875, 191.35055541992188, -52.72239685058594, 80.41319274902344, 23.684783935546875, 120.6046142578125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000151.npy"} +{"epoch": 0.22173274596182085, "step": 152, "batch_size": 64, "mean": 80.85972595214844, "std": 98.66983032226562, "min": -88.08016967773438, "p10": -41.20511474609374, "median": 61.42044639587402, "p90": 223.58062286376955, "max": 331.85467529296875, "pos_frac": 0.796875, "sample": [30.463272094726562, 60.66223907470703, 172.66004943847656, 180.98365783691406, 76.37542724609375, 210.30599975585938, 62.178653717041016, 148.22760009765625, 331.85467529296875, 39.01368713378906, 159.2518310546875, 132.84654235839844, 37.98921585083008, -67.31367492675781, -3.57232666015625, 232.16864013671875, 141.69357299804688, 53.00212860107422, 69.87397766113281, 63.06494140625, 55.84459686279297, 53.21195983886719, -48.616485595703125, 80.69561767578125, 0.909881591796875, 324.21514892578125, 135.93081665039062, 234.5095672607422, 44.84825134277344, 27.047706604003906, 64.79739379882812, -47.377655029296875, 21.127193450927734, -7.4103851318359375, 1.0714168548583984, 60.18682861328125, 19.03626251220703, 209.03292846679688, 30.28797149658203, 49.195030212402344, 302.2879943847656, -24.321067810058594, 54.29560852050781, 217.76226806640625, 68.52001953125, -35.434173583984375, 244.43482971191406, 36.08583068847656, -77.8727798461914, 109.18922424316406, 2.404172897338867, 145.32012939453125, 93.07345581054688, 75.8291244506836, 226.07420349121094, -47.97466278076172, 168.87850952148438, -88.08016967773438, -9.1619873046875, 121.44612121582031, -43.678375244140625, -32.807838439941406, 142.35382080078125, 86.12407684326172], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000152.npy"} +{"epoch": 0.22320117474302498, "step": 153, "batch_size": 64, "mean": 69.02678680419922, "std": 106.34500122070312, "min": -185.70147705078125, "p10": -35.822275924682614, "median": 56.94235610961914, "p90": 207.03205261230474, "max": 439.1776123046875, "pos_frac": 0.75, "sample": [89.11750793457031, -4.126457214355469, 11.129180908203125, -8.201889038085938, 17.927989959716797, -9.692771911621094, 28.000457763671875, 163.2825927734375, -42.104034423828125, 194.03663635253906, 96.04534912109375, 7.651908874511719, -46.278072357177734, 85.21754455566406, 253.57069396972656, -71.0114517211914, 80.46058654785156, 16.983901977539062, 57.54358673095703, 106.73064422607422, 56.34112548828125, 439.1776123046875, 42.164207458496094, 67.26290893554688, 83.12139892578125, 188.3153076171875, -28.688758850097656, -44.36072540283203, 212.6015167236328, 219.1640625, 145.49388122558594, 42.516441345214844, 258.66680908203125, 6.2613067626953125, 1.8389225006103516, 1.2914581298828125, 171.95501708984375, 99.8998794555664, -34.73234176635742, -36.289390563964844, 132.5556640625, 10.751632690429688, 14.479507446289062, 281.1433410644531, 23.796249389648438, -15.291492462158203, -64.17241668701172, 65.6044921875, 7.050376892089844, -185.70147705078125, 91.30842590332031, 370.114990234375, 79.927490234375, -31.308513641357422, 68.37566375732422, -3.9572010040283203, 98.80029296875, 46.038978576660156, 135.64910888671875, 140.06591796875, 82.09686279296875, -21.78807830810547, 67.61341094970703, 106.27680969238281], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000153.npy"} +{"epoch": 0.22466960352422907, "step": 154, "batch_size": 64, "mean": 65.80543518066406, "std": 110.37510681152344, "min": -215.87451171875, "p10": -56.60406341552734, "median": 57.77350616455078, "p90": 199.192546081543, "max": 311.5793762207031, "pos_frac": 0.65625, "sample": [-11.042821884155273, 59.239051818847656, -215.87451171875, 25.339204788208008, 311.5793762207031, 56.307960510253906, -11.721990585327148, 156.97567749023438, 185.4923095703125, -56.70316696166992, 93.70764923095703, 247.85658264160156, -95.19345092773438, 131.90280151367188, 121.08651733398438, 60.77022933959961, 2.9982757568359375, 24.294525146484375, 36.912410736083984, -5.454690933227539, 117.661376953125, 60.48835372924805, -7.293422698974609, 65.76329040527344, -32.41119384765625, 198.44485473632812, -56.37282180786133, 220.20681762695312, 157.11386108398438, -97.859375, -37.382896423339844, 184.4251708984375, -77.42168426513672, 177.05551147460938, -53.64410400390625, 207.24476623535156, 308.03887939453125, 178.55078125, -4.8537445068359375, 181.94432067871094, 199.5129852294922, 121.73760986328125, 22.250041961669922, 84.32208251953125, 194.3396759033203, -10.155067443847656, -17.275222778320312, -6.838472366333008, -92.86035919189453, 121.10804748535156, 159.96397399902344, -26.7559814453125, 4.795587539672852, 28.10784912109375, 11.861083984375, -80.17530822753906, 110.72926330566406, 145.38580322265625, 122.31111907958984, -54.70111083984375, -9.37237548828125, 0.5239391326904297, 85.90310668945312, 288.65924072265625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000154.npy"} +{"epoch": 0.2261380323054332, "step": 155, "batch_size": 64, "mean": 66.6856689453125, "std": 99.27825927734375, "min": -214.59535217285156, "p10": -24.26785049438476, "median": 64.33493041992188, "p90": 173.29172363281256, "max": 371.341064453125, "pos_frac": 0.734375, "sample": [-9.772613525390625, 100.43900299072266, 61.95277404785156, -6.3358154296875, 100.14213562011719, 13.374618530273438, 80.9194107055664, -29.94769287109375, 75.55057525634766, 22.874370574951172, -9.714492797851562, 64.70347595214844, 109.92790985107422, -88.356689453125, 114.98604583740234, 77.07150268554688, -27.066173553466797, 49.71062469482422, 63.96638488769531, -3.822357177734375, 80.68746948242188, 71.6904067993164, 50.55873107910156, 371.341064453125, 192.96876525878906, 9.173971176147461, 143.91546630859375, 108.10367584228516, 6.844337463378906, -130.82908630371094, 25.153152465820312, -1.1623382568359375, 25.934600830078125, 99.18643188476562, 197.4111328125, -214.59535217285156, 297.20318603515625, 45.551307678222656, 135.98561096191406, -17.432533264160156, 1.3011665344238281, 100.86070251464844, 156.29966735839844, 44.970458984375, 289.0538635253906, 100.24174499511719, 296.4208984375, 157.361083984375, -86.6923828125, 104.23587036132812, -6.484893798828125, 180.119140625, -14.67770767211914, 114.0482406616211, 90.12603759765625, -52.257896423339844, 62.52245330810547, 134.42401123046875, 126.93553924560547, 13.180744171142578, 142.33786010742188, -3.2581653594970703, -17.73843002319336, 76.25952911376953], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000155.npy"} +{"epoch": 0.2276064610866373, "step": 156, "batch_size": 64, "mean": 38.129791259765625, "std": 112.19856262207031, "min": -287.41259765625, "p10": -106.76282958984373, "median": 23.927188873291016, "p90": 140.39141693115235, "max": 301.01275634765625, "pos_frac": 0.71875, "sample": [136.185302734375, 62.57102966308594, 301.01275634765625, 263.362548828125, 179.79226684570312, 114.41781616210938, -208.79586791992188, 4.35595703125, -88.9241943359375, 21.167617797851562, -204.4611053466797, 136.41795349121094, 103.25345611572266, -25.16565704345703, -65.4066162109375, -140.3153076171875, 43.60483169555664, 116.57585144042969, 264.29254150390625, 82.21479797363281, 23.828567504882812, 124.96438598632812, 52.124847412109375, 19.67348861694336, 4.295564651489258, -12.403343200683594, 15.161190032958984, 30.26861572265625, 264.0782470703125, -19.743125915527344, 251.01702880859375, 142.09432983398438, 124.03850555419922, 72.7244644165039, 9.990028381347656, 8.241239547729492, 79.23851013183594, -127.70547485351562, -287.41259765625, -80.04813385009766, 85.83460235595703, -55.42799377441406, -4.3767242431640625, 131.93087768554688, 17.588455200195312, -114.407958984375, 87.17627716064453, 124.97130584716797, 81.07078552246094, 6.65953254699707, -4.056386947631836, 78.98139953613281, 62.302879333496094, 55.13121032714844, -117.2400131225586, -51.92967224121094, 10.973068237304688, 20.661956787109375, 5.27099609375, -3.7772140502929688, 24.02581024169922, 79.6029281616211, 0.21012496948242188, 128.548095703125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000156.npy"} +{"epoch": 0.2290748898678414, "step": 157, "batch_size": 64, "mean": 85.21761322021484, "std": 109.9005126953125, "min": -305.9610595703125, "p10": -29.12902450561523, "median": 78.19539260864258, "p90": 210.8664276123047, "max": 402.2113037109375, "pos_frac": 0.84375, "sample": [165.74676513671875, 171.66891479492188, 91.11780548095703, 204.2514190673828, 136.13845825195312, 11.904388427734375, 80.77490997314453, 150.12091064453125, 2.2403945922851562, 161.02761840820312, 59.84651565551758, 224.36050415039062, 51.699432373046875, 93.5313720703125, 237.35394287109375, 213.70143127441406, 155.62789916992188, 108.25486755371094, 39.23033142089844, 163.33953857421875, 9.834022521972656, 40.80821228027344, 75.61587524414062, -29.984539031982422, 118.57696533203125, 264.33941650390625, -4.4630584716796875, 32.322818756103516, 0.978363037109375, 131.4193572998047, -305.9610595703125, -38.412742614746094, 26.831039428710938, 53.64350891113281, 126.20738220214844, 82.89917755126953, 259.9104309082031, 140.88809204101562, 106.74354553222656, -34.86759948730469, 31.112003326416016, 297.3910827636719, 34.45597839355469, 66.25698852539062, 5.243297576904297, 180.9362030029297, 111.76042175292969, -27.132823944091797, 175.90682983398438, 132.21310424804688, 197.3160400390625, 58.2073974609375, -63.361324310302734, 21.436874389648438, 192.4893798828125, 163.33755493164062, 402.2113037109375, 57.934654235839844, 41.875282287597656, -147.45770263671875, 14.164703369140625, -63.361785888671875, 14.246585845947266, -22.521621704101562], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000157.npy"} +{"epoch": 0.2305433186490455, "step": 158, "batch_size": 64, "mean": 90.48815155029297, "std": 138.80062866210938, "min": -185.09579467773438, "p10": -49.43940620422362, "median": 53.96138381958008, "p90": 291.95450744628914, "max": 508.8758239746094, "pos_frac": 0.71875, "sample": [8.523761749267578, 11.44852066040039, 45.27543640136719, 53.247291564941406, 54.67547607421875, -29.997589111328125, 34.227291107177734, 90.81212615966797, 39.618011474609375, -59.60805130004883, 221.41537475585938, -118.73431396484375, 264.58447265625, 83.6363296508789, -15.61456298828125, -62.3668212890625, -15.671483993530273, 115.93028259277344, -13.899824142456055, 16.293838500976562, 237.07534790039062, -40.17833709716797, 297.79034423828125, -185.09579467773438, 312.72381591796875, 77.45276641845703, 146.1949005126953, 21.10544776916504, 40.68446350097656, 12.528949737548828, 73.85044860839844, 90.75959014892578, -21.131240844726562, 158.48040771484375, -59.67349624633789, -3.4470977783203125, 354.54986572265625, 106.21369171142578, -120.32064056396484, 356.44976806640625, 437.629638671875, -3.3575897216796875, 69.4550552368164, 508.8758239746094, 17.673583984375, 9.752906799316406, -53.4084358215332, 123.12973022460938, 246.68551635742188, 116.29967498779297, 391.8951416015625, 174.2630615234375, 103.41096496582031, 180.82969665527344, -17.89436912536621, 188.13275146484375, 113.88337707519531, -8.530614852905273, 26.406105041503906, 104.22444915771484, 278.3375549316406, 50.626625061035156, 171.588134765625, -18.47562026977539], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000158.npy"} +{"epoch": 0.23201174743024963, "step": 159, "batch_size": 64, "mean": 61.85674285888672, "std": 110.80256652832031, "min": -215.62977600097656, "p10": -62.98472518920899, "median": 59.96918296813965, "p90": 201.5517730712891, "max": 323.6943664550781, "pos_frac": 0.75, "sample": [10.10003662109375, 47.377906799316406, 140.53123474121094, -49.705474853515625, 266.486572265625, 150.66970825195312, -215.62977600097656, -39.2918815612793, 125.36625671386719, 175.8927001953125, 93.83970642089844, -62.74559020996094, 13.278505325317383, -28.8414306640625, 96.25404357910156, -80.2049789428711, -135.97879028320312, 61.059879302978516, 192.05453491210938, 154.05857849121094, 323.6943664550781, 9.939586639404297, -48.224761962890625, 18.593780517578125, 30.968765258789062, -80.30409240722656, 20.603851318359375, -50.3436279296875, 94.03755187988281, 56.259002685546875, -23.273391723632812, 203.20452880859375, 90.3481216430664, 16.180255889892578, 125.3963394165039, -22.271894454956055, 58.87848663330078, -106.57901000976562, -3.9896011352539062, 230.11965942382812, 153.65432739257812, 232.22494506835938, 63.78455352783203, 75.86802673339844, 2.326416015625, 289.9681091308594, 12.940397262573242, 1.5149288177490234, 75.86231231689453, 54.664398193359375, 140.82498168945312, 31.909324645996094, -63.08721160888672, -174.47479248046875, 73.41822052001953, 105.9593505859375, 97.41504669189453, 92.85945129394531, 32.431968688964844, 318.7066650390625, 197.69534301757812, 72.06614685058594, 80.585205078125, 131.90374755859375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000159.npy"} +{"epoch": 0.23348017621145375, "step": 160, "batch_size": 64, "mean": 83.32547760009766, "std": 135.57952880859375, "min": -158.1006317138672, "p10": -72.66752700805664, "median": 66.15788650512695, "p90": 253.14522705078127, "max": 482.4923095703125, "pos_frac": 0.6875, "sample": [109.65848541259766, -14.58552360534668, 192.86514282226562, 159.4966583251953, -14.815021514892578, -74.83808898925781, 206.24453735351562, 191.3896026611328, 177.15960693359375, 13.284069061279297, -84.69161224365234, -9.166534423828125, 144.90164184570312, 244.92385864257812, 121.75749969482422, -78.07815551757812, 44.1815071105957, -37.21922302246094, -152.76712036132812, 238.1451873779297, 280.59283447265625, -123.60929870605469, 256.6686706542969, -63.10662078857422, 145.56629943847656, 46.93377685546875, 6.930793762207031, -15.535659790039062, 482.4923095703125, 66.60398864746094, 14.823968887329102, 65.71178436279297, -25.83563232421875, 419.72802734375, 2.4364776611328125, 80.68843078613281, 119.0852279663086, 175.30003356933594, 394.09228515625, 312.92633056640625, 28.088623046875, 230.53793334960938, -158.1006317138672, 212.1661376953125, 95.0988998413086, 14.400985717773438, -30.791223526000977, 18.730756759643555, 121.7685775756836, 117.49038696289062, -38.827964782714844, 34.455406188964844, 121.66822814941406, -67.6028823852539, 100.84552001953125, 82.44023132324219, -10.127557754516602, -0.5569667816162109, -101.0925064086914, 168.3789825439453, 34.03645324707031, 261.43841552734375, -40.37059020996094, 118.41455078125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000160.npy"} +{"epoch": 0.23494860499265785, "step": 161, "batch_size": 64, "mean": 72.9051284790039, "std": 107.78850555419922, "min": -140.17800903320312, "p10": -51.394773101806635, "median": 49.90380668640137, "p90": 242.15778045654307, "max": 332.47967529296875, "pos_frac": 0.765625, "sample": [79.26009368896484, 201.48007202148438, -53.50458526611328, 131.9129638671875, 314.2151184082031, -140.17800903320312, -54.177886962890625, -72.78085327148438, 108.15058898925781, -44.12328338623047, 45.1571044921875, 105.45469665527344, 80.82359313964844, -74.20508575439453, 3.6011695861816406, 129.5783233642578, 332.47967529296875, 88.44001770019531, 144.94302368164062, 272.92474365234375, 18.40033721923828, 54.650508880615234, -7.0719146728515625, -37.65339660644531, 35.10943603515625, -12.19412612915039, 136.74819946289062, 41.265052795410156, 287.1431884765625, 305.3055419921875, 137.34426879882812, -33.533233642578125, 6.219264984130859, -28.961483001708984, 251.41741943359375, 115.7810287475586, 37.697052001953125, -46.47187805175781, 18.496749877929688, 83.48551940917969, -14.127212524414062, 32.817359924316406, 13.201087951660156, 32.061866760253906, 16.617958068847656, 28.246536254882812, 215.73355102539062, 77.30422973632812, 16.972206115722656, 151.99200439453125, 220.5519561767578, -106.1322021484375, 81.99894714355469, 175.672119140625, 36.511016845703125, 42.697479248046875, 34.29641342163086, 127.22843933105469, 93.98100280761719, -93.52843475341797, 58.29165267944336, 267.8558349609375, 119.34194946289062, 73.7132797241211], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000161.npy"} +{"epoch": 0.23641703377386197, "step": 162, "batch_size": 64, "mean": 71.90221405029297, "std": 114.65283966064453, "min": -181.5937957763672, "p10": -48.83814697265625, "median": 63.280418395996094, "p90": 209.6553161621094, "max": 463.0406188964844, "pos_frac": 0.75, "sample": [0.21242523193359375, 52.266849517822266, 111.36740112304688, 256.031494140625, 66.50865173339844, 222.98696899414062, 33.48046112060547, 141.3066864013672, 46.466712951660156, -7.617399215698242, -22.556114196777344, 203.5516357421875, 69.93087005615234, -114.61251068115234, -42.52458190917969, -19.33746337890625, 167.6370086669922, 168.25454711914062, -7.494842529296875, 129.80422973632812, 79.0360336303711, 28.7537841796875, 59.73686218261719, 8.0589599609375, 21.8205623626709, 212.27117919921875, 135.22291564941406, -40.17808532714844, -181.5937957763672, 151.28904724121094, 104.5914535522461, -115.88279724121094, -14.625558853149414, 17.74880027770996, 155.32235717773438, 75.26162719726562, 242.48809814453125, 463.0406188964844, 64.84989166259766, 116.01371002197266, 86.06855010986328, 99.96623229980469, 161.08323669433594, 169.51348876953125, 154.65835571289062, -1.1941375732421875, -51.54396057128906, 3.6637191772460938, 98.12956237792969, 41.82445526123047, -73.93125915527344, 172.65965270996094, 3.0415496826171875, -98.6649398803711, 399.275390625, 24.15240478515625, -115.87296295166016, 32.61701965332031, 82.43775939941406, 84.11202239990234, 61.71094512939453, -35.58421325683594, 38.967041015625, 225.76315307617188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000162.npy"} +{"epoch": 0.23788546255506607, "step": 163, "batch_size": 64, "mean": 70.35995483398438, "std": 112.81494140625, "min": -169.76568603515625, "p10": -50.76592636108397, "median": 63.77787780761719, "p90": 260.1042724609375, "max": 360.70452880859375, "pos_frac": 0.671875, "sample": [1.5807571411132812, -9.717887878417969, 264.281494140625, 360.70452880859375, -28.521404266357422, -126.99516296386719, 96.9916000366211, -6.703033447265625, -9.933074951171875, 83.03480529785156, 140.14468383789062, -30.120216369628906, 102.00962829589844, 328.76971435546875, 109.7997817993164, 45.55406188964844, 104.33707427978516, -13.984466552734375, 16.17784881591797, -61.60869598388672, 58.440521240234375, 164.29440307617188, 108.94692993164062, -40.52924346923828, 101.3609619140625, -0.6633186340332031, 124.57447814941406, -169.76568603515625, -70.21739959716797, 114.54637908935547, 273.94512939453125, 13.397457122802734, 145.0218505859375, 12.251256942749023, 70.82196044921875, -11.146875381469727, -55.153076171875, 308.28289794921875, 130.42538452148438, 72.82135009765625, -68.11927795410156, 37.62670135498047, -40.36767578125, 250.357421875, 46.017051696777344, 109.43950653076172, -40.200782775878906, 293.11920166015625, 147.36036682128906, 31.74092674255371, 64.41412353515625, 112.79409790039062, 63.141632080078125, 190.07470703125, 84.68001556396484, -22.627357482910156, 82.59159851074219, 16.774948120117188, -97.27033233642578, -2.7888355255126953, 279.9139709472656, 92.629150390625, 167.43853759765625, -13.160362243652344], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000163.npy"} +{"epoch": 0.2393538913362702, "step": 164, "batch_size": 64, "mean": 85.5523681640625, "std": 107.53472900390625, "min": -154.96890258789062, "p10": -22.494071006774902, "median": 73.1141242980957, "p90": 214.9609191894532, "max": 439.6043701171875, "pos_frac": 0.828125, "sample": [-146.9334259033203, 174.52142333984375, 169.46160888671875, 166.34701538085938, 228.06109619140625, 54.22480010986328, 80.32579040527344, 132.0460662841797, 73.26254272460938, 77.79877471923828, 248.28948974609375, 50.444053649902344, 23.849077224731445, 52.308895111083984, -70.23619079589844, 134.16000366210938, 66.94950103759766, 67.2632064819336, -22.57830238342285, 72.96570587158203, 8.601240158081055, 135.973388671875, 75.2662582397461, 201.7235107421875, 30.885759353637695, 155.78912353515625, -30.145061492919922, -91.81925201416016, 152.88104248046875, 15.839971542358398, 44.68898010253906, -18.478363037109375, 157.13206481933594, 183.36611938476562, 143.22134399414062, 114.5499267578125, 40.497169494628906, 104.95368957519531, 13.511611938476562, 174.37867736816406, 43.01477813720703, 93.8798828125, 67.95640563964844, 31.288299560546875, 45.735626220703125, 3.7673492431640625, 78.64044189453125, 41.005287170410156, -154.96890258789062, 142.11341857910156, 78.86134338378906, 47.0927734375, 337.98272705078125, 7.391998291015625, 349.9020690917969, 247.158935546875, -2.1775970458984375, 220.63409423828125, 89.76576232910156, -61.91157531738281, -22.297531127929688, 113.27578735351562, 439.6043701171875, -7.712303161621094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000164.npy"} +{"epoch": 0.24082232011747431, "step": 165, "batch_size": 64, "mean": 55.67503356933594, "std": 103.79989624023438, "min": -156.75559997558594, "p10": -83.11426162719727, "median": 39.76560592651367, "p90": 208.96661987304694, "max": 293.6553955078125, "pos_frac": 0.703125, "sample": [99.78765106201172, 88.71118927001953, -86.41969299316406, 19.3128662109375, -89.2672119140625, -1.3588790893554688, 82.36125946044922, 119.21681213378906, -13.965621948242188, -86.00240325927734, 135.30938720703125, -155.3729248046875, -83.00982666015625, 35.351707458496094, 32.14866638183594, -95.03038787841797, -10.122562408447266, -54.33880615234375, 164.1221923828125, 152.62454223632812, 86.714111328125, 26.55120086669922, 16.749507904052734, 293.6553955078125, -156.75559997558594, 36.6217041015625, -21.51393699645996, -46.569862365722656, -7.764312744140625, 143.23065185546875, -83.15901947021484, 21.725706100463867, 219.89230346679688, 193.89260864257812, 53.340667724609375, -26.856590270996094, 82.8598861694336, 64.80271911621094, 80.93682098388672, 3.3667469024658203, 42.909507751464844, 65.38178253173828, 163.8169708251953, 76.19947814941406, 215.42691040039062, -15.740264892578125, 98.35829162597656, 61.86769104003906, 1.9157638549804688, -73.80903625488281, 32.657318115234375, 234.57699584960938, 28.981590270996094, -46.71753692626953, 62.56843948364258, 277.3907470703125, 29.07117462158203, 275.727294921875, 228.93380737304688, 124.39190673828125, 122.10435485839844, 15.614280700683594, 118.3763427734375, 187.41958618164062], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000165.npy"} +{"epoch": 0.2422907488986784, "step": 166, "batch_size": 64, "mean": 102.40840911865234, "std": 144.1895294189453, "min": -100.47714233398438, "p10": -36.34766998291015, "median": 69.1132583618164, "p90": 277.6223297119142, "max": 577.4794921875, "pos_frac": 0.796875, "sample": [471.8585205078125, 325.95452880859375, 34.95170974731445, 504.653564453125, -39.32923126220703, 4.077495574951172, 22.11923599243164, 73.71902465820312, 178.73663330078125, 52.444610595703125, 137.87596130371094, -10.249076843261719, 11.642629623413086, 167.3948211669922, -71.47286987304688, 82.00297546386719, 162.14224243164062, 17.81414794921875, -8.306026458740234, 202.39727783203125, 1.1520862579345703, 577.4794921875, 46.62935256958008, -50.698482513427734, -51.33002471923828, -55.85660171508789, 47.16034698486328, 155.12484741210938, -100.47714233398438, 244.23721313476562, 291.93023681640625, 482.7619323730469, 118.37418365478516, 71.51467895507812, 27.927276611328125, 375.4986572265625, 13.418876647949219, 210.72772216796875, 30.831207275390625, 161.15411376953125, -16.364307403564453, 91.88079833984375, 20.122665405273438, 9.77459716796875, 185.1793975830078, 131.6651611328125, 220.6498565673828, -7.871753692626953, 71.58892822265625, 179.02191162109375, 33.447933197021484, 88.23631286621094, -67.04751586914062, 66.71183776855469, -6.4709930419921875, -29.39069366455078, 120.68909454345703, 3.0709991455078125, 122.74124145507812, 6.4715576171875, 92.54690551757812, 15.661710739135742, 211.8961181640625, 91.93814849853516], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000166.npy"} +{"epoch": 0.24375917767988253, "step": 167, "batch_size": 64, "mean": 107.59046936035156, "std": 120.72675323486328, "min": -132.67779541015625, "p10": -20.36880493164062, "median": 94.27594375610352, "p90": 264.9112045288087, "max": 449.5419921875, "pos_frac": 0.828125, "sample": [101.68589782714844, 67.47007751464844, 143.84542846679688, 220.3682098388672, 118.92214965820312, 124.15087890625, 104.47686767578125, 72.19400024414062, 104.69812774658203, 46.37968063354492, 449.5419921875, 195.37881469726562, 76.54267883300781, -18.162826538085938, 25.02197265625, 65.64094543457031, 84.17823028564453, -63.559547424316406, 137.2861328125, 191.85836791992188, 24.533517837524414, 40.57041931152344, 83.79200744628906, 58.635398864746094, 108.91455078125, -132.67779541015625, 280.56146240234375, 231.3563232421875, -21.29962158203125, 179.15811157226562, -13.462997436523438, 107.72007751464844, 79.11062622070312, 59.287750244140625, 388.95416259765625, 169.47052001953125, 281.06109619140625, 86.8659896850586, 213.9540557861328, -130.60060119628906, 240.99705505371094, 275.1601257324219, 339.6420593261719, 148.59396362304688, 425.23675537109375, -18.1968994140625, 186.74575805664062, 141.77490234375, 220.52560424804688, 180.01881408691406, 102.22772216796875, 8.861892700195312, -3.035116195678711, 34.90376281738281, 67.47575378417969, 140.70199584960938, 105.64566802978516, 61.62694549560547, 20.910293579101562, -103.8676986694336, 24.424121856689453, 15.739892959594727, -50.23490905761719, -23.911911010742188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000167.npy"} +{"epoch": 0.24522760646108663, "step": 168, "batch_size": 64, "mean": 79.17437744140625, "std": 107.79267120361328, "min": -88.78468322753906, "p10": -52.10037460327148, "median": 57.577524185180664, "p90": 206.9502166748047, "max": 422.8857421875, "pos_frac": 0.796875, "sample": [22.998397827148438, 124.14095306396484, 25.154991149902344, 21.548015594482422, 3.002084732055664, 33.29667282104492, 142.92849731445312, -53.15830612182617, 21.368152618408203, 12.22711181640625, -88.78468322753906, 193.4143829345703, 422.8857421875, 97.54605102539062, -52.86090087890625, 203.42819213867188, 179.20770263671875, 248.8722381591797, 208.45965576171875, 53.671546936035156, 224.05111694335938, 280.2774658203125, 186.03466796875, 179.19210815429688, -45.017555236816406, -50.32581329345703, -17.81494903564453, 134.76290893554688, 154.18463134765625, 140.33224487304688, 95.88719177246094, 35.30964660644531, -64.31041717529297, 98.83309936523438, 37.671058654785156, 116.95315551757812, -73.91900634765625, -11.790653228759766, -26.728214263916016, 35.43553161621094, 28.933610916137695, 94.18800354003906, 67.79574584960938, 99.01840209960938, 117.26765441894531, 68.53617095947266, 28.67671775817871, -61.54560089111328, 289.9585876464844, 61.231178283691406, 156.77272033691406, 22.5921688079834, 22.556047439575195, 29.153152465820312, 131.68048095703125, 63.7294921875, 28.668546676635742, -87.19725799560547, 53.92387008666992, -43.705078125, 386.4058532714844, 19.91876983642578, 98.19493103027344, 142.04116821289062], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000168.npy"} +{"epoch": 0.24669603524229075, "step": 169, "batch_size": 64, "mean": 69.26856994628906, "std": 134.7840118408203, "min": -290.2491455078125, "p10": -71.8534599304199, "median": 42.521711349487305, "p90": 239.99923553466803, "max": 568.08544921875, "pos_frac": 0.75, "sample": [82.8329849243164, 94.14659881591797, 6.143985748291016, 17.65473175048828, 224.51853942871094, 113.99297332763672, 22.147842407226562, 162.2271270751953, 93.3659896850586, 39.10392379760742, -1.9210700988769531, 40.250572204589844, -290.2491455078125, 3.862987518310547, 58.07594680786133, -27.831069946289062, 26.416725158691406, -90.77949523925781, 89.00450134277344, 173.0108642578125, 159.70321655273438, -57.051513671875, 69.22003173828125, -93.82830810546875, 216.9916534423828, 246.63381958007812, 63.75741195678711, 9.324432373046875, -21.127548217773438, 32.69651794433594, 217.19024658203125, -11.120689392089844, 24.430994033813477, 72.7319107055664, 95.53982543945312, 132.94749450683594, 257.9382629394531, 26.696638107299805, 70.93122863769531, 251.95680236816406, 6.027921676635742, 396.4375, -128.98068237304688, 96.99983215332031, 5.036949157714844, 59.09864807128906, 32.37980651855469, -78.19715118408203, 353.8731994628906, -14.540916442871094, 568.08544921875, -6.8935546875, 97.00151062011719, 16.27252960205078, 301.4596862792969, -13.697265625, 86.56748962402344, 15.458175659179688, 44.792850494384766, -29.090599060058594, -129.3486785888672, 95.83340454101562, 207.0243377685547, -149.9500732421875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000169.npy"} +{"epoch": 0.24816446402349487, "step": 170, "batch_size": 64, "mean": 81.35664367675781, "std": 109.88703918457031, "min": -240.22244262695312, "p10": -33.24342346191406, "median": 68.18013191223145, "p90": 220.0009216308594, "max": 332.0155334472656, "pos_frac": 0.828125, "sample": [195.03533935546875, -141.37527465820312, 150.12591552734375, 82.06759643554688, 209.31942749023438, 81.65110778808594, 127.46745300292969, 13.019309997558594, 147.246826171875, 3.5076446533203125, 94.28866577148438, -2.2001686096191406, 76.25189208984375, -34.51191711425781, 8.956270217895508, -51.5640869140625, 118.2590560913086, 254.70571899414062, 18.640762329101562, 308.7727355957031, 4.781106948852539, -20.66573143005371, 170.44845581054688, 49.1548957824707, 2.6837158203125, 22.191368103027344, -240.22244262695312, -31.507362365722656, 72.77003479003906, 206.8292236328125, 75.16665649414062, 133.46311950683594, 57.30772018432617, 84.74734497070312, 182.50125122070312, -64.60739135742188, 314.63427734375, 6.643043518066406, 12.485189437866211, 332.0155334472656, 77.4746322631836, -100.50018310546875, 63.59022903442383, 120.41004943847656, 207.56930541992188, -33.987449645996094, 25.993974685668945, 2.8141040802001953, 42.24999237060547, 61.068458557128906, 96.11568450927734, 45.05278778076172, 246.33364868164062, 44.34312057495117, 136.09048461914062, 58.36334228515625, 188.9783172607422, 224.57870483398438, -15.261615753173828, 198.08026123046875, 237.9910888671875, 21.42161750793457, 208.060546875, 19.509456634521484], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000170.npy"} +{"epoch": 0.24963289280469897, "step": 171, "batch_size": 64, "mean": 67.01963806152344, "std": 134.7970733642578, "min": -251.87620544433594, "p10": -94.1959846496582, "median": 60.94600296020508, "p90": 216.85992126464845, "max": 596.4205932617188, "pos_frac": 0.71875, "sample": [-86.61256408691406, 218.17869567871094, 35.776023864746094, 96.80380249023438, -105.26378631591797, -53.53807830810547, 6.0579071044921875, 116.52291870117188, -79.2310791015625, 144.62176513671875, 12.713302612304688, 164.56747436523438, 95.23420715332031, 105.35694885253906, 53.4771728515625, -49.553916931152344, 105.890380859375, -101.76548767089844, 110.86549377441406, -8.981666564941406, 210.72091674804688, 75.70150756835938, 58.46211242675781, 90.80634307861328, 338.7505187988281, 78.32958984375, -97.45429229736328, 596.4205932617188, -99.48042297363281, 50.67974853515625, 74.24806213378906, -251.87620544433594, 432.5648193359375, 146.833740234375, 92.10426330566406, -111.47467041015625, 214.67184448242188, 107.05271911621094, 17.68323516845703, 298.98101806640625, 17.93218994140625, 63.429893493652344, 217.79766845703125, -26.789365768432617, -97.4460220336914, 7.45635986328125, 66.30675506591797, 94.97276306152344, -62.40493392944336, 109.8465576171875, -27.96556854248047, -50.587677001953125, 142.66192626953125, 45.51097869873047, 79.11050415039062, -10.921594619750977, 36.531890869140625, 161.61395263671875, -20.347129821777344, 1.751129150390625, 93.75821685791016, 18.198692321777344, 271.5094299316406, 52.48517990112305], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000171.npy"} +{"epoch": 0.2511013215859031, "step": 172, "batch_size": 64, "mean": 68.93881225585938, "std": 114.31993103027344, "min": -197.7449951171875, "p10": -71.66233825683592, "median": 76.20875930786133, "p90": 201.51256256103517, "max": 460.940673828125, "pos_frac": 0.6875, "sample": [-27.39190673828125, 102.56200408935547, -120.044677734375, 95.41778564453125, 39.48272705078125, -51.431907653808594, 148.2499237060547, 105.06727600097656, 79.52336883544922, -78.1851577758789, -129.53338623046875, 53.04923629760742, 58.89411163330078, -74.81289672851562, 158.58871459960938, 90.05157470703125, 15.652599334716797, 69.13090515136719, -62.34402084350586, 72.82714080810547, -38.33927917480469, 92.42202758789062, 197.13308715820312, 146.36776733398438, 23.58472442626953, 73.75284576416016, 235.79971313476562, 203.3894805908203, -83.95753479003906, 252.3541259765625, -28.2906494140625, 114.1524429321289, 111.27959442138672, 92.92611694335938, 125.45220947265625, 306.3617858886719, 460.940673828125, 44.3114013671875, 124.7221908569336, 30.018146514892578, -20.28789520263672, -43.85289001464844, 78.6646728515625, 80.99383544921875, 188.41757202148438, -25.575332641601562, -5.641298294067383, 25.527265548706055, -30.87982177734375, 173.55252075195312, 159.06788635253906, -3.0344619750976562, -64.31103515625, 105.36074829101562, 110.5263900756836, -197.7449951171875, 70.611328125, 107.77239990234375, 233.55947875976562, 102.52635192871094, -16.335357666015625, -81.77545928955078, 251.35806274414062, 184.45001220703125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000172.npy"} +{"epoch": 0.2525697503671072, "step": 173, "batch_size": 64, "mean": 69.41828918457031, "std": 99.79008483886719, "min": -192.04498291015625, "p10": -25.091445541381827, "median": 51.0056095123291, "p90": 216.18919982910163, "max": 362.7132568359375, "pos_frac": 0.796875, "sample": [44.47114562988281, 79.8901138305664, 35.610107421875, -3.1662826538085938, 1.4652748107910156, 44.11720275878906, -0.09634780883789062, 148.6276092529297, 124.28965759277344, 9.032331466674805, -15.865264892578125, -47.72224426269531, 16.232019424438477, 3.5656051635742188, 84.72051239013672, 273.16241455078125, 77.0798568725586, 55.3064079284668, 73.37677764892578, 203.86569213867188, 120.80567932128906, 94.50778198242188, 86.64848327636719, -64.04444885253906, 52.419029235839844, 27.19799041748047, 362.7132568359375, 29.952877044677734, 162.20169067382812, 221.470703125, 67.91022491455078, 18.17799949645996, -60.20832824707031, 118.03780364990234, 104.28233337402344, -17.255172729492188, 122.59822845458984, 1.86102294921875, -2.672952651977539, 235.52761840820312, 69.77644348144531, 199.37393188476562, -28.449848175048828, 38.43976593017578, -6.941307067871094, 14.585208892822266, 232.70449829101562, 120.06967163085938, -45.005767822265625, 0.6580963134765625, 96.42284393310547, 49.59218978881836, 132.33828735351562, -69.255126953125, -192.04498291015625, 346.34869384765625, 37.80003356933594, 5.727760314941406, 89.64741516113281, 20.832157135009766, 279.0347900390625, 63.59858703613281, 55.69565963745117, 41.7271728515625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000173.npy"} +{"epoch": 0.2540381791483113, "step": 174, "batch_size": 64, "mean": 82.87537384033203, "std": 121.24845123291016, "min": -164.9907989501953, "p10": -50.061633300781246, "median": 63.924339294433594, "p90": 268.66345367431654, "max": 465.4854431152344, "pos_frac": 0.78125, "sample": [20.89239501953125, -4.8107452392578125, -48.873878479003906, -98.04206848144531, 1.1685943603515625, 39.346397399902344, -14.782512664794922, 8.944112777709961, 111.85774230957031, 208.61439514160156, 239.21730041503906, -0.6712417602539062, 67.41118621826172, 18.809221267700195, 61.900428771972656, -37.739013671875, 117.12074279785156, 168.03538513183594, 152.93287658691406, 281.2832336425781, 59.81651306152344, 36.133541107177734, 65.94824981689453, -18.88286781311035, 329.6784973144531, -85.19175720214844, 138.32318115234375, 305.17352294921875, 34.789642333984375, 97.00837707519531, 15.06348991394043, -15.98050308227539, 66.67427825927734, -83.82684326171875, 146.75732421875, -50.57067108154297, 184.46200561523438, 113.70694732666016, -125.09542846679688, 293.0501708984375, 319.90277099609375, 330.7876892089844, 36.33965301513672, 127.72750854492188, 13.549005508422852, -164.9907989501953, 3.3841094970703125, 148.27731323242188, 120.00541687011719, 152.10983276367188, 83.22598266601562, 52.00663757324219, 22.255332946777344, 146.05380249023438, -92.70316314697266, 179.34129333496094, 147.30136108398438, 98.60115051269531, 47.87335968017578, 13.720172882080078, 465.4854431152344, 126.29673767089844, 96.06109619140625, 31.759862899780273], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000174.npy"} +{"epoch": 0.2555066079295154, "step": 175, "batch_size": 64, "mean": 112.97673797607422, "std": 112.26419830322266, "min": -94.55661010742188, "p10": -6.80973720550537, "median": 90.03810501098633, "p90": 249.4562194824219, "max": 438.73388671875, "pos_frac": 0.84375, "sample": [20.216901779174805, 39.31501007080078, 60.220855712890625, -94.55661010742188, 89.43818664550781, 169.51486206054688, 234.23928833007812, 90.63802337646484, 24.180606842041016, 147.3422393798828, 327.94842529296875, -73.16160583496094, -10.859466552734375, 318.55975341796875, 123.40731811523438, 40.151397705078125, 225.05703735351562, 231.3843536376953, 249.8387451171875, 167.37274169921875, 190.30337524414062, -68.5508804321289, 214.83267211914062, 438.73388671875, 40.57838439941406, 140.0111083984375, 345.88311767578125, 53.67645263671875, 128.27249145507812, 86.3651351928711, -5.393028259277344, 60.84123992919922, 306.8430480957031, 18.121227264404297, 4.877952575683594, 38.19391632080078, 2.211162567138672, 278.5028991699219, 7.326084136962891, 87.04536437988281, 129.60508728027344, 120.28605651855469, 128.64535522460938, 227.81918334960938, -0.4110279083251953, 226.5162353515625, 248.56365966796875, 190.9295196533203, -25.37607192993164, 107.02124786376953, 178.42031860351562, 95.39054870605469, 88.77103424072266, 151.7713623046875, 43.352561950683594, 72.46983337402344, 146.86050415039062, -7.249307632446289, 44.05536651611328, 233.68734741210938, -42.379981994628906, 72.86691284179688, -5.7840728759765625, 55.75596618652344], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000175.npy"} +{"epoch": 0.25697503671071953, "step": 176, "batch_size": 64, "mean": 95.66221618652344, "std": 127.4686508178711, "min": -67.83738708496094, "p10": -38.06931762695312, "median": 63.70289993286133, "p90": 235.53159484863284, "max": 532.19384765625, "pos_frac": 0.765625, "sample": [3.40069580078125, 26.410369873046875, 95.79580688476562, 245.9292449951172, 70.22288513183594, 30.563159942626953, 12.905059814453125, 341.5982360839844, 19.692901611328125, -29.555877685546875, -38.96820068359375, 496.6075744628906, -15.390142440795898, -67.83738708496094, 236.80810546875, 65.3529281616211, 39.751861572265625, 224.1863250732422, 182.83914184570312, -2.4751815795898438, -5.5721282958984375, 75.12047576904297, 152.23095703125, -54.00061798095703, 232.55307006835938, 5.289007186889648, 28.841569900512695, 211.81495666503906, 190.00238037109375, 8.587776184082031, 120.34194946289062, -60.70635986328125, 154.43600463867188, -49.945594787597656, 43.18714904785156, 181.61392211914062, -47.00271224975586, 88.9068374633789, 190.98916625976562, 151.22589111328125, 192.51873779296875, 0.5571212768554688, -46.56022644042969, -2.499897003173828, 159.24365234375, 145.4744415283203, 136.8708038330078, 77.11043548583984, 143.49087524414062, 301.0985107421875, 35.61820983886719, -35.971923828125, 22.22607421875, 15.224002838134766, 141.1002655029297, 62.05287170410156, -17.316234588623047, 7.391590118408203, 134.74224853515625, -1.2440223693847656, 184.61187744140625, 532.19384765625, 16.45391845703125, 362.243408203125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000176.npy"} +{"epoch": 0.25844346549192365, "step": 177, "batch_size": 64, "mean": 95.35098266601562, "std": 124.8912582397461, "min": -115.32731628417969, "p10": -24.59272861480713, "median": 71.84898376464844, "p90": 249.34195556640628, "max": 609.68310546875, "pos_frac": 0.71875, "sample": [181.21420288085938, 50.05145263671875, 239.45545959472656, 131.50027465820312, 69.00140380859375, 129.5292205810547, -115.32731628417969, 103.09295654296875, 211.34942626953125, 176.61239624023438, 266.61077880859375, 42.654266357421875, 94.11006164550781, 268.220458984375, 128.0028533935547, 85.72360229492188, 15.925727844238281, 375.7149963378906, 34.95722198486328, 175.21343994140625, 74.69656372070312, 199.5753631591797, -80.25617980957031, 160.49282836914062, -3.820119857788086, 44.787559509277344, 215.93768310546875, -22.65453338623047, -5.9532470703125, -2.586456298828125, 60.665199279785156, 308.473388671875, 114.37113952636719, -24.34637451171875, 27.1268310546875, 91.7735595703125, -20.066665649414062, 253.5790252685547, -0.5637111663818359, -29.16811180114746, -78.0408935546875, 162.22445678710938, -5.562522888183594, 56.20341491699219, -85.01287841796875, 160.78358459472656, 130.5395965576172, 18.694347381591797, -11.155296325683594, -3.5866241455078125, 218.0139617919922, 16.796693801879883, 85.52450561523438, -15.820426940917969, 191.48776245117188, 37.33805465698242, 38.47087860107422, -24.69830894470215, 170.96017456054688, 45.7125244140625, 609.68310546875, 302.8582763671875, 82.4930648803711, -27.121383666992188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000177.npy"} +{"epoch": 0.2599118942731278, "step": 178, "batch_size": 64, "mean": 118.4439926147461, "std": 149.21286010742188, "min": -147.0530242919922, "p10": -36.309697723388666, "median": 105.30853271484375, "p90": 333.0084259033204, "max": 592.5118408203125, "pos_frac": 0.765625, "sample": [48.546260833740234, 15.812416076660156, 169.31118774414062, -7.86370849609375, 148.36868286132812, 93.86861419677734, 111.22623443603516, 40.22760009765625, 22.752765655517578, 291.49139404296875, -4.099510192871094, 120.85194396972656, 304.4604187011719, 114.49208068847656, 50.92772674560547, 162.6987762451172, 414.0773620605469, 129.39389038085938, 592.5118408203125, -62.48222351074219, 218.7324981689453, 62.351585388183594, 13.115570068359375, 60.96869659423828, -8.334695816040039, 180.59463500976562, 408.0395202636719, -56.45812225341797, 51.9542236328125, -7.7879180908203125, -90.61671447753906, 107.38916015625, 228.095458984375, 21.392288208007812, 132.30908203125, 191.0531463623047, 139.3807373046875, -105.82333374023438, 69.9169692993164, 269.1380615234375, 31.42861557006836, 125.55313110351562, -147.0530242919922, 450.5814514160156, -9.286577224731445, 165.48728942871094, 125.24493408203125, 165.939453125, -39.875953674316406, 462.9076232910156, 38.9857177734375, 146.4351806640625, 359.1806335449219, -5.318094253540039, 7.46551513671875, 231.34864807128906, 103.2279052734375, -27.988433837890625, 81.96965026855469, 234.7012939453125, 345.2432861328125, -92.63302612304688, 197.05984497070312, -12.174278259277344], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000178.npy"} +{"epoch": 0.26138032305433184, "step": 179, "batch_size": 64, "mean": 83.05406188964844, "std": 148.7332305908203, "min": -303.71441650390625, "p10": -90.14387664794921, "median": 79.26416015625, "p90": 315.7486419677735, "max": 407.9969787597656, "pos_frac": 0.703125, "sample": [129.05447387695312, 167.4517059326172, 187.14111328125, 407.9969787597656, 67.66534423828125, -97.4222412109375, 187.8878936767578, 17.859737396240234, -14.487564086914062, 16.6772518157959, 91.90420532226562, 78.58690643310547, 50.04143524169922, 96.61417388916016, -18.90210723876953, 92.06422424316406, -84.37677001953125, 338.2276916503906, -58.45998764038086, -56.131011962890625, 79.94141387939453, -92.61549377441406, 374.0668640136719, 26.326021194458008, 234.65174865722656, 128.2752227783203, 171.1832733154297, 320.745361328125, 177.3199462890625, 40.895713806152344, 86.58856201171875, 304.0896301269531, 364.40814208984375, 96.39508056640625, 75.69247436523438, 137.904541015625, -35.413055419921875, -28.211139678955078, 1.1240158081054688, 98.5460205078125, 74.43248748779297, -40.17671203613281, 84.3718490600586, -103.74612426757812, 134.07913208007812, 345.0398864746094, -94.92711639404297, 260.79541015625, 117.32483673095703, -138.560546875, -59.486305236816406, 261.291259765625, 25.599075317382812, 259.0882873535156, 321.1358947753906, 224.25132751464844, -181.247314453125, 142.38052368164062, -67.06169891357422, -79.7474365234375, -303.71441650390625, 15.3758544921875, 24.274213790893555, -66.6205062866211], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000179.npy"} +{"epoch": 0.26284875183553597, "step": 180, "batch_size": 64, "mean": 107.56155395507812, "std": 132.34535217285156, "min": -213.4706573486328, "p10": -29.634654235839843, "median": 89.76552963256836, "p90": 274.38848571777345, "max": 532.4744873046875, "pos_frac": 0.828125, "sample": [109.81366729736328, 173.3740234375, -84.97755432128906, 3.157215118408203, -78.01805877685547, 532.4744873046875, -25.794353485107422, 75.40057373046875, 106.15809631347656, 65.29153442382812, 4.892906188964844, 250.13458251953125, -213.4706573486328, 145.51292419433594, 189.5371551513672, 125.2879638671875, 87.02346801757812, 90.77836608886719, 111.66654205322266, 71.45759582519531, 172.99102783203125, -158.30885314941406, 53.32158279418945, 305.5462951660156, 71.99178314208984, 231.20526123046875, 166.82220458984375, 4.184547424316406, 41.63103485107422, 56.95305252075195, 289.2088623046875, 2.181560516357422, 272.7079162597656, 169.74087524414062, 80.7057113647461, -4.895254135131836, 67.94715118408203, 212.68215942382812, -29.682769775390625, 193.57009887695312, 95.89363098144531, 17.99707794189453, 256.2949523925781, 63.373046875, -133.99624633789062, 87.77621459960938, 1.44842529296875, 278.44287109375, 258.089111328125, 97.3898696899414, 155.54666137695312, 274.7196044921875, -26.545400619506836, 345.4097900390625, 88.75269317626953, -29.522384643554688, 203.95355224609375, 159.97373962402344, 34.29535675048828, -77.34540557861328, 273.6158752441406, 299.49847412109375, 31.05390167236328, 187.61972045898438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000180.npy"} +{"epoch": 0.2643171806167401, "step": 181, "batch_size": 64, "mean": 66.72486877441406, "std": 123.44548797607422, "min": -226.958984375, "p10": -63.96866683959961, "median": 48.153194427490234, "p90": 218.58205566406258, "max": 421.2730712890625, "pos_frac": 0.765625, "sample": [145.81907653808594, -62.457481384277344, 16.01274299621582, 60.28483963012695, 119.97576141357422, -4.051017761230469, 32.41438674926758, 29.360733032226562, 93.48291778564453, 132.53074645996094, -6.671741485595703, 16.373605728149414, 4.177486419677734, 13.610191345214844, -175.10711669921875, 235.40155029296875, 128.0718994140625, -31.996421813964844, 83.46331787109375, -14.564022064208984, 43.845096588134766, -64.61631774902344, -17.151473999023438, 233.23716735839844, -82.72749328613281, 89.27503967285156, 134.67770385742188, 295.20037841796875, 48.722679138183594, 62.42393493652344, 139.92654418945312, 82.39315032958984, -215.8413848876953, 47.583709716796875, 21.382583618164062, 94.96006774902344, 14.146833419799805, 195.2924346923828, 134.97401428222656, -72.61036682128906, 17.314071655273438, -226.958984375, 226.81094360351562, -22.550350189208984, 376.690185546875, 174.7134552001953, -173.97804260253906, 57.47697448730469, -41.968017578125, 140.92230224609375, 173.02684020996094, 421.2730712890625, 161.57289123535156, 43.07123947143555, 134.5911102294922, 7.453334808349609, 4.995738983154297, 4.938636779785156, 295.470458984375, 60.64717483520508, 29.880332946777344, 199.38131713867188, 157.809814453125, 46.58118438720703], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000181.npy"} +{"epoch": 0.2657856093979442, "step": 182, "batch_size": 64, "mean": 119.17942810058594, "std": 143.7060546875, "min": -255.6307373046875, "p10": -55.96593971252439, "median": 107.12614440917969, "p90": 298.0251892089844, "max": 422.00384521484375, "pos_frac": 0.828125, "sample": [300.875244140625, 85.36518859863281, 331.08856201171875, 203.19058227539062, 89.859130859375, 234.51988220214844, 244.62527465820312, 284.47381591796875, 358.5390625, 77.2684326171875, 145.5559844970703, 46.59486770629883, 31.531564712524414, -17.937458038330078, 291.37506103515625, -255.6307373046875, 255.57717895507812, 67.13682556152344, 5.030576705932617, 53.721160888671875, 422.00384521484375, -1.0861492156982422, 262.3522644042969, 124.39315795898438, 159.34677124023438, -65.13951110839844, 259.9362487792969, 87.26956176757812, 14.265506744384766, 71.24593353271484, 159.77777099609375, 287.63861083984375, -117.20305633544922, -131.42739868164062, 175.87356567382812, 245.47366333007812, 47.87675476074219, 152.84353637695312, 234.15774536132812, 242.76707458496094, -76.14839935302734, 80.05496978759766, 36.219879150390625, 142.8070831298828, 184.6959686279297, 183.68600463867188, 194.70501708984375, 136.0834503173828, -134.35690307617188, 5.586509704589844, 400.4111022949219, 152.0809326171875, 48.16194152832031, 10.150232315063477, -33.42913818359375, 8.707622528076172, -133.67684936523438, 74.45874786376953, 310.60821533203125, 20.734535217285156, 374.13995361328125, -34.56093978881836, 48.77374267578125, 162.46385192871094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000182.npy"} +{"epoch": 0.26725403817914833, "step": 183, "batch_size": 64, "mean": 63.464420318603516, "std": 115.38804626464844, "min": -167.63026428222656, "p10": -87.17522811889648, "median": 61.848703384399414, "p90": 204.62062835693365, "max": 358.21636962890625, "pos_frac": 0.765625, "sample": [11.441635131835938, 77.27753448486328, -11.906187057495117, 61.685150146484375, 18.519031524658203, 176.09385681152344, 150.7538604736328, 147.22714233398438, 47.36268615722656, 59.3262939453125, -88.25654602050781, 141.949951171875, 71.07490539550781, 18.078210830688477, 35.78460693359375, 89.38444519042969, 358.21636962890625, 250.3148193359375, 144.78379821777344, 32.22282409667969, 64.31134796142578, 124.03004455566406, 310.63226318359375, 109.87013244628906, -12.156227111816406, 5.722967147827148, 75.35003662109375, 62.01225662231445, 0.492767333984375, -167.63026428222656, 184.66867065429688, 83.59931182861328, -56.175209045410156, 13.918420791625977, 188.75885009765625, 76.33560180664062, 40.83852767944336, 110.72100067138672, 141.33795166015625, 246.5989990234375, -141.2458038330078, -40.94744110107422, 352.51068115234375, -84.65215301513672, 29.30774688720703, -30.373367309570312, 86.65518951416016, -102.22062683105469, -131.12652587890625, -115.80117797851562, 165.95025634765625, 127.00994873046875, 211.4185333251953, 212.2147216796875, 91.4454345703125, 50.12488555908203, 26.2725830078125, -78.72249603271484, 153.05709838867188, 88.06625366210938, -159.00408935546875, -68.70187377929688, 22.409622192382812, 3.50384521484375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000183.npy"} +{"epoch": 0.2687224669603524, "step": 184, "batch_size": 64, "mean": 66.39598083496094, "std": 117.8230209350586, "min": -171.05429077148438, "p10": -53.17679786682129, "median": 35.01859092712402, "p90": 219.4947021484375, "max": 360.34783935546875, "pos_frac": 0.734375, "sample": [28.729278564453125, -114.6791000366211, 70.95149993896484, 86.12110137939453, 189.3871612548828, 209.3748779296875, -7.1610260009765625, 11.80169677734375, 166.7142333984375, -18.221923828125, 321.7052001953125, 360.34783935546875, 65.95755004882812, 226.146728515625, -52.93584442138672, 207.24380493164062, 8.728042602539062, 219.84243774414062, 103.21549987792969, 283.07373046875, -22.46401596069336, 317.497802734375, -92.88912963867188, 19.082260131835938, 40.72694778442383, 41.116905212402344, 42.050140380859375, 4.520988464355469, 90.28062438964844, 3.7306880950927734, 39.67091369628906, 106.44412231445312, 154.7317352294922, 207.06288146972656, -94.96249389648438, 218.68331909179688, 16.576522827148438, -23.930686950683594, -41.643367767333984, 20.357067108154297, 72.91544342041016, -23.06836700439453, 35.23701095581055, 7.616861343383789, 16.908973693847656, 208.97096252441406, 73.9889144897461, 56.13333511352539, -57.678466796875, -142.77642822265625, -53.28006362915039, 21.036916732788086, -5.546924591064453, 2.1198348999023438, -171.05429077148438, 31.055343627929688, -39.64411926269531, 182.48214721679688, 34.8001708984375, 111.95880889892578, 23.083106994628906, -0.33541107177734375, 100.84616088867188, 350.5870056152344], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000184.npy"} +{"epoch": 0.2701908957415565, "step": 185, "batch_size": 64, "mean": 53.11936950683594, "std": 126.5738754272461, "min": -191.86764526367188, "p10": -64.44929542541503, "median": 27.729345321655273, "p90": 210.59169769287118, "max": 441.0353088378906, "pos_frac": 0.609375, "sample": [-166.28689575195312, -65.55870056152344, -20.903703689575195, 86.22268676757812, 78.34175109863281, 15.441072463989258, -6.277807235717773, 79.78620910644531, 432.9805603027344, -1.3311080932617188, 23.888805389404297, 115.0971450805664, 108.3232421875, -9.330101013183594, 271.5924987792969, 41.916748046875, -14.54542350769043, 42.40947723388672, 151.69935607910156, -114.33865356445312, -10.844072341918945, 41.478397369384766, 26.615032196044922, 87.9593276977539, 84.34866333007812, -10.091537475585938, -128.38819885253906, 22.883705139160156, 91.13971710205078, -141.23245239257812, 36.32342529296875, 20.960079193115234, 94.05188751220703, 101.94474792480469, 28.843658447265625, 366.827392578125, -55.038787841796875, 31.876380920410156, 190.38888549804688, -49.64272689819336, 38.511165618896484, 3.4228038787841797, -22.790809631347656, -10.919418334960938, -24.53203773498535, -4.68324089050293, 336.8876647949219, 139.359375, 147.11294555664062, -0.1931438446044922, 441.0353088378906, -30.191307067871094, 26.207809448242188, 72.28556823730469, -18.064193725585938, 219.2500457763672, 39.83967590332031, 130.39942932128906, -191.86764526367188, 285.7567443847656, -82.73247528076172, 98.92646026611328, -11.050857543945312, -61.86068344116211], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000185.npy"} +{"epoch": 0.27165932452276065, "step": 186, "batch_size": 64, "mean": 100.02879333496094, "std": 122.57817077636719, "min": -198.38394165039062, "p10": -26.59964141845703, "median": 81.69730377197266, "p90": 245.239779663086, "max": 470.824462890625, "pos_frac": 0.828125, "sample": [-198.38394165039062, 131.64637756347656, 327.7728576660156, 154.4007568359375, 158.35333251953125, 1.03131103515625, 85.05056762695312, 45.3349723815918, 223.06365966796875, 153.21450805664062, 69.72982788085938, 225.20315551757812, 0.9165420532226562, 84.45298767089844, 41.866695404052734, -29.263813018798828, 100.62612915039062, -123.85862731933594, 44.25204086303711, 52.83116912841797, 312.9679870605469, 50.83401870727539, -0.9436264038085938, 181.12210083007812, 43.61768341064453, 76.59455871582031, 100.11640167236328, 154.09701538085938, 339.34564208984375, 111.56027221679688, 178.58615112304688, -34.47602844238281, 156.3492431640625, 73.46281433105469, 27.81654167175293, 122.56874084472656, 404.8489990234375, 253.826904296875, 47.547855377197266, 8.944690704345703, 163.28854370117188, 34.26683044433594, 78.94161987304688, 69.82559204101562, -76.24090576171875, 412.14300537109375, -25.422653198242188, 69.53594207763672, -27.10406494140625, -44.616905212402344, 470.824462890625, 160.64993286132812, 75.18315124511719, 152.19705200195312, 5.826751708984375, 151.659912109375, 96.13783264160156, -24.824424743652344, 4.444244384765625, 124.90411376953125, 150.37600708007812, 139.45729064941406, -12.407302856445312, 95.76885986328125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000186.npy"} +{"epoch": 0.27312775330396477, "step": 187, "batch_size": 64, "mean": 83.32471466064453, "std": 110.32229614257812, "min": -158.01864624023438, "p10": -39.50836334228515, "median": 63.41030502319336, "p90": 219.8302429199219, "max": 396.59588623046875, "pos_frac": 0.78125, "sample": [-158.01864624023438, 209.8709716796875, -26.54060173034668, 60.43254470825195, -76.39663696289062, 243.96051025390625, 84.84815979003906, 172.13641357421875, 54.57267761230469, 40.38374710083008, 77.69375610351562, 62.15093231201172, 346.317626953125, 78.8197250366211, 133.1589813232422, 215.71368408203125, 165.63233947753906, 72.95320129394531, 115.58558654785156, -46.96388626098633, 61.38014221191406, 33.38316345214844, 396.59588623046875, -32.015533447265625, 171.97911071777344, 56.02923583984375, 240.06704711914062, -40.46049118041992, 196.4984893798828, 44.77085876464844, -50.18980407714844, 76.43248748779297, 19.90526580810547, -57.205902099609375, -37.2867317199707, 161.03805541992188, -31.96440887451172, 12.204046249389648, -4.398841857910156, 221.28475952148438, 156.7869415283203, 351.15277099609375, 97.7419662475586, 47.15064239501953, 64.669677734375, 35.15778732299805, 72.72816467285156, 40.21770477294922, -124.38617706298828, 43.95502471923828, 119.3746109008789, -21.81200408935547, 167.11338806152344, 92.25982666015625, 76.99030303955078, 209.4409637451172, 90.66022491455078, 1.4820785522460938, 54.970703125, 241.33326721191406, 216.43637084960938, 39.39684295654297, -23.960996627807617, 19.56386375427246], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000187.npy"} +{"epoch": 0.2745961820851689, "step": 188, "batch_size": 64, "mean": 99.7897720336914, "std": 123.27433776855469, "min": -139.55410766601562, "p10": -17.474997329711908, "median": 77.86433792114258, "p90": 289.543359375, "max": 506.6097412109375, "pos_frac": 0.796875, "sample": [306.14666748046875, 128.702880859375, 247.89292907714844, 70.63471984863281, 79.31143951416016, -2.03594970703125, -28.7559814453125, -131.49114990234375, 295.38592529296875, 78.29706573486328, 55.328277587890625, -30.253562927246094, 118.0898666381836, 80.33900451660156, 290.3565673828125, 248.4999237060547, 93.84668731689453, -20.13538360595703, 63.72975158691406, 10.59349250793457, -1.1241912841796875, 31.269981384277344, 35.600555419921875, 146.17990112304688, 76.54325103759766, 23.174583435058594, 13.32940673828125, -7.417942047119141, 332.8814392089844, 51.033203125, 375.61248779296875, -139.55410766601562, 287.6458740234375, 105.42523193359375, 167.5093994140625, 118.22552490234375, 62.10151672363281, 194.33628845214844, -10.392187118530273, 200.09747314453125, 176.52598571777344, 12.17974853515625, -94.27362060546875, -23.98598289489746, 197.9953155517578, 43.12226104736328, 220.8612060546875, 44.165714263916016, 19.9227294921875, 328.2691955566406, 87.23722076416016, 171.6080322265625, 56.7377815246582, -10.515634536743164, 179.75563049316406, 84.76420593261719, 99.47100830078125, 506.6097412109375, 97.6527099609375, 77.43161010742188, 80.6395263671875, -11.26742935180664, 7.687599182128906, 16.990015029907227], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000188.npy"} +{"epoch": 0.27606461086637296, "step": 189, "batch_size": 64, "mean": 84.35990905761719, "std": 137.5886688232422, "min": -176.24826049804688, "p10": -66.38988494873045, "median": 62.0582389831543, "p90": 208.49954528808595, "max": 595.1807250976562, "pos_frac": 0.78125, "sample": [43.63166046142578, 186.1761016845703, -118.34442138671875, 364.03240966796875, -69.77477264404297, -58.49181365966797, 42.7940673828125, 5.516014099121094, 4.286504745483398, 38.848876953125, 151.31497192382812, 160.2491455078125, 154.7286376953125, -11.62213134765625, 168.29962158203125, 48.47209167480469, 194.486328125, 52.679195404052734, -168.90374755859375, 6.663934707641602, -87.0750961303711, 31.151010513305664, 80.72904968261719, 177.92233276367188, 156.63475036621094, 84.71730041503906, -45.06044006347656, 61.66061019897461, 34.88252639770508, 202.30291748046875, 7.376361846923828, 30.39424705505371, 94.59625244140625, 504.23345947265625, 64.25723266601562, -24.191207885742188, 28.257308959960938, 236.08946228027344, 79.12126159667969, 16.89940643310547, -137.0244140625, 129.83599853515625, -49.71221923828125, 130.38702392578125, 62.455867767333984, 342.4052734375, 170.5051727294922, 595.1807250976562, -20.4415283203125, 130.23912048339844, 174.67054748535156, 30.703445434570312, 83.38198852539062, 162.93624877929688, 114.33598327636719, 211.15524291992188, 250.76528930664062, 93.32331848144531, -2.9629974365234375, -176.24826049804688, 57.47477722167969, -80.47742462158203, 26.961334228515625, 169.2426300048828], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000189.npy"} +{"epoch": 0.2775330396475771, "step": 190, "batch_size": 64, "mean": 97.3665771484375, "std": 142.3328094482422, "min": -208.1963348388672, "p10": -62.189521026611324, "median": 67.98239517211914, "p90": 316.7326171875, "max": 427.2290954589844, "pos_frac": 0.796875, "sample": [-5.2680206298828125, 117.08576202392578, 194.93612670898438, 165.7936248779297, 20.895858764648438, -81.49628448486328, 263.878173828125, 39.723960876464844, 13.323034286499023, 370.013916015625, 90.54043579101562, 73.57891082763672, 136.40139770507812, 19.780014038085938, 375.08563232421875, 138.54026794433594, 103.14506530761719, 71.11454772949219, 193.30410766601562, 12.78033447265625, 2.8823165893554688, 275.34698486328125, -208.1963348388672, 62.873741149902344, 261.9151611328125, -24.72607421875, 64.82891845703125, 328.22430419921875, 155.96412658691406, 54.455535888671875, -23.151138305664062, 316.9405517578125, 21.20214080810547, -5.536752700805664, 136.94491577148438, 78.88734436035156, -88.53560638427734, -63.99296569824219, -159.12014770507812, 15.633499145507812, 116.7684555053711, -57.981483459472656, 112.0745620727539, 47.35804748535156, -28.08384895324707, -99.90470886230469, 109.79624938964844, 316.2474365234375, 53.51263427734375, 58.65564727783203, -190.78907775878906, 398.211181640625, 125.9102783203125, 23.703561782836914, 209.99000549316406, 108.35138702392578, 64.8502426147461, 386.48846435546875, 5.260520935058594, 427.2290954589844, 219.60821533203125, 227.2854461669922, 37.80324935913086, 43.11785888671875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000190.npy"} +{"epoch": 0.2790014684287812, "step": 191, "batch_size": 64, "mean": 141.63238525390625, "std": 153.9884490966797, "min": -148.90679931640625, "p10": -8.783157348632807, "median": 119.20943450927734, "p90": 331.0032806396485, "max": 580.7034912109375, "pos_frac": 0.859375, "sample": [484.1036071777344, 117.22984313964844, 580.7034912109375, 33.64866638183594, 304.6588134765625, 79.63859558105469, 143.52120971679688, 270.5721435546875, 16.14045524597168, 361.6982421875, 104.72027587890625, 174.52584838867188, -11.004043579101562, 275.5415954589844, 298.4255676269531, 55.380165100097656, 37.42134094238281, 85.04903411865234, 40.77760314941406, 82.1490249633789, 261.6842041015625, 58.085418701171875, 47.497467041015625, 309.500732421875, 124.60822296142578, 7.5731964111328125, 8.188297271728516, 549.0107421875, 194.6242218017578, -24.12757110595703, 233.39962768554688, -79.30227661132812, 17.134971618652344, -51.6852912902832, 143.6072998046875, 182.77227783203125, 187.44606018066406, 156.05621337890625, 334.6549072265625, 31.090993881225586, 10.674474716186523, 252.30325317382812, 229.8055419921875, -148.90679931640625, 77.49253845214844, 527.5185546875, 32.32732391357422, -66.54570007324219, 121.18902587890625, 105.42106628417969, 185.1920623779297, 322.4828186035156, 33.810447692871094, -2.3785667419433594, 128.1222686767578, 180.0294189453125, -61.29772186279297, 131.24041748046875, -3.6010894775390625, 126.54877471923828, 360.95086669921875, 32.86432647705078, 90.99916076660156, 171.50881958007812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000191.npy"} +{"epoch": 0.28046989720998533, "step": 192, "batch_size": 64, "mean": 92.6939926147461, "std": 148.6688995361328, "min": -253.32009887695312, "p10": -55.62595825195312, "median": 73.95022964477539, "p90": 321.22759399414065, "max": 545.5682373046875, "pos_frac": 0.734375, "sample": [-55.73023223876953, -141.50001525878906, 45.590553283691406, 175.3048858642578, 135.93540954589844, -28.650611877441406, -55.382652282714844, -5.286893844604492, 30.964859008789062, -253.32009887695312, 31.40283203125, 97.3438949584961, -40.953948974609375, 220.0623016357422, 17.005233764648438, 39.68043518066406, -78.06648254394531, 105.05758666992188, -50.328521728515625, 176.9602813720703, -120.21431732177734, 411.2672424316406, -41.68043899536133, 75.49166107177734, -25.533065795898438, 325.94268798828125, 361.0145263671875, 373.6506652832031, 163.09934997558594, 367.28057861328125, 97.66181945800781, 76.70803833007812, 545.5682373046875, 122.00453186035156, 175.7959747314453, 29.014036178588867, 123.10093688964844, 221.9819793701172, 275.2434997558594, 61.93721389770508, 42.67820739746094, 310.2257080078125, 8.120477676391602, 36.505126953125, 137.21136474609375, 57.540122985839844, 61.68901824951172, -166.7995147705078, 200.90615844726562, 79.39371490478516, 85.75363159179688, -12.59893798828125, -33.711326599121094, -74.70035552978516, 14.262248992919922, 230.00314331054688, 72.40879821777344, -29.188674926757812, 36.5860595703125, 218.80880737304688, 141.6807861328125, 116.19525909423828, 79.18324279785156, 334.8384094238281], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000192.npy"} +{"epoch": 0.28193832599118945, "step": 193, "batch_size": 64, "mean": 55.17446517944336, "std": 143.82960510253906, "min": -431.9443359375, "p10": -78.84361419677732, "median": 62.395206451416016, "p90": 235.58886871337896, "max": 368.573486328125, "pos_frac": 0.640625, "sample": [118.36280822753906, -14.786415100097656, 29.028953552246094, 193.28952026367188, -431.9443359375, 105.5948257446289, 68.45084381103516, 62.39117431640625, -409.44171142578125, 315.1624755859375, 65.15896606445312, -5.872320175170898, 195.99884033203125, -32.2546501159668, -17.756088256835938, 97.15567016601562, 96.83724212646484, 129.35694885253906, -2.7988204956054688, 248.6346435546875, 2.7104415893554688, -21.38872528076172, 18.346378326416016, 150.56814575195312, 27.215559005737305, 126.78990173339844, -55.659542083740234, -35.04077911376953, -4.5085601806640625, -60.96321105957031, -24.356969833374023, 220.3099822998047, -84.54782104492188, -95.5261459350586, 242.136962890625, -131.08340454101562, 189.48361206054688, -104.49178314208984, 12.86192512512207, 62.39923858642578, 10.196945190429688, 147.67694091796875, 277.289794921875, 163.455322265625, 123.7413330078125, -137.5654754638672, 327.96929931640625, 95.95085906982422, 127.59835052490234, 129.27891540527344, -62.86103057861328, 95.51610565185547, -4.017803192138672, -65.53379821777344, -51.5927734375, 136.79080200195312, 368.573486328125, 132.02378845214844, -59.38162612915039, 21.84839630126953, 293.6315612792969, 87.7801284790039, 20.676177978515625, 106.29627227783203], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000193.npy"} +{"epoch": 0.2834067547723935, "step": 194, "batch_size": 64, "mean": 88.51359558105469, "std": 103.28008270263672, "min": -102.3092041015625, "p10": -53.13686065673827, "median": 68.5226058959961, "p90": 249.58011474609376, "max": 319.38323974609375, "pos_frac": 0.8125, "sample": [142.57864379882812, 21.156227111816406, 32.36981964111328, 14.972976684570312, 121.62034606933594, 259.898193359375, 111.58848571777344, 176.878173828125, -24.2515869140625, 20.908042907714844, 32.95960998535156, 255.4810791015625, -79.26171875, -58.393829345703125, 47.460853576660156, 65.20083618164062, 214.04666137695312, 58.38249969482422, 65.71200561523438, 136.0826416015625, 32.36275100708008, 82.86930847167969, 21.404651641845703, 8.27468490600586, 116.4416275024414, -87.56221008300781, -3.8446788787841797, 221.739501953125, 67.50009155273438, 73.37276458740234, 251.4528350830078, 137.5070037841797, 245.21043395996094, 183.76348876953125, 31.093589782714844, 294.17193603515625, 66.60560607910156, 34.088844299316406, 146.587158203125, 49.025917053222656, 192.9694366455078, -92.91177368164062, 41.573631286621094, -10.041450500488281, 106.31538391113281, 171.0614013671875, -6.447246551513672, -102.3092041015625, 40.39204406738281, -57.32008361816406, 113.9298095703125, -65.3884506225586, 63.28868103027344, 253.03956604003906, 81.23866271972656, 319.38323974609375, 134.57569885253906, 69.54512023925781, 296.99530029296875, 89.03939056396484, 179.05917358398438, 183.456787109375, 119.34588623046875, -43.376007080078125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000194.npy"} +{"epoch": 0.28487518355359764, "step": 195, "batch_size": 64, "mean": 95.76254272460938, "std": 117.97086334228516, "min": -116.4842758178711, "p10": -45.9299430847168, "median": 83.829833984375, "p90": 241.60352172851566, "max": 407.17236328125, "pos_frac": 0.8125, "sample": [85.8467788696289, -20.696746826171875, 226.23094177246094, 47.733455657958984, 69.74806213378906, 407.17236328125, 73.98224639892578, 305.73748779296875, 220.15069580078125, -12.494161605834961, 5.8118896484375, 53.654632568359375, 8.121055603027344, -92.34385681152344, 245.2684326171875, 118.29209899902344, 57.860198974609375, 216.47962951660156, -45.95746612548828, 327.39263916015625, -38.05056381225586, 108.32264709472656, -103.18775939941406, 39.635650634765625, 109.44015502929688, 111.53459930419922, 227.75674438476562, 305.391357421875, 150.9771728515625, 129.39401245117188, 151.35781860351562, -78.31497955322266, 89.52819061279297, 189.78652954101562, 14.118621826171875, 157.74365234375, -30.60816764831543, 3.7834243774414062, 173.8504638671875, 211.95130920410156, -116.4842758178711, 265.82061767578125, 41.861576080322266, 79.8698959350586, 394.35174560546875, 141.48541259765625, 81.8128890991211, 59.50950622558594, -58.36290740966797, 12.818601608276367, 162.80491638183594, -86.14085388183594, 233.05206298828125, 113.23951721191406, 24.696426391601562, 87.48989868164062, 34.72215270996094, 56.125457763671875, 16.978818893432617, 85.88575744628906, -45.86572265625, 56.149234771728516, 153.59149169921875, 110.98934936523438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000195.npy"} +{"epoch": 0.28634361233480177, "step": 196, "batch_size": 64, "mean": 107.95289611816406, "std": 144.41552734375, "min": -222.81033325195312, "p10": -47.54222908020019, "median": 77.84410095214844, "p90": 307.29429016113284, "max": 452.49713134765625, "pos_frac": 0.765625, "sample": [206.21475219726562, 205.57421875, 309.5201110839844, 223.9031219482422, -49.65839385986328, 302.1007080078125, 300.98956298828125, 438.97369384765625, 233.9508056640625, 74.7999267578125, 7.504283905029297, 49.69481658935547, 274.3493347167969, 193.86563110351562, 50.621002197265625, 6.200960159301758, -28.38949966430664, 90.58619689941406, -7.370849609375, 48.15699768066406, 432.3712158203125, -15.245830535888672, 57.53709411621094, 50.85493087768555, 130.93801879882812, 132.94515991210938, 131.5071563720703, 146.392333984375, 49.97343444824219, -9.351369857788086, 107.05068969726562, 78.99755859375, 431.82708740234375, 452.49713134765625, -21.250595092773438, 178.48011779785156, 91.36257934570312, 68.58473205566406, 111.35711669921875, 76.69064331054688, 67.49112701416016, 57.83275604248047, 236.034912109375, -2.8584938049316406, 155.16616821289062, 35.73740005493164, 81.95870208740234, -158.7817840576172, 68.56717681884766, -222.81033325195312, -70.32839965820312, 149.73699951171875, 92.55661010742188, -42.60451126098633, 318.93231201171875, -57.95545196533203, -127.70614624023438, 66.7467041015625, -4.32940673828125, 134.45986938476562, 273.182373046875, 320.8529052734375, -82.92178344726562, 4.919094085693359], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000196.npy"} +{"epoch": 0.2878120411160059, "step": 197, "batch_size": 64, "mean": 83.12100982666016, "std": 116.2892837524414, "min": -129.53468322753906, "p10": -35.76622924804687, "median": 54.03647994995117, "p90": 245.027555847168, "max": 425.2227783203125, "pos_frac": 0.8125, "sample": [57.53886413574219, 331.7962646484375, 155.87486267089844, -25.718971252441406, 164.2411346435547, 8.074302673339844, 13.990478515625, -69.99755859375, 65.0134506225586, 425.2227783203125, 365.1775817871094, 248.76499938964844, 69.14134216308594, 127.90438842773438, -90.73282623291016, 1.2105827331542969, 126.56975555419922, 283.87548828125, 25.47814178466797, 43.7357063293457, 106.89984130859375, 216.2293243408203, 39.30758285522461, -1.6649169921875, 55.86371612548828, 6.534975051879883, -99.12346649169922, 128.4001007080078, 75.64185333251953, 99.27638244628906, 254.88766479492188, 147.03131103515625, 36.70777893066406, 236.30685424804688, 1.42578125, 190.89170837402344, -37.477203369140625, 2.45880126953125, -74.08535766601562, -129.53468322753906, 36.103668212890625, 114.80736541748047, -0.5544872283935547, 2.7296981811523438, 10.005605697631836, 78.29632568359375, 322.3477783203125, 197.102294921875, 22.64251708984375, 197.8697509765625, -15.009126663208008, 51.65811538696289, 67.14972686767578, 20.37982940673828, 12.715690612792969, 139.82125854492188, 180.21144104003906, 81.43376159667969, 206.94976806640625, -31.773956298828125, -50.341766357421875, 20.130455017089844, 19.72064208984375, 52.20924377441406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000197.npy"} +{"epoch": 0.28928046989721, "step": 198, "batch_size": 64, "mean": 107.83550262451172, "std": 147.0233154296875, "min": -239.347900390625, "p10": -32.622202491760234, "median": 90.02142715454102, "p90": 233.60023345947266, "max": 615.9207763671875, "pos_frac": 0.84375, "sample": [-134.25674438476562, 103.21205139160156, -39.787559509277344, 536.9305419921875, 119.33563232421875, 200.91827392578125, 342.9613037109375, 59.930362701416016, 153.9703826904297, 51.017295837402344, 113.48275756835938, 171.7901611328125, -142.19607543945312, 17.04332733154297, 6.56793212890625, 234.66575622558594, 151.11703491210938, 135.11911010742188, 87.3204345703125, 89.8091812133789, -15.903036117553711, -40.59526062011719, 36.59375762939453, 172.22853088378906, 15.16908073425293, 75.7007827758789, -68.06884002685547, 200.92547607421875, 100.63792419433594, 90.23367309570312, 89.27262115478516, 109.05022430419922, 140.24127197265625, 68.77464294433594, 123.89433288574219, 12.827106475830078, 81.26058959960938, 157.93649291992188, 146.07772827148438, 231.114013671875, -239.347900390625, 546.1002807617188, 97.35118865966797, 63.06525421142578, 6.618946075439453, 28.301864624023438, 254.9307861328125, -40.870262145996094, 3.9722824096679688, 72.12025451660156, 35.428096771240234, 90.89517211914062, 88.40924072265625, 143.13938903808594, -9.501367568969727, 155.79046630859375, 163.4965362548828, 210.8062744140625, 14.192497253417969, 432.60797119140625, -7.560028076171875, 615.9207763671875, 48.70283508300781, 140.57919311523438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000198.npy"} +{"epoch": 0.2907488986784141, "step": 199, "batch_size": 64, "mean": 104.99635314941406, "std": 142.15296936035156, "min": -168.97659301757812, "p10": -58.211077117919906, "median": 86.64527130126953, "p90": 288.8389770507813, "max": 465.87158203125, "pos_frac": 0.796875, "sample": [214.19650268554688, 170.74142456054688, -148.14511108398438, -9.414268493652344, 280.6901550292969, 72.12716674804688, -64.45350646972656, 193.4713592529297, -165.78720092773438, 120.72183990478516, 2.7801284790039062, -68.64828491210938, -168.97659301757812, 208.38113403320312, 211.1768798828125, 63.20093536376953, 13.038017272949219, 137.875, 402.0604248046875, 39.161643981933594, 78.31544494628906, 175.2665252685547, 372.431396484375, 309.7906494140625, -96.562744140625, 29.677047729492188, -27.46002960205078, 120.46846771240234, 214.4970703125, 27.295806884765625, 35.32090759277344, 255.54196166992188, 465.87158203125, 85.01472473144531, 73.31005096435547, 209.9036865234375, -24.48321533203125, 116.1049575805664, 24.4931640625, 105.22416687011719, 64.13270568847656, 273.5751037597656, 132.85287475585938, 36.0395622253418, 292.3313293457031, 122.78099822998047, 66.07229614257812, -43.645408630371094, -42.60257339477539, -165.16368103027344, 265.7830810546875, 83.67683410644531, 110.98184204101562, 88.27581787109375, 363.280517578125, 172.8551483154297, 8.284433364868164, 89.04474639892578, 59.53258514404297, 90.59352111816406, 46.84184265136719, 151.03466796875, 421.94268798828125, -22.953216552734375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000199.npy"} +{"epoch": 0.2922173274596182, "step": 200, "batch_size": 64, "mean": 107.0880126953125, "std": 159.60740661621094, "min": -277.30255126953125, "p10": -48.73411178588866, "median": 78.48059844970703, "p90": 338.73414001464846, "max": 548.9191284179688, "pos_frac": 0.75, "sample": [233.96212768554688, 114.56452941894531, 61.01388168334961, 402.4295959472656, 42.33613967895508, 152.65509033203125, 14.697845458984375, -0.7885665893554688, 434.97332763671875, -93.09031677246094, 335.11346435546875, 247.94784545898438, 104.59003448486328, 8.42837905883789, 70.82684326171875, 330.82232666015625, 221.31472778320312, 409.067138671875, -2.3062744140625, 340.2858581542969, 69.58867645263672, -0.16179847717285156, 158.90869140625, 222.25148010253906, -23.888675689697266, 70.86190795898438, 169.48487854003906, -201.802490234375, 548.9191284179688, 95.8420639038086, 106.58912658691406, -30.700626373291016, -3.1072845458984375, 70.05166625976562, 52.97929382324219, -24.76887321472168, -277.30255126953125, 43.640106201171875, 106.98048400878906, 369.5539245605469, 6.586828231811523, 90.09973907470703, 59.64286804199219, -6.589851379394531, 4.495113372802734, -146.8876953125, 86.09928894042969, -55.15130615234375, 307.3393859863281, -33.760658264160156, 8.420753479003906, 423.36505126953125, 47.9622802734375, 28.590538024902344, 100.57606506347656, 268.72625732421875, 165.19793701171875, 176.22354125976562, 107.92427062988281, 181.03518676757812, -119.37344360351562, 146.48565673828125, -72.19685363769531, 126.05863189697266], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000200.npy"} +{"epoch": 0.2936857562408223, "step": 201, "batch_size": 64, "mean": 104.79566955566406, "std": 145.04031372070312, "min": -278.01397705078125, "p10": -51.72406158447264, "median": 86.45952606201172, "p90": 280.77595825195317, "max": 492.604736328125, "pos_frac": 0.8125, "sample": [237.77032470703125, 18.574026107788086, 228.16261291503906, -213.05792236328125, 47.6466064453125, -91.95906066894531, 8.614797592163086, 14.866437911987305, -26.916183471679688, 94.3189697265625, 2.176300048828125, 27.517822265625, 38.325897216796875, -7.5563812255859375, 472.7525329589844, 109.42229461669922, 61.20831298828125, 113.77313232421875, 27.830841064453125, 244.81527709960938, 352.85577392578125, 131.36212158203125, 6.4232635498046875, 94.19190979003906, 226.18203735351562, 288.43707275390625, -76.41654968261719, 223.00608825683594, 197.67947387695312, -10.246381759643555, 77.5400390625, 127.12481689453125, 139.85665893554688, 123.5338134765625, -37.593505859375, -69.55404663085938, 178.27923583984375, 74.00574493408203, 54.44188690185547, 492.604736328125, 116.74237060546875, -24.028404235839844, 166.7747802734375, 80.028564453125, 55.358238220214844, 173.580322265625, 163.67428588867188, 243.6284942626953, 397.2323913574219, 70.93598937988281, 90.49297332763672, -67.74090576171875, 153.7977752685547, 82.42607879638672, -278.01397705078125, 290.15008544921875, 428.0414733886719, 147.4910888671875, 12.135995864868164, -57.78001403808594, 262.9000244140625, 53.67668914794922, 12.009124755859375, 131.4083251953125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000201.npy"} +{"epoch": 0.29515418502202645, "step": 202, "batch_size": 64, "mean": 73.4825210571289, "std": 142.9154815673828, "min": -213.33120727539062, "p10": -100.50401763916014, "median": 65.48975372314453, "p90": 258.6785095214844, "max": 514.6973876953125, "pos_frac": 0.703125, "sample": [514.6973876953125, -52.727535247802734, 125.00540924072266, 324.65869140625, 2.1754322052001953, -53.418373107910156, 196.05224609375, -54.94697570800781, 126.913818359375, 97.71587371826172, 53.51026916503906, -191.99757385253906, 175.65524291992188, 228.49786376953125, 214.31240844726562, 50.97941589355469, -194.721435546875, 49.81280517578125, 110.67158508300781, 346.78155517578125, 120.69904327392578, 284.0855712890625, -137.64608764648438, 192.75274658203125, 26.78595733642578, 30.232505798339844, 399.65771484375, -18.51753807067871, 104.03598022460938, 140.89892578125, 107.69841003417969, 129.2559051513672, 258.825927734375, -31.430374145507812, -37.018707275390625, -129.3917999267578, 79.93318176269531, 62.69140625, 12.655982971191406, 228.10400390625, 105.30645751953125, -5.6804962158203125, 21.306724548339844, 258.33453369140625, 172.81539916992188, 118.81916809082031, 83.3716049194336, -213.33120727539062, 68.28810119628906, 86.21197509765625, 13.878704071044922, 48.68218231201172, -35.35505676269531, -74.74126434326172, 44.946197509765625, -110.37248229980469, 85.39899444580078, -77.47760009765625, -43.495731353759766, 58.60655975341797, 265.7646789550781, -138.43405151367188, 98.99038696289062, -22.889209747314453], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000202.npy"} +{"epoch": 0.2966226138032305, "step": 203, "batch_size": 64, "mean": 91.44692993164062, "std": 124.95246887207031, "min": -171.889892578125, "p10": -64.70656509399413, "median": 73.48001861572266, "p90": 262.28246154785165, "max": 461.73895263671875, "pos_frac": 0.78125, "sample": [16.230701446533203, 136.2373809814453, 173.80026245117188, 56.99298858642578, -34.447879791259766, 290.5980529785156, -77.56816101074219, 150.7763671875, 30.661346435546875, 113.03195190429688, -2.6037216186523438, 111.0601577758789, 104.9930648803711, 168.7666015625, -108.08561706542969, 194.2529754638672, 146.5215301513672, -23.76678466796875, 199.76431274414062, 138.86634826660156, 284.7778625488281, 406.67987060546875, -88.8486328125, 113.96630859375, 132.64028930664062, 34.193023681640625, 461.73895263671875, 15.900272369384766, -94.3653564453125, 148.82444763183594, 6.431419372558594, 173.49813842773438, 194.5018310546875, 269.22271728515625, 37.248260498046875, 185.20875549316406, 82.86039733886719, 113.79767608642578, 76.81782531738281, 134.0489501953125, 32.729827880859375, 67.27552032470703, 52.932838439941406, 246.08853149414062, -59.50763702392578, -171.889892578125, -66.93467712402344, 70.1422119140625, -32.192535400390625, 6.108005523681641, 181.32644653320312, 30.27019500732422, 21.856548309326172, 48.74134826660156, -55.267822265625, -85.680419921875, 150.26058959960938, 67.45362854003906, 275.5325622558594, 323.8587341308594, -6.387300491333008, 228.4364471435547, 29.396997451782227, 22.828636169433594], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000203.npy"} +{"epoch": 0.29809104258443464, "step": 204, "batch_size": 64, "mean": 84.58815002441406, "std": 146.94601440429688, "min": -230.99237060546875, "p10": -77.76015167236325, "median": 65.46261405944824, "p90": 302.12508392334007, "max": 492.10052490234375, "pos_frac": 0.703125, "sample": [222.04881286621094, 72.46186828613281, 63.571468353271484, 44.41839599609375, 130.32273864746094, -8.581222534179688, 215.77490234375, -104.92542266845703, 91.41265106201172, 19.330713272094727, -38.75973129272461, 28.68703269958496, 14.487136840820312, 91.47272491455078, 56.866729736328125, -36.907447814941406, -230.99237060546875, 184.61593627929688, 53.1033935546875, 13.126794815063477, 249.3076934814453, -124.07463836669922, -18.336807250976562, -7.997386932373047, 71.51385498046875, -17.70754623413086, 77.73672485351562, 155.6717987060547, 154.94705200195312, -90.02285766601562, 399.1781921386719, -20.60236358642578, 56.25384521484375, 34.23900604248047, -49.14717102050781, 153.9950714111328, 58.00657653808594, 148.8974609375, 442.84844970703125, -215.6078338623047, 324.7611083984375, 173.2830352783203, 170.5815887451172, 81.00119018554688, 99.1119384765625, -9.423433303833008, 44.28529357910156, 236.08094787597656, 337.0819396972656, -12.288818359375, 85.15531921386719, 154.13180541992188, -126.8516845703125, 67.353759765625, 492.10052490234375, 376.0394287109375, -37.87174987792969, 157.58291625976562, 25.147241592407227, 374.8818359375, -96.0123291015625, 84.05306243896484, 113.16346740722656, -40.34093475341797], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000204.npy"} +{"epoch": 0.29955947136563876, "step": 205, "batch_size": 64, "mean": 113.03794860839844, "std": 145.2470703125, "min": -243.35784912109375, "p10": -71.09773788452148, "median": 117.41387176513672, "p90": 272.374057006836, "max": 407.3695983886719, "pos_frac": 0.78125, "sample": [39.764312744140625, 129.31973266601562, 217.51901245117188, 389.77435302734375, 118.03951263427734, 8.635461807250977, -6.27549934387207, 27.7083740234375, 12.63226318359375, 244.61190795898438, 274.208984375, -113.18359375, -96.52745056152344, 95.6697006225586, -18.01812744140625, 96.8653564453125, 20.86917495727539, 392.7664489746094, 249.8391571044922, 30.156078338623047, 116.7882308959961, 102.95578002929688, -147.16314697265625, 181.8253631591797, 320.64385986328125, 252.096435546875, 202.07028198242188, -7.387104034423828, -72.01688385009766, 208.998779296875, -47.21329116821289, 402.13763427734375, 143.61045837402344, 91.63040924072266, 34.203590393066406, 138.24533081054688, 407.3695983886719, 264.91015625, 119.58131408691406, 143.47764587402344, -226.14178466796875, 142.8193359375, 127.58904266357422, 233.68544006347656, 165.0463104248047, -68.95306396484375, 156.63742065429688, 97.63664245605469, 268.0925598144531, 90.9604721069336, 255.60096740722656, 198.8379669189453, 185.2181854248047, 72.89302825927734, 157.9756622314453, 363.7507019042969, 113.0645523071289, -12.333656311035156, 23.529983520507812, -243.35784912109375, -79.26274108886719, 223.9345703125, -26.56097412109375, 42.626068115234375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000205.npy"} +{"epoch": 0.3010279001468429, "step": 206, "batch_size": 64, "mean": 120.99470520019531, "std": 131.75762939453125, "min": -240.75152587890625, "p10": -36.576860237121565, "median": 109.38289642333984, "p90": 295.71258544921886, "max": 418.68585205078125, "pos_frac": 0.84375, "sample": [50.617279052734375, 193.10870361328125, 85.15670776367188, 130.18418884277344, 418.68585205078125, 316.4415588378906, 80.61424255371094, 55.862648010253906, 195.05052185058594, 267.357666015625, 57.472747802734375, 142.6731719970703, 26.73085594177246, 110.16826629638672, -240.75152587890625, 65.16897583007812, 84.8409423828125, 153.5144500732422, 61.098907470703125, -17.843351364135742, 228.49627685546875, -44.605506896972656, 246.08059692382812, 364.5896911621094, 122.76502990722656, 196.35040283203125, 25.554931640625, 188.6363067626953, -109.82633972167969, -109.83712005615234, 194.0290985107422, 304.865478515625, 225.66241455078125, 247.90077209472656, 13.577394485473633, 264.71356201171875, 83.48551177978516, -62.957515716552734, 159.60977172851562, -157.5654754638672, 45.580406188964844, 230.13278198242188, 4.479465484619141, 160.8990478515625, 97.41492462158203, -13.976882934570312, 130.11749267578125, 84.01647186279297, 198.03858947753906, 99.4923095703125, -57.59104919433594, 18.64196014404297, 102.25666046142578, 194.25747680664062, 370.82672119140625, 274.3558349609375, -5.739641189575195, 128.36126708984375, 108.59752655029297, 329.5903015136719, 66.2532730102539, 25.248857498168945, 323.862548828125, 210.86618041992188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000206.npy"} +{"epoch": 0.302496328928047, "step": 207, "batch_size": 64, "mean": 139.16213989257812, "std": 158.24215698242188, "min": -139.79080200195312, "p10": -32.85572128295898, "median": 99.4476547241211, "p90": 317.5259124755859, "max": 591.0018920898438, "pos_frac": 0.828125, "sample": [27.93722152709961, -139.79080200195312, 71.62158966064453, 111.21333312988281, 180.8687744140625, 44.73479461669922, 314.8408203125, -34.16845703125, -16.246292114257812, 10.48797607421875, 99.62435913085938, 84.9347915649414, 312.4486389160156, -8.581741333007812, 144.47048950195312, 174.86981201171875, 45.31871032714844, 102.04521179199219, 30.061622619628906, 37.37205505371094, 57.17475509643555, 162.817138671875, 31.68456268310547, 268.71954345703125, 36.85267639160156, -54.071075439453125, 318.6766662597656, 66.20904541015625, 31.063947677612305, -55.73763656616211, 291.6160583496094, 260.111083984375, -49.250160217285156, -0.03148651123046875, 31.515457153320312, 271.6636962890625, 36.800567626953125, 214.66062927246094, 168.08349609375, 257.99993896484375, 264.4749755859375, 145.4290771484375, 110.39791870117188, 140.4356689453125, 287.4504089355469, 256.82958984375, 99.32200622558594, 395.799560546875, 531.435302734375, 291.584228515625, 46.34825134277344, 440.211669921875, -55.41229248046875, 49.05095672607422, 591.0018920898438, 43.427490234375, 94.90684509277344, 99.57330322265625, 94.04673767089844, 528.1727294921875, -85.5522232055664, 193.06802368164062, 433.5455627441406, -29.79267120361328], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000207.npy"} +{"epoch": 0.3039647577092511, "step": 208, "batch_size": 64, "mean": 110.5765380859375, "std": 163.9280548095703, "min": -353.39410400390625, "p10": -38.34215011596679, "median": 80.05517578125, "p90": 342.5555969238282, "max": 454.82623291015625, "pos_frac": 0.796875, "sample": [-225.3746795654297, 155.38583374023438, -167.14105224609375, 318.8551025390625, 20.49346923828125, -17.697406768798828, 403.2852783203125, 26.136106491088867, 128.13804626464844, 267.88836669921875, 234.0420684814453, 72.8440170288086, 76.82244873046875, 166.9828643798828, 212.81048583984375, 56.08196258544922, 445.26397705078125, 4.06829833984375, 454.82623291015625, 137.8951873779297, 0.808319091796875, 14.981605529785156, 263.1018371582031, 175.72030639648438, 78.30430603027344, 23.69818878173828, 61.0487060546875, 9.13873291015625, -139.9877166748047, 295.63818359375, -137.3133087158203, -9.373416900634766, 52.79245376586914, 122.11919403076172, 133.34552001953125, 310.22100830078125, 271.1796875, 35.61149215698242, -10.302543640136719, 242.44140625, -30.3128662109375, 76.70263671875, 29.821273803710938, 368.0831298828125, 145.25015258789062, 352.71295166015625, 115.1353759765625, -353.39410400390625, -41.78327178955078, 170.60362243652344, 16.483856201171875, 119.64256286621094, 367.8414611816406, 103.36732482910156, 57.93486022949219, -148.65155029296875, 21.360443115234375, 395.05438232421875, 183.81332397460938, 296.33026123046875, 218.3134765625, 81.80604553222656, -7.225982666015625, -26.7720947265625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000208.npy"} +{"epoch": 0.3054331864904552, "step": 209, "batch_size": 64, "mean": 97.8099365234375, "std": 158.91526794433594, "min": -347.19793701171875, "p10": -87.92796630859375, "median": 89.34767532348633, "p90": 334.8020172119141, "max": 441.60662841796875, "pos_frac": 0.71875, "sample": [346.350341796875, 80.01641845703125, 168.02853393554688, -43.61144256591797, 387.08428955078125, 429.37884521484375, 340.0760498046875, 127.46247863769531, 142.13430786132812, 106.64547729492188, -69.34614562988281, 103.35388946533203, 177.774658203125, 90.49535369873047, 172.6298370361328, 147.31814575195312, 166.53253173828125, -161.7915802001953, -88.70175170898438, 48.4683837890625, 265.5400390625, 250.463134765625, -4.657968521118164, -96.93954467773438, 56.449066162109375, -52.8355712890625, -30.89251136779785, 6.708702087402344, 104.03599548339844, 160.7996826171875, 123.98806762695312, 88.19999694824219, -126.59264373779297, -43.231266021728516, 71.65132904052734, 64.27951049804688, -86.12246704101562, -24.846553802490234, 382.8155212402344, 295.61651611328125, 45.36761474609375, 441.60662841796875, 2.22247314453125, -52.766258239746094, -24.463577270507812, 45.89257049560547, 66.29399108886719, 77.449462890625, 264.614501953125, 143.0951385498047, 144.0618896484375, -42.78717803955078, -127.59814453125, -347.19793701171875, 185.40887451171875, 111.17437744140625, 37.931827545166016, 51.441741943359375, 139.13711547851562, -167.7195281982422, 354.031005859375, 322.4959411621094, 230.36520385742188, 285.0502014160156], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000209.npy"} +{"epoch": 0.3069016152716593, "step": 210, "batch_size": 64, "mean": 112.94706726074219, "std": 153.76242065429688, "min": -259.88543701171875, "p10": -27.921669006347653, "median": 84.37294006347656, "p90": 340.4181274414064, "max": 478.63177490234375, "pos_frac": 0.734375, "sample": [-21.48455810546875, 35.422752380371094, 136.25900268554688, 77.57496643066406, -16.851364135742188, -73.92071533203125, 188.885009765625, -14.757108688354492, 131.86265563964844, -31.71874237060547, 196.13186645507812, 28.72492218017578, 29.552532196044922, 118.0623550415039, 284.20318603515625, 156.11138916015625, -16.542129516601562, 75.10755920410156, 229.98446655273438, 20.482364654541016, 304.17999267578125, 110.64752197265625, 21.37211036682129, 263.77178955078125, 298.4888610839844, 3.1567440032958984, 20.74285125732422, 354.5633544921875, 199.49795532226562, 354.8852844238281, 127.4668960571289, 261.970703125, 117.11206817626953, 306.94219970703125, -148.73841857910156, -29.81756591796875, -79.45188903808594, -23.497909545898438, 34.48200988769531, 18.76784896850586, 130.65989685058594, 180.85549926757812, 365.26788330078125, 107.22030639648438, -8.258077621459961, -19.165496826171875, 445.9878234863281, -16.113372802734375, 128.58192443847656, 160.9862060546875, 307.41259765625, 478.63177490234375, 299.1895751953125, 51.33319091796875, -70.68075561523438, 91.17091369628906, -14.666130065917969, -2.609851837158203, 13.796817779541016, 375.506103515625, 5.094139099121094, 16.70989227294922, -259.88543701171875, 411.95361328125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000210.npy"} +{"epoch": 0.30837004405286345, "step": 211, "batch_size": 64, "mean": 97.30603790283203, "std": 172.7158966064453, "min": -246.95819091796875, "p10": -59.583396148681636, "median": 62.18922424316406, "p90": 279.88925476074223, "max": 609.9202880859375, "pos_frac": 0.71875, "sample": [121.9740982055664, -51.07281494140625, 261.76776123046875, 95.43352508544922, 75.16653442382812, 170.6605224609375, -56.06285095214844, 115.83475494384766, 551.1483764648438, -246.95819091796875, -27.37908172607422, 53.68312072753906, 28.96249771118164, 14.29608154296875, 82.27761840820312, 230.00709533691406, 162.9590301513672, 209.6982421875, 169.91293334960938, -10.316696166992188, -0.8228530883789062, -49.5908203125, 207.52447509765625, 115.17584228515625, -61.092201232910156, 9.240890502929688, -50.055946350097656, 521.4686279296875, 45.628971099853516, 609.9202880859375, 492.447998046875, -42.52355194091797, -142.34075927734375, 153.73057556152344, 255.88327026367188, 343.11309814453125, -164.934814453125, 143.9619903564453, 46.10771179199219, -7.3105010986328125, 139.04055786132812, 450.9228515625, -30.84229278564453, 76.5770263671875, 4.965909957885742, 48.109458923339844, 68.51579284667969, -94.35786437988281, 20.13113784790039, 204.62203979492188, -166.8939971923828, 55.86265563964844, 212.5360870361328, 49.57609558105469, 165.62318420410156, 287.6556091308594, 101.4814224243164, 248.96237182617188, 21.896774291992188, 135.42616271972656, 39.63785171508789, 18.52593994140625, -13.553237915039062, -194.36016845703125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000211.npy"} +{"epoch": 0.30983847283406757, "step": 212, "batch_size": 64, "mean": 137.662841796875, "std": 180.38011169433594, "min": -280.2103271484375, "p10": -65.66937561035154, "median": 137.407470703125, "p90": 343.38566894531255, "max": 618.1883544921875, "pos_frac": 0.75, "sample": [-280.2103271484375, -94.4764633178711, 330.9795837402344, 291.6474914550781, 58.306861877441406, 254.1715087890625, 109.45166015625, -75.61972045898438, -42.451904296875, -3.0979843139648438, 250.1958770751953, 201.67791748046875, -225.33457946777344, -34.1490478515625, -26.978252410888672, 5.542045593261719, 347.94268798828125, 224.58047485351562, 459.72406005859375, 259.25054931640625, 120.18984985351562, -108.44489288330078, 30.53415298461914, 108.65284729003906, 148.82275390625, 172.92312622070312, 236.71475219726562, -80.51983642578125, 321.98974609375, -11.351877212524414, 140.19711303710938, 187.1994171142578, 618.1883544921875, 189.54115295410156, 55.733856201171875, -246.76087951660156, 350.9556884765625, -40.67152404785156, 134.61782836914062, 567.9386596679688, 304.7470703125, 130.88165283203125, 30.3236026763916, 248.01055908203125, 14.647865295410156, 116.37591552734375, -29.920780181884766, 59.35689163208008, 332.75262451171875, 52.794193267822266, 156.09661865234375, 381.78289794921875, 418.40899658203125, 252.370361328125, 323.5242919921875, -20.99881362915039, 236.8834686279297, 66.49269104003906, 161.1722412109375, 148.62850952148438, 70.48374938964844, 305.6065673828125, -40.2330322265625, 182.63177490234375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000212.npy"} +{"epoch": 0.31130690161527164, "step": 213, "batch_size": 64, "mean": 131.33126831054688, "std": 161.22586059570312, "min": -232.87466430664062, "p10": -65.63146057128904, "median": 116.81184387207031, "p90": 353.2134735107422, "max": 456.8680419921875, "pos_frac": 0.8125, "sample": [304.63897705078125, -98.40972900390625, 35.36815643310547, -114.03287506103516, 231.44554138183594, 16.596206665039062, 67.61624908447266, 112.02999877929688, -232.87466430664062, -89.56326293945312, 12.475637435913086, 349.33807373046875, 354.8743591308594, -43.64622497558594, 94.99610137939453, 184.37301635742188, 8.787347793579102, 63.54835510253906, 167.21624755859375, 328.3265380859375, 456.8680419921875, 334.0918273925781, 62.04985809326172, 425.597412109375, 176.4403839111328, 290.63946533203125, 383.2846374511719, -12.25345230102539, -3.9526748657226562, 7.362842559814453, 194.300048828125, 133.36346435546875, 205.84268188476562, 135.56068420410156, -50.24139404296875, 12.13427734375, 171.45791625976562, 361.90185546875, 299.78887939453125, 167.34927368164062, -15.542232513427734, 205.45872497558594, 343.8507385253906, -72.22720336914062, 226.77037048339844, 60.322021484375, 189.65673828125, 451.9640197753906, -150.989501953125, 214.4971923828125, 266.6304016113281, 311.449462890625, 363.15533447265625, 34.84822463989258, 24.243789672851562, 5.820095062255859, -140.2637939453125, 74.68118286132812, 105.04475402832031, 56.40332794189453, 20.77196502685547, 22.680557250976562, 179.69180297851562, 121.59368896484375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000213.npy"} +{"epoch": 0.31277533039647576, "step": 214, "batch_size": 64, "mean": 118.94904327392578, "std": 156.64462280273438, "min": -190.0250244140625, "p10": -54.36636276245117, "median": 87.01128387451172, "p90": 307.1671020507813, "max": 554.447021484375, "pos_frac": 0.765625, "sample": [73.48918914794922, 421.3595886230469, 18.856914520263672, 200.6488037109375, -8.156469345092773, -10.942441940307617, -87.85700988769531, -190.0250244140625, 291.5816955566406, -78.90438842773438, 168.40292358398438, 42.29126739501953, 94.34864807128906, -0.6033115386962891, 30.117095947265625, 104.61846923828125, 243.6876220703125, -25.381479263305664, 25.714920043945312, 203.33816528320312, 67.65713500976562, 78.17232513427734, 111.05742645263672, 285.00079345703125, 276.277587890625, 71.2557601928711, 12.168510437011719, -6.078760147094727, 136.0915985107422, -46.64765167236328, 102.58424377441406, 60.07843780517578, 0.8525009155273438, 340.7890319824219, -91.38355255126953, 554.447021484375, 369.0707702636719, 7.0852203369140625, 31.89190673828125, 415.3519592285156, 235.5805206298828, 212.66351318359375, 296.39068603515625, 311.7057189941406, -148.24803161621094, 541.1656494140625, 31.212560653686523, 202.41738891601562, 222.43966674804688, 110.6980972290039, -62.778343200683594, 194.57749938964844, -48.204345703125, 133.35020446777344, -55.07270812988281, 247.55877685546875, 62.7368049621582, 296.5769958496094, 68.71435546875, 171.7305908203125, 126.60972595214844, 141.65061950683594, -52.718223571777344, 79.67391967773438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000214.npy"} +{"epoch": 0.3142437591776799, "step": 215, "batch_size": 64, "mean": 127.212158203125, "std": 145.72021484375, "min": -151.93128967285156, "p10": -73.14778594970703, "median": 104.40730667114258, "p90": 308.5894287109375, "max": 544.8550415039062, "pos_frac": 0.84375, "sample": [68.25326538085938, 331.8092041015625, 130.19747924804688, 289.94915771484375, -75.36880493164062, -72.13917541503906, 40.298301696777344, 125.40508270263672, 158.48306274414062, 104.40279388427734, 136.4264678955078, 56.948211669921875, 11.458230972290039, -39.468902587890625, 458.05535888671875, 280.2892761230469, 261.8507080078125, 307.776123046875, 544.8550415039062, 182.03555297851562, 104.41181945800781, 416.312744140625, 132.25015258789062, -95.83113098144531, 75.11358642578125, 211.32669067382812, -73.58004760742188, 76.44986724853516, 192.5478515625, 267.7769775390625, 33.63134765625, 82.62008666992188, 85.64425659179688, -1.467031478881836, 174.1451873779297, 103.14631652832031, -135.79879760742188, 161.8585662841797, 206.95904541015625, 283.4014587402344, 354.1036682128906, 132.09146118164062, 204.52142333984375, 1.5499267578125, 153.7528076171875, 52.63946533203125, 50.646263122558594, -74.49754333496094, 56.761898040771484, 249.69320678710938, -151.93128967285156, 138.87448120117188, 429.8896484375, 26.74077606201172, 98.21115112304688, 90.7071533203125, 308.93798828125, 10.932632446289062, 201.90155029296875, 44.964393615722656, 110.44862365722656, 45.37519836425781, -101.16984558105469, 103.99767303466797], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000215.npy"} +{"epoch": 0.315712187958884, "step": 216, "batch_size": 64, "mean": 124.02897644042969, "std": 178.04100036621094, "min": -300.0345458984375, "p10": -78.95714950561523, "median": 104.08321380615234, "p90": 382.1904663085937, "max": 556.9425048828125, "pos_frac": 0.75, "sample": [225.2750244140625, 433.8848876953125, -83.54271697998047, 142.09030151367188, 213.68161010742188, 41.256263732910156, -193.13880920410156, 173.78524780273438, 230.17503356933594, 33.03589630126953, 556.9425048828125, 119.11119079589844, 109.53443908691406, -80.84280395507812, -42.7011604309082, -19.8775634765625, -95.83995819091797, 219.72482299804688, 40.60702133178711, -201.39315795898438, 11.665145874023438, 345.6900634765625, -12.397537231445312, -4.52459716796875, 105.02006530761719, 159.4185333251953, 53.80572509765625, 188.1715545654297, 120.13565063476562, -34.372650146484375, 103.1463623046875, 200.36822509765625, 30.7249755859375, -50.63216018676758, 410.7411804199219, 382.58062744140625, -63.84504699707031, 319.84716796875, 43.63139343261719, 78.21248626708984, 53.40239715576172, 301.1462707519531, 7.745691299438477, -300.0345458984375, 80.48963928222656, 198.11328125, 223.54339599609375, 419.3609619140625, 58.59202575683594, -74.55728912353516, 287.3464050292969, 48.37156677246094, 134.62344360351562, 180.76907348632812, -62.062721252441406, 355.75372314453125, 261.1352844238281, 364.3472900390625, 434.90582275390625, 406.62042236328125, 20.000167846679688, -83.2161865234375, 31.023406982421875, 381.28009033203125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000216.npy"} +{"epoch": 0.31718061674008813, "step": 217, "batch_size": 64, "mean": 146.61756896972656, "std": 169.09693908691406, "min": -177.31224060058594, "p10": -24.78770217895507, "median": 96.81541061401367, "p90": 391.2022216796875, "max": 578.2626953125, "pos_frac": 0.84375, "sample": [156.10562133789062, 141.63101196289062, 94.5044174194336, 155.55422973632812, 59.01892852783203, 61.95905303955078, 74.72804260253906, 254.9392852783203, 73.03276824951172, 100.226318359375, 59.671173095703125, 394.08331298828125, 154.68414306640625, 384.47967529296875, -115.63666534423828, 1.7978744506835938, 40.239051818847656, -16.087631225585938, 210.7781982421875, 578.2626953125, 4.790920257568359, 69.18683624267578, 327.1558532714844, 452.3442687988281, 97.21251678466797, 227.61090087890625, -28.51630401611328, 70.30113220214844, -177.31224060058594, 117.03694915771484, 517.8382568359375, 199.88235473632812, 3.441801071166992, 70.53509521484375, 291.6365051269531, 4.06201171875, 264.1298522949219, 295.6358642578125, -14.538864135742188, 536.8430786132812, 474.8359375, -111.6331558227539, 381.9281005859375, 297.16107177734375, 151.7495880126953, 490.7543640136719, -2.8269500732421875, 96.41830444335938, 76.13007354736328, 74.98584747314453, 61.14439392089844, 177.23995971679688, 265.1066589355469, 11.966859817504883, 154.82476806640625, 74.7391586303711, -44.15308380126953, -38.14844512939453, 31.44696044921875, -78.39768981933594, 140.92245483398438, 223.2028350830078, 255.3960418701172, 25.481332778930664], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000217.npy"} +{"epoch": 0.3186490455212922, "step": 218, "batch_size": 64, "mean": 111.91874694824219, "std": 172.48976135253906, "min": -280.841796875, "p10": -83.80645446777343, "median": 93.90596771240234, "p90": 347.73613281250005, "max": 606.369873046875, "pos_frac": 0.734375, "sample": [94.22398376464844, 58.31336212158203, 189.24630737304688, 265.4901123046875, 155.35369873046875, 9.18271255493164, 355.1723937988281, 82.93446350097656, 8.37502670288086, 49.96033477783203, 151.00070190429688, 51.7083740234375, -14.059738159179688, 135.12657165527344, 466.87664794921875, 193.76011657714844, 124.66098022460938, -27.914749145507812, 149.78839111328125, 194.37405395507812, 42.92402648925781, 56.21825408935547, -7.996646881103516, 256.2594299316406, -76.78050231933594, -280.841796875, 330.3848571777344, 104.68821716308594, 205.41737365722656, 217.99417114257812, 44.02629852294922, 436.451416015625, 468.25433349609375, 250.48477172851562, 213.254638671875, 64.70002746582031, -20.78449058532715, -21.097665786743164, 93.58795166015625, -24.833133697509766, -22.3900146484375, -102.3244400024414, 158.64120483398438, -122.09417724609375, -84.6561050415039, -53.109779357910156, 606.369873046875, 359.6834716796875, 207.17262268066406, -81.82393646240234, -158.19534301757812, 256.1927490234375, 475.5799560546875, 13.556913375854492, 37.059391021728516, 103.41183471679688, 162.58938598632812, 109.29956817626953, 38.073448181152344, -183.23495483398438, 84.36783599853516, -126.50083923339844, 324.45361328125, 114.79196166992188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000218.npy"} +{"epoch": 0.3201174743024963, "step": 219, "batch_size": 64, "mean": 96.66064453125, "std": 186.7349853515625, "min": -465.6213073730469, "p10": -124.03875885009765, "median": 78.70397186279297, "p90": 369.901107788086, "max": 503.250244140625, "pos_frac": 0.78125, "sample": [487.28558349609375, 164.55506896972656, 7.72547721862793, 352.1461486816406, 3.634523391723633, 443.000244140625, -465.6213073730469, 12.080511093139648, 39.56378173828125, 333.6408386230469, -14.149375915527344, 102.02941131591797, 214.16006469726562, 476.51409912109375, 503.250244140625, 191.77630615234375, 59.510047912597656, 257.1372985839844, 109.78439331054688, 140.71739196777344, 289.3037109375, 35.05873107910156, -131.56781005859375, 3.204437255859375, -66.7530288696289, -74.23809051513672, 377.5103759765625, -6.041500091552734, 378.1148986816406, 423.6898498535156, 42.39507293701172, 293.718017578125, 3.9406585693359375, 53.2557373046875, 11.038284301757812, 196.05255126953125, 118.80530548095703, 19.737220764160156, 112.55770874023438, 27.64910888671875, 313.74853515625, 145.53570556640625, 102.34616088867188, -191.74844360351562, 88.06446838378906, 168.28871154785156, -203.55426025390625, 6.654474258422852, 10.765634536743164, 251.8771514892578, 28.2369384765625, -142.9114990234375, 2.3278579711914062, -58.0085563659668, -239.8477020263672, 84.39421844482422, 171.87193298339844, 79.92695617675781, -114.48753356933594, -45.41261291503906, 162.00460815429688, 77.48098754882812, 90.68778228759766, -128.13214111328125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000219.npy"} +{"epoch": 0.32158590308370044, "step": 220, "batch_size": 64, "mean": 79.91914367675781, "std": 134.0648956298828, "min": -275.5493469238281, "p10": -80.87369461059569, "median": 79.25606536865234, "p90": 209.32471466064453, "max": 470.5345764160156, "pos_frac": 0.75, "sample": [183.45932006835938, 51.66730499267578, 65.25515747070312, -30.684654235839844, 39.672035217285156, 315.869384765625, 356.7330627441406, 7.720829010009766, 159.2959747314453, 147.34202575683594, 115.44852447509766, 285.1653137207031, 143.32186889648438, 158.91769409179688, -237.81463623046875, 122.32388305664062, 184.7277069091797, 36.400421142578125, 130.77670288085938, 217.57937622070312, 84.25172424316406, 23.167024612426758, 2.6976165771484375, -275.5493469238281, -83.2874984741211, 208.5530548095703, -28.65923500061035, 52.917022705078125, 206.00025939941406, 69.34684753417969, -75.24148559570312, 164.69541931152344, 113.76728820800781, -12.989120483398438, 243.8562774658203, 123.15714263916016, 470.5345764160156, 179.59141540527344, -13.066841125488281, 105.78868865966797, 17.77836799621582, 53.03627395629883, 1.5951480865478516, 125.16963195800781, -124.72356414794922, 158.709228515625, 58.84228515625, 155.56398010253906, -139.7310028076172, 74.26040649414062, 72.07752990722656, -5.866020202636719, 145.078125, -46.876800537109375, 57.07303237915039, 162.12130737304688, 175.27029418945312, -72.52825164794922, 192.87069702148438, 209.65542602539062, -18.901897430419922, 157.47021484375, -142.75863647460938, -163.06884765625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000220.npy"} +{"epoch": 0.32305433186490456, "step": 221, "batch_size": 64, "mean": 179.11788940429688, "std": 168.99539184570312, "min": -187.42437744140625, "p10": -25.37746963500975, "median": 199.9255599975586, "p90": 388.77585754394534, "max": 551.470703125, "pos_frac": 0.84375, "sample": [80.66419219970703, 122.61026000976562, 235.2709503173828, -7.752933502197266, 114.59919738769531, 551.470703125, 9.340217590332031, 388.9471435546875, 212.41696166992188, 289.10186767578125, 325.3620910644531, 14.13156509399414, 309.1248779296875, 265.1894226074219, 157.70054626464844, 289.89208984375, -8.59112548828125, 473.5082092285156, 0.8154430389404297, 102.2242660522461, 214.07492065429688, 311.11883544921875, 413.38775634765625, 500.3173828125, 455.0499267578125, 268.1763000488281, 247.73095703125, -187.42437744140625, 59.14788818359375, 73.14232635498047, 182.69876098632812, 211.74879455566406, -35.64855194091797, 82.5132064819336, 69.83944702148438, 189.61224365234375, 118.50092315673828, -153.4124298095703, 280.0934143066406, 377.85418701171875, -32.571617126464844, 296.47625732421875, 259.3922424316406, 338.5976867675781, 130.37911987304688, 237.30230712890625, 36.22782897949219, 388.3761901855469, 342.31549072265625, 240.09796142578125, -48.627376556396484, -105.9233627319336, -6.637672424316406, 279.0353698730469, 46.26249694824219, -87.11947631835938, 22.25371742248535, 119.75691986083984, 46.89316177368164, 216.06375122070312, 321.44952392578125, 94.33549499511719, 514.421630859375, 210.23887634277344], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000221.npy"} +{"epoch": 0.3245227606461087, "step": 222, "batch_size": 64, "mean": 133.98260498046875, "std": 170.65211486816406, "min": -226.14459228515625, "p10": -58.74530715942382, "median": 121.45122146606445, "p90": 368.97888183593756, "max": 579.4122314453125, "pos_frac": 0.765625, "sample": [-50.49829864501953, 473.3328552246094, -14.606002807617188, -116.75762939453125, -15.731986999511719, -105.55055236816406, -20.698089599609375, 130.9797821044922, -1.62127685546875, 239.66232299804688, 109.10771942138672, 125.57239532470703, 473.40692138671875, 100.217041015625, 122.0514144897461, 249.13040161132812, -226.14459228515625, 153.05975341796875, 270.4399719238281, 184.58912658691406, 162.4270782470703, 120.85102844238281, -204.24749755859375, 336.9060363769531, -134.70855712890625, 284.5035400390625, 512.8466186523438, 99.28886413574219, 27.33502960205078, 15.578948974609375, 156.8157958984375, 24.726150512695312, 199.44439697265625, -48.865211486816406, 205.44761657714844, 210.4827423095703, 61.249176025390625, 153.70730590820312, -20.71719741821289, 218.2320556640625, -21.707496643066406, 94.77909851074219, 93.0696792602539, 87.34565734863281, 349.6092529296875, 259.79754638671875, 120.38484954833984, 272.61663818359375, 156.25477600097656, 48.719970703125, -62.27973937988281, 167.64768981933594, 377.2801513671875, 66.71224975585938, 446.7343444824219, 190.62225341796875, 140.6552276611328, -107.58186340332031, 229.65078735351562, 22.866783142089844, 579.4122314453125, 59.262939453125, 429.6964111328125, 112.09172058105469], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000222.npy"} +{"epoch": 0.32599118942731276, "step": 223, "batch_size": 64, "mean": 113.74102783203125, "std": 185.8060302734375, "min": -353.3131103515625, "p10": -120.54466018676757, "median": 120.98477172851562, "p90": 331.13175964355474, "max": 650.4185180664062, "pos_frac": 0.75, "sample": [68.49513244628906, -73.59850311279297, 219.48233032226562, 152.53326416015625, 150.21209716796875, 380.07452392578125, 201.07376098632812, 114.60283660888672, 218.81446838378906, 160.81939697265625, -160.8572998046875, 66.3375244140625, 81.29570007324219, 249.55841064453125, 316.9427490234375, 287.39208984375, 146.05670166015625, 99.68434143066406, 570.4594116210938, -17.104228973388672, 140.52989196777344, -55.375450134277344, 88.10932922363281, -62.99502944946289, -65.39703369140625, 321.2872619628906, -125.29560852050781, 378.87420654296875, 108.53600311279297, -136.9039306640625, 4.0575408935546875, 10.578567504882812, 283.53094482421875, -353.3131103515625, 130.59140014648438, 162.33316040039062, -194.3260955810547, 208.13339233398438, 75.00166320800781, 650.4185180664062, 258.30810546875, -109.45911407470703, 184.0464630126953, 285.150146484375, -170.37338256835938, -60.60771560668945, 301.5491638183594, 350.96832275390625, 165.555908203125, -18.593414306640625, 53.310325622558594, 32.720115661621094, 73.92401123046875, 436.8846435546875, 25.489784240722656, 16.16309356689453, -12.548894882202148, 8.300321578979492, 148.59429931640625, 132.23602294921875, 127.36670684814453, 169.77359008789062, -255.33352661132812, 335.350830078125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000223.npy"} +{"epoch": 0.3274596182085169, "step": 224, "batch_size": 64, "mean": 129.87673950195312, "std": 191.08526611328125, "min": -347.82672119140625, "p10": -77.42566223144532, "median": 72.89033126831055, "p90": 378.6287017822267, "max": 689.7247314453125, "pos_frac": 0.8125, "sample": [251.65687561035156, 5.5846099853515625, 149.31129455566406, 34.304344177246094, -94.56084442138672, 561.8713989257812, 250.6652374267578, 47.18535614013672, 75.61113739013672, -347.82672119140625, 52.49486541748047, 196.63926696777344, 173.2773895263672, 160.67835998535156, 466.927001953125, 91.81478881835938, 57.99504852294922, 209.7158966064453, 197.5002899169922, 684.6193237304688, 287.5033874511719, 120.37530517578125, 345.1725769042969, 263.7352294921875, -121.86190795898438, -75.07681274414062, 295.9235534667969, 64.79731750488281, 240.11618041992188, -21.115692138671875, 30.901046752929688, 45.406890869140625, 40.485862731933594, 689.7247314453125, 267.3614501953125, 259.24774169921875, 120.71857452392578, -85.0918960571289, 21.335172653198242, 34.70634078979492, 236.82040405273438, 2.80157470703125, 52.90504455566406, -31.746318817138672, 115.38868713378906, -80.7871322631836, -66.48921203613281, -8.094528198242188, -99.51600646972656, 34.65622329711914, 191.17420959472656, 143.79629516601562, 392.967041015625, 4.898759841918945, 6.753631591796875, 70.16952514648438, 10.094573974609375, 341.5304870605469, 40.322357177734375, -78.43231201171875, 90.53706359863281, 433.63031005859375, 450.4085998535156, 8.49178695678711], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000224.npy"} +{"epoch": 0.328928046989721, "step": 225, "batch_size": 64, "mean": 149.9493865966797, "std": 185.6443634033203, "min": -270.8898010253906, "p10": -87.70649642944335, "median": 156.0700225830078, "p90": 398.36600646972664, "max": 750.0345458984375, "pos_frac": 0.765625, "sample": [247.85906982421875, 169.72842407226562, 413.5628662109375, 367.2030944824219, 64.06954193115234, 99.15929412841797, 14.57763671875, -95.84326171875, -136.22933959960938, 430.51171875, 372.063232421875, 293.3793029785156, -147.10914611816406, -56.639366149902344, 305.07867431640625, -26.210857391357422, -100.37897491455078, 0.7925014495849609, -15.294292449951172, 66.84165954589844, 112.017333984375, 141.1676025390625, 235.0062255859375, 177.53128051757812, -10.563997268676758, 156.15884399414062, 67.89289855957031, 167.29052734375, 2.2872962951660156, 241.38116455078125, 326.23687744140625, 170.4280242919922, 226.3570098876953, 115.51416778564453, -33.1124153137207, 474.38336181640625, 445.57379150390625, 3.8160629272460938, 246.2257080078125, 407.2898254394531, 102.97407531738281, 343.81231689453125, 195.38807678222656, -26.5622501373291, 166.0268096923828, 139.114990234375, 750.0345458984375, 292.9107666015625, 181.16482543945312, 85.12992858886719, 414.54803466796875, 1.8646240234375, 263.1509704589844, -102.8604736328125, -102.00093078613281, 377.54376220703125, 306.3017578125, 180.82388305664062, 167.33901977539062, -270.8898010253906, 113.69925689697266, 155.981201171875, -68.72071075439453, -10.017120361328125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000225.npy"} +{"epoch": 0.3303964757709251, "step": 226, "batch_size": 64, "mean": 119.1348648071289, "std": 175.08644104003906, "min": -214.5522003173828, "p10": -39.362146759033195, "median": 73.94813537597656, "p90": 345.69939880371095, "max": 635.7626953125, "pos_frac": 0.765625, "sample": [635.7626953125, -160.89044189453125, -42.6029167175293, 17.119659423828125, -16.704017639160156, 189.1812286376953, -4.482269287109375, 69.36299133300781, 191.03829956054688, -31.800350189208984, 115.54784393310547, 18.272613525390625, -18.985511779785156, 43.550193786621094, -180.31956481933594, 98.18867492675781, -117.45973205566406, 345.7250061035156, 302.30230712890625, 33.65538787841797, 345.6396484375, 352.4043884277344, 66.7514877319336, 122.94538879394531, 206.55113220214844, 315.0711669921875, 25.907432556152344, -30.488510131835938, 116.48861694335938, 447.7655944824219, 280.938720703125, -78.92236328125, 191.88079833984375, 78.53327941894531, 62.339141845703125, 543.4807739257812, 253.4608154296875, 524.897216796875, 289.1957092285156, -7.704154968261719, -5.341361999511719, 144.897216796875, -214.5522003173828, 161.49090576171875, 44.02324295043945, 108.32444763183594, 31.02777862548828, 10.69952392578125, 15.503427505493164, 83.7052993774414, -3.080474853515625, 262.92303466796875, 110.66897583007812, 196.78814697265625, 45.870201110839844, 130.3184356689453, -91.78244018554688, 5.879890441894531, 517.5598754882812, 53.84496307373047, 47.35803985595703, 136.0171661376953, 16.363616943359375, 222.5251922607422], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000226.npy"} +{"epoch": 0.33186490455212925, "step": 227, "batch_size": 64, "mean": 148.35679626464844, "std": 177.74441528320312, "min": -366.3974304199219, "p10": -22.00605888366698, "median": 115.9819564819336, "p90": 378.9562225341797, "max": 647.1830444335938, "pos_frac": 0.84375, "sample": [307.02056884765625, 105.79595947265625, 170.68325805664062, 402.89215087890625, 230.5849609375, 45.412757873535156, 49.02851867675781, 105.86217498779297, 327.90826416015625, 19.021942138671875, 120.39505767822266, 119.54353332519531, 196.64678955078125, 18.62623405456543, -26.526187896728516, 32.19190216064453, 170.70907592773438, 103.82884216308594, 210.4824676513672, 647.1830444335938, 167.55496215820312, 103.31390380859375, 340.4072570800781, 206.47593688964844, 63.76850128173828, 294.0513916015625, 168.144775390625, 252.12454223632812, 112.42037963867188, 98.38519287109375, 165.17266845703125, 81.16389465332031, 339.0384521484375, 6.250431060791016, -84.20005798339844, -110.85917663574219, 207.95556640625, 209.96343994140625, 539.5734252929688, 109.07333374023438, 200.4625701904297, 432.60418701171875, -111.15113067626953, 592.5298461914062, -8.740785598754883, 18.53443145751953, 187.5906982421875, 48.774024963378906, 225.83685302734375, 38.92893981933594, 131.4330291748047, 380.7718505859375, 94.5084228515625, -11.459091186523438, 5.602502822875977, -46.51807403564453, 374.7197570800781, -366.3974304199219, -8.47193717956543, 95.48455810546875, -75.02397918701172, 23.040346145629883, 133.74990844726562, 510.9553527832031], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000227.npy"} +{"epoch": 0.3333333333333333, "step": 228, "batch_size": 64, "mean": 106.68173217773438, "std": 172.11874389648438, "min": -271.3731994628906, "p10": -106.36674804687499, "median": 108.62459182739258, "p90": 331.0913452148438, "max": 529.4962158203125, "pos_frac": 0.734375, "sample": [-181.00726318359375, -257.69549560546875, -271.3731994628906, 251.38552856445312, 145.354736328125, 350.57330322265625, -205.8514404296875, 178.61935424804688, 178.203857421875, -167.52186584472656, 18.914306640625, 303.1737365722656, 529.4962158203125, 326.0482177734375, 456.3587646484375, 333.252685546875, 371.4068603515625, 192.0448455810547, 123.59352111816406, -28.572235107421875, -63.30424499511719, 319.31378173828125, 116.12625885009766, 340.7513427734375, 188.169921875, 162.3563995361328, 128.55462646484375, 21.329692840576172, -108.34319305419922, 109.44486999511719, 50.98741149902344, 203.42237854003906, 2.1018829345703125, 194.23818969726562, 234.19869995117188, 100.18863677978516, 123.83979034423828, 82.4853515625, 27.646011352539062, 40.343597412109375, 100.0314712524414, 247.06997680664062, -63.926971435546875, -20.94598388671875, 97.63685607910156, 107.80431365966797, 440.2757263183594, 21.237213134765625, 92.70182800292969, 57.424537658691406, 273.53802490234375, -6.938266754150391, -21.295230865478516, 195.25942993164062, 233.3126983642578, 180.93136596679688, 212.0010528564453, 115.08934783935547, -101.75504302978516, -34.486785888671875, -21.005027770996094, 39.75456237792969, -184.0140380859375, -52.32555389404297], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000228.npy"} +{"epoch": 0.33480176211453744, "step": 229, "batch_size": 64, "mean": 114.44705200195312, "std": 170.4741668701172, "min": -414.19415283203125, "p10": -63.161273956298814, "median": 94.01229476928711, "p90": 360.2313934326172, "max": 570.713134765625, "pos_frac": 0.765625, "sample": [-29.138259887695312, 15.142078399658203, -134.92471313476562, 428.2406005859375, 49.087162017822266, -191.04202270507812, 110.17733001708984, 193.71267700195312, 74.87831115722656, 217.68988037109375, 249.74749755859375, 362.2593688964844, 112.93338012695312, 368.5832214355469, 88.50349426269531, 97.7945785522461, 269.28436279296875, 105.74999237060547, 38.146121978759766, 351.5647277832031, -114.21759033203125, 58.79852294921875, 2.8442115783691406, 222.5519256591797, 146.7158660888672, 43.34857177734375, 570.713134765625, -173.290283203125, 57.41426086425781, 252.822265625, 164.28268432617188, 264.1295471191406, 193.8426055908203, 384.56964111328125, 377.28863525390625, 43.611900329589844, 355.49945068359375, 35.18470001220703, 82.38946533203125, -69.65374755859375, 73.24796295166016, 110.01870727539062, 75.25374603271484, 204.5128173828125, -48.012168884277344, 160.5964813232422, 205.37762451171875, 225.78871154785156, -35.26319885253906, -138.33352661132812, 135.06886291503906, 451.79638671875, 33.756309509277344, 90.23001098632812, 79.3698959350586, 144.2439727783203, -6.966651916503906, 185.46871948242188, -19.038665771484375, 183.9677734375, -414.19415283203125, -0.4971656799316406, -5.4742279052734375, -43.542579650878906], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000229.npy"} +{"epoch": 0.33627019089574156, "step": 230, "batch_size": 64, "mean": 173.34832763671875, "std": 170.72828674316406, "min": -300.52484130859375, "p10": -32.8751106262207, "median": 163.27852630615234, "p90": 426.37104187011727, "max": 541.2179565429688, "pos_frac": 0.84375, "sample": [75.46505737304688, 382.61651611328125, -26.482419967651367, 320.50634765625, 209.95974731445312, 85.44499206542969, -32.51411437988281, 263.9464111328125, 104.64486694335938, 164.07528686523438, 217.4923858642578, 184.76864624023438, 8.096641540527344, 262.7779235839844, 14.087417602539062, 123.37393188476562, -48.2718505859375, 96.44499206542969, 102.73004150390625, 141.8181915283203, 35.35540771484375, -107.33926391601562, -33.029823303222656, 220.2893524169922, 147.23199462890625, 179.03411865234375, 221.10186767578125, 356.5751037597656, 320.3428649902344, 146.08560180664062, 87.23868560791016, 34.779869079589844, 362.0306396484375, 488.3899841308594, 162.4817657470703, -122.34566497802734, 142.01699829101562, 178.67739868164062, 248.567138671875, 445.147216796875, -37.47312927246094, -16.070755004882812, -300.52484130859375, -44.602821350097656, 55.31568908691406, 51.39573669433594, 432.4872131347656, 142.3131561279297, 380.8839416503906, 465.298828125, 178.9173126220703, 115.22975158691406, 102.78675842285156, 21.599279403686523, 436.09918212890625, 332.08294677734375, 505.7169494628906, 274.49542236328125, 412.0999755859375, 234.52931213378906, 240.41639709472656, 168.74252319335938, 237.7235870361328, 541.2179565429688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000230.npy"} +{"epoch": 0.3377386196769457, "step": 231, "batch_size": 64, "mean": 119.83944702148438, "std": 164.9060821533203, "min": -151.354248046875, "p10": -49.926029205322266, "median": 90.4973258972168, "p90": 364.4439025878907, "max": 719.4259033203125, "pos_frac": 0.765625, "sample": [484.0362548828125, 101.08262634277344, -50.583824157714844, 304.67718505859375, 17.330543518066406, 78.47410583496094, 369.94195556640625, -15.660675048828125, 68.75021362304688, 114.6232681274414, 113.80404663085938, 373.7340087890625, -71.11885070800781, 85.3955307006836, 501.7901916503906, 173.4543914794922, 197.24627685546875, 69.62818908691406, 139.4058837890625, 220.88082885742188, 395.8504943847656, 55.30228042602539, 112.1120376586914, -8.827346801757812, 68.81367492675781, -90.45838165283203, 351.6151123046875, -40.8083381652832, 211.03074645996094, 719.4259033203125, 471.5071105957031, 259.44024658203125, 131.7720947265625, 31.641935348510742, -151.354248046875, 71.63941955566406, 106.18106079101562, 175.24746704101562, 87.91033935546875, -48.39117431640625, -21.3717041015625, 29.421091079711914, 250.9901123046875, 76.72160339355469, -26.899497985839844, 127.34026336669922, -46.6328125, -124.9551010131836, 62.22329330444336, 129.916015625, 307.8419189453125, 168.1720428466797, 93.08431243896484, -1.9286842346191406, -51.966312408447266, 124.3167724609375, -93.90614318847656, 127.57286071777344, 25.375762939453125, 15.94622802734375, 9.366485595703125, 154.0127410888672, 107.38916015625, 41.15174865722656], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000231.npy"} +{"epoch": 0.3392070484581498, "step": 232, "batch_size": 64, "mean": 96.3275146484375, "std": 166.57598876953125, "min": -181.1040496826172, "p10": -79.9994728088379, "median": 59.68055725097656, "p90": 323.09589233398447, "max": 626.635498046875, "pos_frac": 0.71875, "sample": [186.264892578125, 101.10784149169922, 15.148248672485352, 36.623207092285156, 23.51947021484375, -148.6641387939453, 357.4950866699219, 274.0526123046875, -28.227554321289062, 268.937255859375, 58.671417236328125, 416.2598876953125, 86.56713104248047, -80.68892669677734, -26.94525146484375, 51.017555236816406, -34.998443603515625, 188.12109375, -107.26603698730469, 79.27265167236328, 159.1313934326172, 591.7235107421875, 60.689697265625, 78.118408203125, 143.10711669921875, -36.816410064697266, 626.635498046875, -10.475784301757812, 47.346343994140625, 236.54434204101562, 155.20632934570312, 221.11138916015625, -69.82998657226562, 77.24435424804688, -108.46365356445312, 114.86827087402344, -115.1633529663086, 25.539592742919922, -78.3907470703125, 443.39044189453125, 18.379852294921875, -59.27031707763672, 6.2398834228515625, -101.62937927246094, 32.28328323364258, 94.76473999023438, 7.621002197265625, -181.1040496826172, 283.54266357421875, 141.12225341796875, 360.0577392578125, 27.517559051513672, -10.671844482421875, 329.7852478027344, 252.78515625, -34.44490051269531, 121.96895599365234, 307.4873962402344, -24.124868392944336, 53.18268585205078, 60.9009895324707, 45.274906158447266, 85.7099609375, 69.7975082397461], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000232.npy"} +{"epoch": 0.3406754772393539, "step": 233, "batch_size": 64, "mean": 154.75473022460938, "std": 225.0361328125, "min": -238.16937255859375, "p10": -80.57775421142577, "median": 105.34866333007812, "p90": 490.4770050048829, "max": 723.931640625, "pos_frac": 0.71875, "sample": [12.443681716918945, 13.890567779541016, 530.2860107421875, -34.25978088378906, 192.53060913085938, 215.45510864257812, -194.90823364257812, -126.23242950439453, -8.451797485351562, -192.3253936767578, 119.16365051269531, 51.7645263671875, 175.1871795654297, 271.3468933105469, 330.574951171875, 723.931640625, 127.50879669189453, 79.96151733398438, 541.9251098632812, 357.23529052734375, 45.65116882324219, 180.47569274902344, 79.83340454101562, 566.0166015625, 30.008872985839844, 300.351806640625, 0.295562744140625, -82.437744140625, -182.29507446289062, -61.860538482666016, 166.50425720214844, 504.9178161621094, 203.01528930664062, 174.42735290527344, 316.19073486328125, 500.1723937988281, 15.240224838256836, 452.02764892578125, -76.23777770996094, 35.90293884277344, 122.89069366455078, 395.62408447265625, -35.92057418823242, 370.3921813964844, 79.18701171875, 404.11444091796875, -64.01481628417969, -238.16937255859375, -151.90504455566406, -35.07080078125, 47.86933898925781, -37.763206481933594, 351.94024658203125, 91.53367614746094, -27.454940795898438, 209.3809051513672, -20.872234344482422, 467.85443115234375, -33.41482925415039, 665.6146240234375, 373.62542724609375, 77.39680480957031, 328.870361328125, 207.36471557617188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000233.npy"} +{"epoch": 0.342143906020558, "step": 234, "batch_size": 64, "mean": 160.46282958984375, "std": 180.31822204589844, "min": -192.03318786621094, "p10": -54.628299713134744, "median": 143.9092025756836, "p90": 392.43039245605473, "max": 670.0224609375, "pos_frac": 0.78125, "sample": [-78.4208755493164, 279.82635498046875, 318.33770751953125, 249.3372802734375, 350.6478576660156, 121.94104766845703, 158.8289794921875, 198.89736938476562, 434.5904846191406, -16.52564811706543, 290.98284912109375, 147.27037048339844, -192.03318786621094, -7.712123870849609, 91.15372467041016, 264.3202819824219, 85.52418518066406, -64.60082244873047, 74.2840576171875, 336.69024658203125, 670.0224609375, 152.35629272460938, -13.400934219360352, -22.598609924316406, 61.445701599121094, -31.659255981445312, 185.23428344726562, 350.3802490234375, 10.498703002929688, 381.343505859375, 0.2754230499267578, 116.21865844726562, 458.72332763671875, 131.63894653320312, 537.9139404296875, 151.94891357421875, 56.267982482910156, 159.7366943359375, 64.99391174316406, 287.72003173828125, 276.2726135253906, 162.37738037109375, 238.05197143554688, 94.50215911865234, 13.617012023925781, 84.86195373535156, -101.97323608398438, -0.30629730224609375, 270.46380615234375, 104.55426025390625, 109.54602813720703, 419.14453125, 656.1181640625, -64.47217559814453, 287.22344970703125, 188.61297607421875, -75.34928894042969, -0.7700672149658203, 189.9438018798828, 60.59840393066406, 140.54803466796875, -127.94674682617188, 397.1819152832031, 194.42071533203125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000234.npy"} +{"epoch": 0.3436123348017621, "step": 235, "batch_size": 64, "mean": 103.37664794921875, "std": 180.46923828125, "min": -416.92987060546875, "p10": -86.71147918701172, "median": 105.06742095947266, "p90": 296.26452941894536, "max": 554.5057373046875, "pos_frac": 0.734375, "sample": [26.89971923828125, 242.85916137695312, -416.92987060546875, 47.144195556640625, 150.3299102783203, 548.1482543945312, 175.89157104492188, 336.2158508300781, -77.63052368164062, 187.06005859375, -164.7803192138672, -13.366424560546875, 145.99130249023438, 286.1903076171875, 36.404998779296875, 39.74076843261719, -51.26507568359375, 1.7630538940429688, 104.07034301757812, -294.7449951171875, 17.739330291748047, 21.3681640625, -7.5343170166015625, 66.66870880126953, 371.4674377441406, 238.2609405517578, -39.32079315185547, -69.64889526367188, 133.52401733398438, 194.25828552246094, 123.98696899414062, 106.06449890136719, 298.7391357421875, -89.46646118164062, 128.74197387695312, 204.448974609375, -5.946123123168945, 290.4904479980469, 28.912399291992188, 176.24700927734375, 30.057567596435547, 99.14335632324219, 554.5057373046875, 259.78387451171875, 208.60182189941406, 174.9713592529297, 194.0703125, -52.447105407714844, -194.72854614257812, 160.6658477783203, 531.83447265625, 268.65020751953125, -137.14669799804688, 30.8172607421875, 123.12614440917969, -4.60035514831543, -96.0680923461914, 200.75289916992188, 95.39865112304688, 18.576202392578125, 203.35357666015625, 141.42381286621094, 386.6519470214844, -80.28318786621094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000235.npy"} +{"epoch": 0.34508076358296624, "step": 236, "batch_size": 64, "mean": 166.98321533203125, "std": 150.60899353027344, "min": -74.41011810302734, "p10": -6.000893402099603, "median": 140.77215576171875, "p90": 388.32298278808594, "max": 508.2988586425781, "pos_frac": 0.890625, "sample": [416.7685241699219, 43.401084899902344, 5.6063690185546875, 176.7132568359375, 203.1687774658203, 130.11805725097656, 472.1922302246094, 32.967811584472656, 80.66880798339844, 145.9196319580078, 222.47491455078125, 390.7315979003906, 113.71900177001953, 270.2318420410156, 323.6827392578125, 308.8684387207031, 74.02012634277344, 245.4193572998047, 2.7192306518554688, 382.702880859375, 142.7298583984375, -8.796417236328125, 66.34815216064453, 306.836669921875, 240.71511840820312, 23.095752716064453, 166.7508544921875, 431.6864013671875, 240.83853149414062, 44.16320037841797, 508.2988586425781, 135.42288208007812, -57.46031188964844, 246.0867462158203, 9.45235824584961, 262.911376953125, 96.81787872314453, 454.3802490234375, 110.63494110107422, -30.93979263305664, 127.15811920166016, 116.19144439697266, 42.01374053955078, 46.76599884033203, 10.255695343017578, 271.18072509765625, 120.17822265625, 102.77947235107422, -70.8472900390625, -74.41011810302734, 146.68557739257812, 473.2326965332031, 367.02239990234375, 144.03662109375, 187.76918029785156, 138.814453125, 312.4494323730469, 101.69939422607422, 0.5219955444335938, 187.86036682128906, -67.08108520507812, 340.44293212890625, -26.468006134033203, 256.605712890625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000236.npy"} +{"epoch": 0.3465491923641703, "step": 237, "batch_size": 64, "mean": 122.92462921142578, "std": 184.4892578125, "min": -346.05499267578125, "p10": -75.34105911254883, "median": 117.25851058959961, "p90": 388.82241210937525, "max": 692.2968139648438, "pos_frac": 0.734375, "sample": [167.12086486816406, 209.62301635742188, 200.4658203125, 181.62347412109375, -1.2578086853027344, 130.6397705078125, 88.9074478149414, 152.85565185546875, 30.05245590209961, 185.75143432617188, 59.34406280517578, 31.603912353515625, 432.940185546875, 142.05545043945312, 229.98324584960938, -69.69209289550781, -155.2028350830078, 150.65574645996094, -74.3051528930664, 98.61915588378906, 147.20321655273438, -95.31202697753906, 435.1961669921875, 692.2968139648438, -75.78501892089844, 440.436279296875, 315.962646484375, -30.045654296875, -18.356643676757812, 25.000560760498047, 493.6029052734375, 96.30909729003906, 205.34584045410156, 219.25503540039062, 7.697914123535156, 242.3468475341797, 7.896995544433594, 173.64511108398438, 323.38018798828125, -37.593666076660156, 235.57577514648438, 103.87725067138672, 140.47747802734375, 263.6483459472656, 416.86907958984375, 49.024810791015625, 12.98382568359375, -2.9438552856445312, -346.05499267578125, 297.7339782714844, -62.426124572753906, 71.41455078125, -179.61062622070312, -47.01466369628906, 223.39508056640625, 310.31304931640625, -94.9804458618164, 459.20770263671875, 3.8432540893554688, -126.13905334472656, -59.13539123535156, 149.58792114257812, 63.718505859375, 223.54449462890625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000237.npy"} +{"epoch": 0.34801762114537443, "step": 238, "batch_size": 64, "mean": 124.2857437133789, "std": 180.33380126953125, "min": -199.0906982421875, "p10": -42.679440307617185, "median": 86.74872589111328, "p90": 297.3473205566406, "max": 779.9769897460938, "pos_frac": 0.765625, "sample": [-18.129104614257812, -29.153587341308594, 7.486541748046875, 148.7431640625, 20.38821792602539, -5.8768768310546875, 141.2201385498047, 296.10125732421875, -102.22991943359375, 424.41790771484375, 69.24530029296875, 166.52037048339844, 76.85617065429688, 63.81802749633789, -8.147363662719727, 184.45062255859375, 279.44146728515625, -83.04888916015625, 53.929161071777344, 54.939571380615234, 8.283376693725586, 712.6031494140625, 36.84620666503906, 137.57505798339844, -45.18717956542969, 554.7098388671875, 112.75836181640625, 4.786781311035156, 139.95449829101562, -117.97343444824219, 134.74884033203125, 126.125, 183.19207763671875, 98.83065032958984, 367.30615234375, 23.951290130615234, 76.23330688476562, 149.4686279296875, 779.9769897460938, 175.47845458984375, -1.5374794006347656, 4.797119140625, 75.09469604492188, -122.9272689819336, -36.82804870605469, 270.27496337890625, 248.1741943359375, 176.501708984375, 83.5953598022461, 459.1672668457031, 228.92481994628906, 17.946258544921875, 191.98931884765625, 236.90127563476562, -199.0906982421875, 297.88134765625, 244.20928955078125, 55.539398193359375, -24.109642028808594, -25.421142578125, 176.956787109375, 169.67869567871094, -63.97271728515625, 89.90209197998047], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000238.npy"} +{"epoch": 0.34948604992657856, "step": 239, "batch_size": 64, "mean": 139.981201171875, "std": 173.57762145996094, "min": -292.5069885253906, "p10": -49.63237571716308, "median": 135.10343170166016, "p90": 366.2469024658203, "max": 522.32958984375, "pos_frac": 0.78125, "sample": [374.309814453125, 180.94081115722656, -143.137939453125, 357.7381286621094, -113.44686889648438, 216.1939697265625, 65.52719116210938, 135.7432403564453, -52.35323715209961, -154.9404296875, 54.71657180786133, 41.324127197265625, 86.13925170898438, 277.563232421875, 72.97268676757812, 7.059516906738281, 522.32958984375, 134.463623046875, 201.87875366210938, 137.14248657226562, 225.9810028076172, 191.4479522705078, 244.68260192871094, -62.68301773071289, -4.947822570800781, 31.853574752807617, 197.21583557128906, 417.6157531738281, 236.36167907714844, 113.09513854980469, -28.44257164001465, 27.937530517578125, 364.35430908203125, 432.55523681640625, -183.1503448486328, 94.23795318603516, -23.64665985107422, 354.9832763671875, -2.1071014404296875, 367.0580139160156, 42.446842193603516, 346.5924072265625, 358.2320861816406, 149.03433227539062, -35.63526916503906, 43.037208557128906, -292.5069885253906, 161.40127563476562, -33.08579635620117, 171.3159942626953, 153.75244140625, 303.8638000488281, 323.977783203125, 77.03915405273438, 505.670166015625, 218.49298095703125, 423.29010009765625, 88.48519897460938, 124.39540100097656, 138.37489318847656, 63.623779296875, -43.28369903564453, 267.76702880859375, 5.948904037475586], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000239.npy"} +{"epoch": 0.3509544787077827, "step": 240, "batch_size": 64, "mean": 116.74778747558594, "std": 198.1468963623047, "min": -252.17137145996094, "p10": -197.17318725585932, "median": 126.6180191040039, "p90": 388.7883544921876, "max": 578.8377075195312, "pos_frac": 0.765625, "sample": [134.00967407226562, 146.8570098876953, 170.90420532226562, 96.4049301147461, 90.79176330566406, 367.7625732421875, 157.26699829101562, 456.00152587890625, -235.89767456054688, -58.224571228027344, 209.06373596191406, 142.6575164794922, -247.94265747070312, 201.82000732421875, 394.573974609375, 91.77892303466797, -226.85009765625, 77.35850524902344, 37.83728790283203, 259.0668029785156, 168.58380126953125, 451.0228271484375, 181.05062866210938, 167.2059326171875, 71.23953247070312, 10.181285858154297, -246.34207153320312, 128.80096435546875, 82.93504333496094, 526.6885986328125, 193.0551300048828, 116.44090270996094, 95.71058654785156, 29.20977020263672, -225.32244873046875, 375.28857421875, 235.7887420654297, 188.97808837890625, -217.94467163085938, 76.54389190673828, -81.45832824707031, 320.12554931640625, -148.70639038085938, 50.65000534057617, 578.8377075195312, -128.4562530517578, 287.27734375, -22.727907180786133, 245.61871337890625, 194.67237854003906, 127.36796569824219, 453.67181396484375, -50.54698181152344, 117.68981170654297, -127.56843566894531, 145.31741333007812, 95.33462524414062, -52.25938415527344, 125.86807250976562, 200.8403778076172, 48.829833984375, 163.3860321044922, 505.9108581542969, -252.17137145996094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000240.npy"} +{"epoch": 0.3524229074889868, "step": 241, "batch_size": 64, "mean": 182.75540161132812, "std": 215.35369873046875, "min": -239.30120849609375, "p10": -52.69979248046874, "median": 164.36551666259766, "p90": 456.2829132080079, "max": 908.6885986328125, "pos_frac": 0.8125, "sample": [100.54609680175781, 78.01026916503906, -239.30120849609375, -20.72318458557129, 46.00677490234375, 243.974365234375, 257.96551513671875, 468.43511962890625, 92.88876342773438, 190.79348754882812, 128.9043426513672, -57.3565673828125, -10.612384796142578, 524.955078125, -41.833984375, 164.05923461914062, -38.01015853881836, 359.2685241699219, 169.93756103515625, 180.04296875, 387.98785400390625, 165.32049560546875, 91.51081085205078, 130.67991638183594, 908.6885986328125, 427.9277648925781, 212.00689697265625, 150.05953979492188, 589.687255859375, 47.09851837158203, 861.9174194335938, 482.26788330078125, 329.77813720703125, 78.52591705322266, 106.55705261230469, 164.6717987060547, 217.34521484375, 100.7417984008789, -64.94802856445312, 183.9963836669922, 130.055419921875, 49.51286315917969, 110.6099853515625, -12.46990966796875, 268.09063720703125, 81.14398193359375, 350.0238952636719, -96.46026611328125, 244.0064697265625, 200.77850341796875, 74.92427062988281, 49.50780487060547, 284.8233947753906, 491.5533142089844, 297.36627197265625, 179.42367553710938, -172.08880615234375, 308.107421875, 250.217041015625, 258.9056396484375, -208.5443878173828, 397.6996154785156, 94.64057922363281, -105.25408935546875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000241.npy"} +{"epoch": 0.35389133627019087, "step": 242, "batch_size": 64, "mean": 132.41099548339844, "std": 180.52566528320312, "min": -279.65716552734375, "p10": -39.62632904052734, "median": 100.08974075317383, "p90": 329.479476928711, "max": 715.1510620117188, "pos_frac": 0.765625, "sample": [-41.046348571777344, 30.781002044677734, 176.78173828125, 308.197021484375, 56.713619232177734, -279.65716552734375, 12.996917724609375, -10.088394165039062, 320.7225341796875, 150.989013671875, 37.338645935058594, 274.39447021484375, 654.5390014648438, -11.406557083129883, -10.981056213378906, 67.26006317138672, 112.75261688232422, 102.63677978515625, 89.1961669921875, 15.41419792175293, 205.960693359375, 117.63717651367188, 254.11354064941406, 216.63125610351562, 259.43536376953125, -36.312950134277344, 83.73387908935547, 88.18477630615234, 125.57292938232422, -73.48593139648438, 174.91973876953125, 232.66000366210938, 97.5427017211914, -125.9483642578125, 273.7649841308594, 113.83243560791016, 333.2324523925781, 472.7926025390625, 8.058845520019531, 92.67726135253906, 93.49917602539062, 207.81285095214844, -4.6307373046875, 715.1510620117188, 18.26508331298828, 76.91287994384766, 489.15985107421875, 109.8018798828125, -13.593002319335938, -1.6496257781982422, -117.32035064697266, 443.62713623046875, 216.43434143066406, -6.691383361816406, 76.97322082519531, 118.56329345703125, 147.2197265625, -144.7386016845703, 258.00518798828125, 234.08670043945312, 6.35264778137207, 478.1263122558594, 151.23025512695312, -50.832481384277344], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000242.npy"} +{"epoch": 0.355359765051395, "step": 243, "batch_size": 64, "mean": 162.53128051757812, "std": 191.20957946777344, "min": -297.2212829589844, "p10": -51.12767200469969, "median": 135.04093170166016, "p90": 409.38558349609383, "max": 778.234619140625, "pos_frac": 0.859375, "sample": [388.98828125, 43.80190658569336, 86.08185577392578, 511.8323974609375, 214.55126953125, 75.24092864990234, 138.8695526123047, 182.28057861328125, -81.92253112792969, -99.20951080322266, -59.923553466796875, 83.55953979492188, 235.92630004882812, 27.245981216430664, -138.62062072753906, 19.79498291015625, -104.94186401367188, 344.5868225097656, 134.0679473876953, 169.96701049804688, -297.2212829589844, 44.82820129394531, 180.3015899658203, 23.97333526611328, 7.419015884399414, 342.01788330078125, 72.58633422851562, 211.98974609375, 94.01140594482422, 416.72357177734375, 329.64471435546875, 291.00408935546875, -30.60394859313965, 136.013916015625, -93.7449951171875, 344.19287109375, 276.2848205566406, 567.1783447265625, 363.47344970703125, 261.3461608886719, 314.757080078125, 129.32626342773438, 422.09088134765625, 778.234619140625, 440.8189697265625, 48.92070007324219, 141.9444580078125, 46.96889114379883, 392.26361083984375, -18.380218505859375, 182.4281463623047, 227.29730224609375, 23.53448486328125, 130.77056884765625, 538.1265258789062, 45.41828155517578, 159.56686401367188, 77.69697570800781, 17.580101013183594, 246.97119140625, 14.458114624023438, 66.08500671386719, 48.19264221191406, 213.33453369140625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000243.npy"} +{"epoch": 0.3568281938325991, "step": 244, "batch_size": 64, "mean": 165.4689483642578, "std": 172.20138549804688, "min": -175.1147918701172, "p10": -13.752536392211914, "median": 141.26097869873047, "p90": 398.3793273925782, "max": 630.8477783203125, "pos_frac": 0.84375, "sample": [76.07295227050781, -13.547599792480469, 120.96754455566406, 161.03271484375, 4.940807342529297, 58.388240814208984, 484.0377502441406, 134.3631134033203, -54.955902099609375, -3.480560302734375, 197.3039093017578, 224.74826049804688, 9.70697021484375, 324.40704345703125, 42.722625732421875, 136.5760040283203, -4.522907257080078, 468.6629943847656, 59.570316314697266, 115.02521514892578, -175.1147918701172, 145.4957275390625, 288.0701904296875, -72.72286987304688, 190.73983764648438, 32.88484191894531, 164.70346069335938, 239.62799072265625, 146.9558868408203, 50.42402648925781, 260.47186279296875, 122.51873016357422, 357.516845703125, 347.59783935546875, 487.811279296875, 618.9600219726562, 203.9114532470703, 406.7236633300781, 51.5880126953125, 630.8477783203125, 236.6693115234375, -13.84036636352539, 377.1298828125, 200.50241088867188, 157.99868774414062, -79.3382339477539, 160.66285705566406, -41.497955322265625, 12.801467895507812, 267.776123046875, 40.61405944824219, 31.10242462158203, 155.18077087402344, 94.55152130126953, 22.291290283203125, 137.02622985839844, 378.9092102050781, 217.44873046875, -81.17472839355469, 116.21517944335938, 103.71719360351562, 485.32476806640625, 243.5823516845703, 325.32763671875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000244.npy"} +{"epoch": 0.35829662261380324, "step": 245, "batch_size": 64, "mean": 129.53224182128906, "std": 186.38296508789062, "min": -468.3556213378906, "p10": -32.498823928833, "median": 103.20882797241211, "p90": 347.53512878417973, "max": 645.4560546875, "pos_frac": 0.8125, "sample": [108.1397476196289, 419.06707763671875, 196.7110595703125, 144.3842010498047, 195.64166259765625, 184.01828002929688, 65.02181243896484, -25.224716186523438, 211.72894287109375, 116.62782287597656, 93.25546264648438, 93.20132446289062, 146.55177307128906, 13.220523834228516, 168.68130493164062, 177.53775024414062, 232.011474609375, 326.31787109375, 79.32540893554688, 21.88705062866211, 546.1224365234375, 153.1403350830078, -20.90410614013672, 354.09991455078125, 67.35337829589844, 2.1188507080078125, 259.83453369140625, -35.61629867553711, -217.84825134277344, -7.704948425292969, 306.59197998046875, 148.83621215820312, 645.4560546875, -181.1787567138672, 189.91241455078125, 31.85578155517578, 17.73278045654297, 105.26002502441406, 311.30535888671875, 84.52902221679688, -19.076522827148438, 212.1912841796875, 132.22862243652344, -84.25410461425781, 98.3759994506836, -468.3556213378906, 297.28765869140625, 333.592529296875, 35.36463165283203, 225.88087463378906, 353.5105285644531, 35.624053955078125, 590.6493530273438, -70.5667724609375, -6.721004486083984, 525.5409545898438, 79.73574829101562, 116.21277618408203, 12.492202758789062, -107.81744384765625, 89.7659912109375, 70.51155090332031, 101.15763092041016, 7.730049133300781], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000245.npy"} +{"epoch": 0.35976505139500736, "step": 246, "batch_size": 64, "mean": 132.4814910888672, "std": 193.35752868652344, "min": -278.55133056640625, "p10": -78.78883514404295, "median": 86.3569107055664, "p90": 365.97986755371096, "max": 821.663330078125, "pos_frac": 0.796875, "sample": [216.6512451171875, 366.9255065917969, 153.67398071289062, 94.19336700439453, 420.90008544921875, 204.49314880371094, 821.663330078125, -191.9473876953125, 46.37771224975586, -278.55133056640625, 72.19652557373047, 78.41402435302734, 73.58538818359375, -1.4878292083740234, 193.3103485107422, 363.77337646484375, 328.25494384765625, 74.5834732055664, 55.04985046386719, 62.38490295410156, 49.47938537597656, 391.6071472167969, 148.33203125, -100.47054290771484, 112.29508972167969, -116.71676635742188, 224.87518310546875, -54.89581298828125, 307.5339660644531, -136.70675659179688, 4.209384918212891, -31.22399139404297, 119.31222534179688, 108.23872375488281, 306.0009765625, 73.48632049560547, 12.07029914855957, -6.939907073974609, 15.456100463867188, 246.28128051757812, 396.9325256347656, 118.5237808227539, 37.60509490966797, 218.2392120361328, 82.48936462402344, 168.86691284179688, 90.22445678710938, -40.47386169433594, 1.8177452087402344, 242.4774627685547, -89.02870178222656, 96.974853515625, 335.4753723144531, 205.7746124267578, 79.57745361328125, 47.15562438964844, 9.37678337097168, 603.6223754882812, 307.1652526855469, 615.2415771484375, -29.96753692626953, 248.5159912109375, -105.49583435058594, 11.056217193603516], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000246.npy"} +{"epoch": 0.36123348017621143, "step": 247, "batch_size": 64, "mean": 185.8557586669922, "std": 204.83453369140625, "min": -293.5360412597656, "p10": -17.83298873901366, "median": 157.30359649658203, "p90": 468.8611938476563, "max": 716.287353515625, "pos_frac": 0.859375, "sample": [-276.99951171875, 122.13584899902344, -51.32887268066406, 397.45037841796875, 67.52040100097656, 477.61199951171875, 175.668212890625, 453.63311767578125, -48.706947326660156, 394.2989501953125, 384.3016662597656, 495.7710266113281, 230.3004913330078, 265.9949645996094, 65.9476089477539, 348.84881591796875, 477.4769287109375, 52.105934143066406, 371.3898620605469, 314.6606140136719, 177.53585815429688, 26.73594856262207, 39.16825866699219, 136.26519775390625, 33.635406494140625, 289.357421875, 32.41529083251953, 84.67005920410156, 90.74042510986328, 129.91146850585938, 247.66986083984375, 359.5235595703125, 80.16828155517578, 153.23516845703125, 48.66407775878906, 398.17401123046875, 536.0364990234375, 351.3316650390625, 114.48268127441406, -162.05477905273438, -1.8876838684082031, 208.7266387939453, -7.195068359375, 206.28839111328125, 73.73519134521484, 30.41484832763672, 161.3720245361328, 416.6820068359375, 678.4849853515625, 99.89527130126953, 30.61981964111328, -140.35855102539062, 77.98243713378906, 716.287353515625, 208.85006713867188, -22.39209747314453, 178.57247924804688, 318.447509765625, 203.92320251464844, -293.5360412597656, 475.38751220703125, 23.090621948242188, 146.16261291503906, 219.46701049804688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000247.npy"} +{"epoch": 0.36270190895741555, "step": 248, "batch_size": 64, "mean": 161.8157501220703, "std": 150.04066467285156, "min": -190.25010681152344, "p10": 1.7972259521484393, "median": 152.4994125366211, "p90": 359.4876220703125, "max": 511.92523193359375, "pos_frac": 0.90625, "sample": [330.49468994140625, 360.9246520996094, 97.16519165039062, 172.69561767578125, 209.34886169433594, 1.0436668395996094, 140.0884552001953, 379.62615966796875, 127.71183776855469, -126.7486572265625, 22.062088012695312, 252.86434936523438, 212.82315063476562, 42.69541931152344, 246.84642028808594, 57.56330871582031, 25.416337966918945, 77.35432434082031, 174.4551544189453, -25.491291046142578, 164.91036987304688, 290.4295349121094, 239.86990356445312, 429.3071594238281, 41.995758056640625, -22.53125, 131.27447509765625, 343.3890380859375, 100.00868225097656, 233.41201782226562, 172.3992462158203, -190.25010681152344, 166.93283081054688, 248.97357177734375, 137.8258514404297, 189.30194091796875, 90.82893371582031, 5.1466217041015625, -10.5411376953125, 52.02827072143555, 228.25209045410156, 301.6678466796875, 203.06951904296875, 450.26861572265625, 113.15402221679688, 98.49717712402344, 456.07373046875, 299.2502136230469, 24.1478271484375, 511.92523193359375, 7.502281188964844, 209.31431579589844, 356.1345520019531, 263.39166259765625, 282.70465087890625, 411.7825622558594, 8.685028076171875, 63.81968688964844, 139.29541015625, 310.265625, 113.85669708251953, 3.555530548095703, 26.505374908447266, -120.56321716308594], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000248.npy"} +{"epoch": 0.3641703377386197, "step": 249, "batch_size": 64, "mean": 87.66857147216797, "std": 202.75552368164062, "min": -400.05828857421875, "p10": -161.00438385009764, "median": 73.23467254638672, "p90": 319.33792724609384, "max": 745.7396240234375, "pos_frac": 0.671875, "sample": [-8.806840896606445, 330.046630859375, -129.9193878173828, -103.74513244628906, 128.58773803710938, -148.2109375, 12.461044311523438, 125.12815856933594, 74.822509765625, 395.2098388671875, 17.327178955078125, 56.872283935546875, 65.92828369140625, 51.43279266357422, 263.180908203125, -400.05828857421875, -141.92076110839844, 400.99444580078125, 151.0691680908203, -81.618408203125, 144.89535522460938, 202.1719207763672, -194.40707397460938, 159.3177490234375, -39.79261779785156, 369.74176025390625, -173.52210998535156, 288.68902587890625, 108.31604766845703, 71.64683532714844, -37.163448333740234, -40.245906829833984, 249.002685546875, 105.68070983886719, 139.42245483398438, -28.94830322265625, 188.20767211914062, -342.19110107421875, 136.93434143066406, 272.0602722167969, 211.84140014648438, -25.055648803710938, 745.7396240234375, -97.60731506347656, -215.34719848632812, -166.48728942871094, 60.58472442626953, 34.05782699584961, 119.55618286132812, 199.9377899169922, 121.42365264892578, 269.9250183105469, -181.63125610351562, 273.67498779296875, 294.3509521484375, 278.342529296875, 70.89816284179688, -127.3687744140625, -60.211997985839844, 245.4879913330078, 21.454078674316406, 452.07476806640625, 436.44720458984375, 10.103691101074219], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000249.npy"} +{"epoch": 0.3656387665198238, "step": 250, "batch_size": 64, "mean": 166.430419921875, "std": 173.07870483398438, "min": -204.7109832763672, "p10": -37.67820663452146, "median": 169.42218780517578, "p90": 413.49140014648447, "max": 546.220703125, "pos_frac": 0.8125, "sample": [281.6188049316406, -14.161638259887695, 43.88835144042969, 2.663188934326172, -47.467864990234375, 186.8235626220703, 108.08350372314453, 514.8240966796875, 160.4532470703125, -204.7109832763672, 81.49048614501953, 520.6907958984375, 227.88662719726562, 168.2991485595703, 262.9056396484375, 175.62474060058594, 53.02342987060547, 150.12698364257812, 354.5452880859375, 394.16778564453125, 170.54522705078125, -84.41072845458984, -14.835670471191406, 101.78036499023438, 104.89252471923828, 226.64755249023438, 84.07747650146484, -8.318227767944336, 68.0372314453125, 15.36724853515625, -48.467445373535156, 43.457550048828125, 157.7947540283203, 294.0353088378906, -62.896141052246094, 546.220703125, 184.96044921875, 193.96014404296875, -134.124267578125, -5.831047058105469, 207.39930725097656, 430.29437255859375, 310.66497802734375, 197.28196716308594, 184.4102783203125, 47.33666229248047, 280.23583984375, 116.37012481689453, 304.79400634765625, 161.13067626953125, 82.12026977539062, -2.8213348388671875, 194.3028106689453, 263.3471374511719, -174.1077880859375, 263.758056640625, 421.77294921875, 238.79795837402344, 178.71878051757812, 493.4741516113281, 341.2181396484375, 7.97553825378418, 528.3903198242188, 321.013671875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000250.npy"} +{"epoch": 0.3671071953010279, "step": 251, "batch_size": 64, "mean": 109.18755340576172, "std": 194.24325561523438, "min": -377.455078125, "p10": -66.17723007202149, "median": 87.12510299682617, "p90": 349.5426300048828, "max": 732.4947509765625, "pos_frac": 0.703125, "sample": [400.4849548339844, 12.706844329833984, 346.5113830566406, -40.35643768310547, 237.42373657226562, -273.8641357421875, -377.455078125, 732.4947509765625, 4.075813293457031, 204.61563110351562, 212.62332153320312, -169.45858764648438, 350.84173583984375, 52.54218292236328, 116.954833984375, -29.255661010742188, -319.1978759765625, 482.4237976074219, -21.348180770874023, 176.72035217285156, 41.66679000854492, 283.7589111328125, -18.129348754882812, -63.94194793701172, 19.783294677734375, 182.52491760253906, -148.12225341796875, -67.13520812988281, 105.97476196289062, 271.46282958984375, 65.7049560546875, -141.09912109375, 319.1298828125, -37.60051727294922, 75.63384246826172, 232.44772338867188, 73.61602020263672, 147.80905151367188, 146.39964294433594, 159.53741455078125, -2.7366790771484375, 227.5704803466797, -17.9276065826416, 305.01702880859375, -5.377227783203125, 394.1947326660156, 202.4265594482422, 200.1084442138672, 93.2848892211914, 124.26245880126953, -44.94969940185547, 18.3082275390625, 550.6345825195312, 71.53919982910156, 352.14398193359375, 199.1600341796875, -28.3380184173584, 192.99981689453125, 80.96531677246094, -30.059986114501953, 29.11797332763672, 161.16897583007812, 41.842994689941406, 123.74171447753906], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000251.npy"} +{"epoch": 0.368575624082232, "step": 252, "batch_size": 64, "mean": 150.75619506835938, "std": 151.4440460205078, "min": -465.741455078125, "p10": -17.33648624420165, "median": 165.2125473022461, "p90": 331.7526794433594, "max": 492.259765625, "pos_frac": 0.859375, "sample": [276.6180419921875, 378.7261047363281, 45.94282913208008, 217.53329467773438, 247.46131896972656, 69.2491455078125, 130.76992797851562, 354.2627258300781, 83.11959838867188, 166.12326049804688, 224.31654357910156, 16.700042724609375, 185.14251708984375, 48.63728332519531, 112.84223937988281, 175.01185607910156, 76.16168212890625, 157.25624084472656, 165.48867797851562, -55.078697204589844, 288.77679443359375, 180.07151794433594, 12.688426971435547, 424.9608154296875, 255.66781616210938, -119.75627136230469, -22.404481887817383, 129.7208251953125, -20.53973388671875, 133.00381469726562, -28.31229019165039, 189.43881225585938, 22.720048904418945, 181.2623291015625, 94.97628784179688, 116.36920166015625, 122.75213623046875, 441.17535400390625, 388.38299560546875, 320.0746765136719, 50.00318908691406, 164.93641662597656, 185.9037322998047, 63.03172302246094, 165.59555053710938, 318.0008544921875, 280.6392822265625, 336.7575378417969, 199.36959838867188, 185.41368103027344, -8.093002319335938, -465.741455078125, 314.7691650390625, 269.4437255859375, 127.359375, 117.09257507324219, -9.862241744995117, 177.72610473632812, 492.259765625, -53.64254379272461, 99.3355712890625, 51.02130889892578, 193.55679321289062, 206.20599365234375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000252.npy"} +{"epoch": 0.3700440528634361, "step": 253, "batch_size": 64, "mean": 145.12869262695312, "std": 179.04647827148438, "min": -374.4208679199219, "p10": -67.42394332885742, "median": 139.3464126586914, "p90": 388.8109344482422, "max": 574.6834716796875, "pos_frac": 0.796875, "sample": [-61.203758239746094, 19.670310974121094, 273.96319580078125, 44.23728942871094, -2.721038818359375, 142.66778564453125, 239.28530883789062, 61.79753875732422, 199.38551330566406, 87.06422424316406, 106.0159912109375, 574.6834716796875, 206.21006774902344, 68.91170501708984, 248.97015380859375, 126.61428833007812, 400.2593078613281, 93.73225402832031, -98.90052795410156, 104.43861389160156, -16.12849235534668, -374.4208679199219, 249.86123657226562, 383.3353271484375, 391.1576232910156, 9.580728530883789, 448.9229736328125, 29.473243713378906, -20.47698974609375, 51.20322036743164, 20.00408172607422, -70.08973693847656, 153.3489990234375, 260.4599609375, 290.9809265136719, 224.8406982421875, 286.49761962890625, -158.30348205566406, 292.97674560546875, 122.94437408447266, 255.51515197753906, 342.94598388671875, 177.6456298828125, 296.7921142578125, 411.07098388671875, 136.02503967285156, 103.42829895019531, 412.6564025878906, 157.26937866210938, -254.9859619140625, 375.72613525390625, 180.06109619140625, -94.17669677734375, 85.7203598022461, -83.43716430664062, 106.93207550048828, 167.3606414794922, 233.04283142089844, 244.7938690185547, -33.556610107421875, -53.979740142822266, 44.6824951171875, 484.8486633300781, 180.60549926757812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000253.npy"} +{"epoch": 0.37151248164464024, "step": 254, "batch_size": 64, "mean": 124.68099975585938, "std": 207.5137176513672, "min": -262.0140380859375, "p10": -124.8730773925781, "median": 98.01498413085938, "p90": 411.819470214844, "max": 847.55078125, "pos_frac": 0.75, "sample": [93.928466796875, -40.060211181640625, 160.06918334960938, 84.0589599609375, 102.10150146484375, 147.17234802246094, 16.232452392578125, -102.96253204345703, 156.16653442382812, 30.86345672607422, 145.12551879882812, 195.7013397216797, 235.00033569335938, 57.27154541015625, -146.83360290527344, -262.0140380859375, 211.71005249023438, -38.4985466003418, 110.88494873046875, 224.828125, 8.45889663696289, 215.4497833251953, 531.890625, 336.3952941894531, 338.4203796386719, -199.02001953125, 461.307861328125, 316.4601745605469, -2.2264938354492188, 13.504129409790039, 112.72345733642578, 206.0488739013672, 67.13114166259766, -197.69894409179688, 17.272014617919922, 209.94805908203125, -49.169166564941406, 439.81805419921875, -104.31784057617188, 48.76956558227539, -17.724571228027344, -209.93707275390625, 61.01512908935547, 462.79376220703125, 6.001230239868164, 239.4207763671875, 5.008903503417969, 346.48944091796875, 79.32373809814453, 206.14337158203125, 847.55078125, 152.6879425048828, 90.72634887695312, -6.394859313964844, -169.01141357421875, 585.7459716796875, 127.45355224609375, 239.49554443359375, 161.37667846679688, 71.59211730957031, -133.68246459960938, 547.60009765625, 163.33847045898438, -29.3411865234375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000254.npy"} +{"epoch": 0.37298091042584436, "step": 255, "batch_size": 64, "mean": 132.70704650878906, "std": 189.4808349609375, "min": -447.93841552734375, "p10": -60.842465972900385, "median": 103.9317512512207, "p90": 369.8492980957032, "max": 572.7333984375, "pos_frac": 0.765625, "sample": [572.7333984375, -141.3651123046875, 22.406158447265625, 375.416015625, 184.29351806640625, -52.960784912109375, -51.11016082763672, -24.914993286132812, -447.93841552734375, 82.66246032714844, 305.95947265625, 112.58836364746094, -11.237213134765625, 30.243541717529297, 249.99746704101562, 270.0113830566406, -162.53543090820312, -235.30667114257812, 152.63516235351562, 310.76361083984375, 239.47964477539062, 48.111724853515625, 44.95317840576172, 18.143741607666016, 95.26997375488281, -64.22032928466797, -153.85614013671875, 397.38690185546875, -20.483684539794922, 201.64266967773438, 224.29173278808594, 241.33741760253906, 91.24006652832031, 332.2598571777344, 384.7375793457031, 310.6517639160156, 275.3226013183594, 382.4112548828125, 329.497802734375, 569.5404052734375, 51.13581085205078, -13.148696899414062, 274.5777893066406, 155.65609741210938, 25.272258758544922, 224.79469299316406, 76.75204467773438, 258.4145202636719, -143.98121643066406, 356.86029052734375, 412.49957275390625, 61.294395446777344, 138.04263305664062, 34.581459045410156, -16.416099548339844, 145.87881469726562, 41.139991760253906, 95.27513885498047, 41.344200134277344, 156.44842529296875, -47.429344177246094, 65.27197265625, 306.59161376953125, 302.33453369140625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000255.npy"} +{"epoch": 0.3744493392070485, "step": 256, "batch_size": 64, "mean": 146.28195190429688, "std": 183.1212921142578, "min": -314.4010925292969, "p10": -49.21319885253906, "median": 135.71261596679688, "p90": 372.2518524169922, "max": 622.768798828125, "pos_frac": 0.765625, "sample": [108.65626525878906, 569.931640625, 108.6969985961914, 232.3802947998047, 206.5738525390625, 273.13226318359375, 40.72924041748047, 195.81646728515625, -26.365291595458984, 21.325546264648438, 313.8954772949219, 216.34945678710938, 206.19784545898438, 365.5526123046875, 269.09417724609375, -141.25462341308594, 72.07586669921875, 228.95509338378906, -314.4010925292969, 41.4540901184082, 289.481689453125, 120.22636413574219, -165.9376983642578, 331.1501770019531, 59.51813888549805, 283.1534729003906, -61.512168884277344, -52.28504943847656, -88.81571960449219, 196.72096252441406, 413.2892150878906, -7.838531494140625, 239.26065063476562, 78.8306884765625, 74.329833984375, 136.85276794433594, 411.7208251953125, -40.69059753417969, 66.75785827636719, 47.29124450683594, -161.18724060058594, 50.186248779296875, 33.543792724609375, 13.006317138671875, 167.00592041015625, -21.03439712524414, 229.91552734375, 140.15750122070312, -34.472320556640625, 275.91851806640625, -42.04554748535156, 134.5724639892578, -15.928077697753906, 198.95050048828125, 352.87445068359375, 429.96337890625, 162.5159454345703, 66.6259765625, 622.768798828125, 530.4984130859375, 239.29971313476562, 375.1229553222656, -20.28212547302246, 313.7675476074219], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000256.npy"} +{"epoch": 0.37591776798825255, "step": 257, "batch_size": 64, "mean": 151.95468139648438, "std": 195.22584533691406, "min": -313.673095703125, "p10": -50.88205528259277, "median": 132.08747100830078, "p90": 445.9433868408203, "max": 677.869873046875, "pos_frac": 0.796875, "sample": [476.0771484375, -49.131587982177734, 26.938583374023438, 135.1664276123047, 374.80328369140625, 8.08111572265625, 259.0820617675781, 443.9334716796875, 247.52175903320312, 162.62948608398438, -27.441640853881836, -51.63225555419922, 116.72836303710938, 313.4749755859375, 153.06753540039062, 28.966033935546875, 228.7227783203125, 260.4646301269531, 28.82980728149414, 446.8047790527344, 45.94007873535156, 401.2480163574219, 289.37103271484375, -160.3641357421875, 100.06193542480469, 132.5591278076172, -135.41954040527344, 18.13159942626953, -30.599895477294922, 491.7574768066406, 177.10069274902344, 264.18316650390625, 279.0517578125, 677.869873046875, 131.61581420898438, -58.430419921875, 114.64634704589844, 0.07093429565429688, 132.91461181640625, -313.673095703125, 101.6985855102539, 466.9471130371094, 49.38423538208008, 244.679931640625, 195.53465270996094, 223.99017333984375, 63.38417053222656, -3.8721542358398438, 556.1361694335938, 260.77471923828125, 277.0165710449219, 308.3380126953125, 14.734626770019531, 116.18163299560547, 53.467376708984375, 536.2830810546875, -93.05276489257812, -12.642288208007812, 169.53564453125, -210.90122985839844, 33.84452819824219, 40.37663269042969, -19.192581176757812, 211.33038330078125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000257.npy"} +{"epoch": 0.37738619676945667, "step": 258, "batch_size": 64, "mean": 148.65061950683594, "std": 212.47198486328125, "min": -570.3270263671875, "p10": -64.81507492065428, "median": 142.6807861328125, "p90": 386.07298583984374, "max": 738.8070068359375, "pos_frac": 0.796875, "sample": [318.93499755859375, 45.102638244628906, -98.68589782714844, 274.86181640625, 71.88421630859375, 138.37362670898438, 272.60821533203125, 277.7524719238281, 239.16897583007812, -20.14923095703125, -34.14727020263672, -14.475440979003906, -69.08235931396484, 66.26243591308594, 111.46146392822266, -49.21733093261719, -14.463691711425781, 118.39474487304688, 171.8531951904297, 1.6016769409179688, 66.87232971191406, 269.0370178222656, 548.1991577148438, 224.46128845214844, 496.7746276855469, 259.23675537109375, 187.53570556640625, 20.792259216308594, 170.13372802734375, 376.01885986328125, 196.6266632080078, 299.20416259765625, 330.11810302734375, 190.60707092285156, -221.99948120117188, 99.48622131347656, 294.77587890625, -394.61749267578125, 5.482505798339844, 131.8762664794922, 404.99737548828125, 17.004520416259766, -97.56967163085938, 406.26873779296875, 33.03676986694336, -570.3270263671875, 738.8070068359375, 146.88302612304688, -156.2074737548828, 76.91097259521484, 251.3134765625, -54.85807800292969, 319.12042236328125, 387.48480224609375, 382.77874755859375, 42.09636688232422, 306.31024169921875, 460.4103698730469, 153.52378845214844, 285.05242919921875, 76.21699523925781, 138.47854614257812, 312.06512451171875, 95.18201446533203], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000258.npy"} +{"epoch": 0.3788546255506608, "step": 259, "batch_size": 64, "mean": 124.8755111694336, "std": 191.82582092285156, "min": -217.57203674316406, "p10": -97.88839340209961, "median": 93.69174194335938, "p90": 376.1208740234375, "max": 569.7210693359375, "pos_frac": 0.75, "sample": [218.5473175048828, -21.621835708618164, 149.43701171875, 211.7366180419922, 13.732341766357422, -78.30728149414062, 344.7251281738281, 460.1427917480469, 482.4426574707031, 238.7095947265625, 56.698448181152344, -100.85851287841797, 96.06150817871094, -87.25385284423828, 7.501617431640625, -90.95811462402344, 354.374755859375, -54.355255126953125, 480.94195556640625, 178.109375, 143.64828491210938, 126.68243408203125, 93.76446533203125, 66.44056701660156, 310.0704345703125, 29.288909912109375, 72.44412231445312, 39.65142822265625, 569.7210693359375, -211.44288635253906, 73.46859741210938, 32.555694580078125, 542.7753295898438, 193.33285522460938, 34.621299743652344, -182.7185516357422, 147.73260498046875, 338.9146728515625, -161.71817016601562, 340.0338134765625, -164.92416381835938, 15.646873474121094, 373.368896484375, -102.14411926269531, 93.6190185546875, 63.15159606933594, 377.30029296875, 308.2544250488281, -217.57203674316406, 151.6635284423828, -25.232633590698242, 296.45599365234375, 286.195556640625, -32.31959533691406, 395.79962158203125, -78.1821060180664, 120.8212890625, 322.9433898925781, 149.38558959960938, -64.13233184814453, 64.49359130859375, 28.553356170654297, 154.5172119140625, 15.296272277832031], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000259.npy"} +{"epoch": 0.3803230543318649, "step": 260, "batch_size": 64, "mean": 171.0243682861328, "std": 196.91192626953125, "min": -209.3187255859375, "p10": -15.99048233032226, "median": 134.07290649414062, "p90": 412.4797271728516, "max": 1109.668212890625, "pos_frac": 0.859375, "sample": [228.22482299804688, -59.34954071044922, 54.377113342285156, -99.10269927978516, 13.777303695678711, 183.42608642578125, 119.85875701904297, 194.8769073486328, 393.68792724609375, 47.01356506347656, 71.42625427246094, 433.47357177734375, 115.76902770996094, 113.00282287597656, 169.2802276611328, 1109.668212890625, 172.4838104248047, 260.6913146972656, 44.037811279296875, 478.659423828125, -209.3187255859375, 165.87359619140625, -44.008880615234375, 164.9341278076172, -105.42938995361328, -6.691627502441406, 161.9884490966797, -18.496566772460938, 250.91476440429688, 129.2352294921875, 138.54397583007812, 371.0994873046875, -10.142951965332031, 278.7601318359375, 138.63954162597656, 380.7367858886719, 399.6031494140625, 164.402099609375, 105.81254577636719, 100.70736694335938, 108.86164855957031, 66.2216796875, 347.766357421875, 120.77054595947266, 165.03643798828125, 15.921932220458984, 43.33442687988281, 99.79782104492188, 191.10467529296875, 208.0526580810547, 221.4334716796875, 189.20260620117188, 129.60183715820312, 437.8387451171875, 36.821739196777344, -44.72119903564453, 79.42599487304688, 431.76849365234375, 16.87712860107422, 610.2061157226562, 106.20687103271484, 417.9982604980469, 47.3170166015625, 296.26910400390625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000260.npy"} +{"epoch": 0.38179148311306904, "step": 261, "batch_size": 64, "mean": 158.46177673339844, "std": 215.7183074951172, "min": -322.2063903808594, "p10": -57.55540847778318, "median": 142.3313446044922, "p90": 408.1753173828125, "max": 943.3585815429688, "pos_frac": 0.765625, "sample": [406.9898376464844, 408.6833801269531, 45.565834045410156, -246.45005798339844, 125.31739807128906, 943.3585815429688, 29.197919845581055, 372.1781005859375, -33.125492095947266, 230.97366333007812, 35.51031494140625, 85.02591705322266, -136.54119873046875, 427.47552490234375, 139.9716033935547, 232.36703491210938, 245.300537109375, 385.85809326171875, 379.75372314453125, -38.73401641845703, 98.65962219238281, 84.56967163085938, 144.6910858154297, 206.430908203125, 192.60812377929688, 322.03375244140625, 535.368896484375, 14.00421142578125, 324.9841003417969, 220.19285583496094, 74.77357482910156, 31.26020050048828, -1.3742218017578125, 88.68989562988281, 266.67828369140625, 54.14226531982422, 291.16326904296875, 389.8250732421875, 266.62109375, 147.0626220703125, 213.4549560546875, -28.371883392333984, 510.2451477050781, 429.7139892578125, -6.991205215454102, 396.50909423828125, -194.5102996826172, 231.64732360839844, -5.736473083496094, 239.791259765625, 117.8246841430664, 508.11907958984375, -13.427070617675781, -322.2063903808594, -192.4288330078125, 117.1373291015625, -211.07290649414062, -16.873098373413086, 156.04092407226562, 200.5883026123047, 60.30455780029297, 65.52205657958984, 160.83270263671875, -65.62171936035156], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000261.npy"} +{"epoch": 0.3832599118942731, "step": 262, "batch_size": 64, "mean": 131.42083740234375, "std": 172.41506958007812, "min": -238.1148223876953, "p10": -74.82604560852049, "median": 131.45094299316406, "p90": 296.7111846923828, "max": 778.0416870117188, "pos_frac": 0.796875, "sample": [86.34834289550781, 434.7345886230469, 114.71891021728516, 211.55177307128906, -238.1148223876953, 329.0587463378906, 471.826904296875, 126.92463684082031, 152.289794921875, 270.1745300292969, -150.09446716308594, -222.847412109375, 153.79098510742188, 778.0416870117188, 186.20947265625, -178.40451049804688, 169.4578399658203, 276.6352233886719, -55.873008728027344, 191.65133666992188, 168.57504272460938, 68.47804260253906, -81.64115905761719, 328.8730773925781, 143.9168701171875, -30.125198364257812, 48.211978912353516, 397.7220153808594, 120.095947265625, 296.7274169921875, 45.429466247558594, 89.83321380615234, 83.88156127929688, -58.92411422729492, 95.67018127441406, 79.93619537353516, 184.8245849609375, -26.028610229492188, 159.48092651367188, 163.85438537597656, 118.51217651367188, 155.3582000732422, 12.139915466308594, 292.314697265625, 250.6124725341797, 108.79473114013672, 256.79156494140625, 131.08331298828125, -97.80977630615234, -45.66889190673828, 83.51301574707031, 39.332122802734375, 46.49150085449219, 296.6733093261719, -169.18801879882812, -51.77635955810547, 131.81857299804688, 185.4437255859375, 261.8998107910156, 237.14254760742188, 191.02963256835938, 49.35124206542969, 284.04010009765625, 256.1611022949219], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000262.npy"} +{"epoch": 0.38472834067547723, "step": 263, "batch_size": 64, "mean": 187.4507598876953, "std": 221.4635772705078, "min": -336.40606689453125, "p10": -61.040560913085926, "median": 155.98514556884766, "p90": 470.0237091064455, "max": 803.8729248046875, "pos_frac": 0.8125, "sample": [347.8799133300781, -336.40606689453125, 155.16940307617188, 155.66050720214844, 379.11041259765625, 54.51604461669922, 336.86669921875, 41.540618896484375, 59.67503356933594, 107.0404281616211, -274.321044921875, 203.0571746826172, 78.05609893798828, 309.42205810546875, 335.0946960449219, 192.23606872558594, 19.683605194091797, 373.8900146484375, 127.30084991455078, 162.3268280029297, -9.713113784790039, -74.38552856445312, 49.51636505126953, 337.3536071777344, -30.290607452392578, -78.66778564453125, 332.8293151855469, 144.67633056640625, -15.423545837402344, 92.44270324707031, 27.527236938476562, 31.2496395111084, 358.7048645019531, 59.23216247558594, -49.78800964355469, 515.3109741210938, 288.48907470703125, 115.50627136230469, 423.6820983886719, 596.407470703125, 94.3998794555664, 543.4097900390625, 120.99017333984375, 284.99749755859375, 332.2549743652344, 170.12765502929688, 489.8843994140625, -65.86308288574219, 242.845703125, 264.317138671875, 395.7500305175781, 423.17657470703125, 706.95361328125, -191.32188415527344, 13.112350463867188, 582.9981689453125, -40.86103057861328, -84.53150177001953, 211.1535186767578, 216.40176391601562, 139.3705291748047, 803.8729248046875, 156.30978393554688, 244.64019775390625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000263.npy"} +{"epoch": 0.38619676945668135, "step": 264, "batch_size": 64, "mean": 155.2544403076172, "std": 218.0766143798828, "min": -253.74996948242188, "p10": -90.91021499633788, "median": 121.14303588867188, "p90": 452.53844909667976, "max": 682.6435546875, "pos_frac": 0.765625, "sample": [251.59109497070312, 296.6121520996094, 49.82022476196289, 682.6435546875, 75.58747863769531, -219.0690155029297, -183.056396484375, 381.2637634277344, 86.39456176757812, 523.4713134765625, -52.68421173095703, 36.46562957763672, 108.4988021850586, -165.11981201171875, 184.0355224609375, 157.20556640625, 326.5013427734375, 438.41375732421875, 424.3445129394531, 427.0194091796875, 78.63897705078125, 273.2957763671875, 267.9305419921875, 213.39637756347656, 58.26069641113281, 26.026016235351562, -74.32506561279297, -28.006484985351562, -81.5998764038086, 200.8399200439453, -94.90036010742188, -21.68737030029297, 47.28612518310547, 625.6817016601562, 3.5279998779296875, 458.5918884277344, 141.78219604492188, 248.07272338867188, 34.89851379394531, 265.23583984375, 57.336151123046875, 228.91709899902344, 126.56625366210938, 115.71981811523438, -10.862052917480469, 625.7743530273438, 406.87994384765625, -253.74996948242188, 213.1517333984375, -168.66510009765625, 191.25645446777344, 5.848957061767578, 228.36447143554688, 485.02508544921875, -35.65186309814453, 163.7140350341797, 39.647743225097656, 61.931640625, -204.7493133544922, 537.374755859375, 336.19598388671875, 289.8671875, -11.826335906982422, 35.33149719238281], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000264.npy"} +{"epoch": 0.3876651982378855, "step": 265, "batch_size": 64, "mean": 149.31336975097656, "std": 244.80203247070312, "min": -504.84210205078125, "p10": -104.73940582275388, "median": 112.5988540649414, "p90": 435.7701599121094, "max": 838.3643798828125, "pos_frac": 0.75, "sample": [90.9531478881836, 423.6453857421875, 364.9139709472656, 242.2597198486328, 53.09398651123047, 172.75921630859375, 342.736328125, 26.88164520263672, 705.2894287109375, 86.2408447265625, 244.386962890625, -301.1594543457031, 12.47525405883789, -11.44321060180664, -129.4320068359375, 57.849178314208984, 357.6068420410156, -115.29544067382812, 100.27838897705078, -63.1187744140625, 258.29071044921875, 838.3643798828125, -504.84210205078125, 556.20166015625, -116.58694458007812, 151.65707397460938, 229.24789428710938, 466.7527770996094, -80.10865783691406, 761.3195190429688, -230.5754852294922, 296.7624206542969, 63.63111877441406, -53.6014518737793, 91.77963256835938, -45.808563232421875, 5.75909423828125, -50.53387451171875, 396.7675476074219, -77.35142517089844, 458.5334167480469, 109.8984375, 81.42938995361328, 392.4795837402344, -25.722145080566406, 171.90097045898438, 20.598388671875, 169.0902099609375, -27.066604614257812, 277.5442810058594, 92.41826629638672, 179.88232421875, 132.61813354492188, 412.0621032714844, 169.8807373046875, 150.61634826660156, 226.67640686035156, 115.29927062988281, 53.283321380615234, -332.1430969238281, 440.96649169921875, 96.08224487304688, 334.05828857421875, 237.6222381591797], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000265.npy"} +{"epoch": 0.3891336270190896, "step": 266, "batch_size": 64, "mean": 120.13715362548828, "std": 199.96270751953125, "min": -288.4902648925781, "p10": -86.67381362915039, "median": 75.20017623901367, "p90": 426.7278686523438, "max": 656.272705078125, "pos_frac": 0.703125, "sample": [-18.84954833984375, 537.6435546875, 61.814002990722656, 124.98650360107422, 283.1392822265625, 125.26814270019531, -38.63923645019531, 293.4803161621094, 73.21041870117188, 430.328857421875, 122.20668029785156, 293.56610107421875, -70.71394348144531, 262.4126281738281, -78.91361999511719, 67.34153747558594, 278.97918701171875, 90.33187866210938, 215.3141326904297, 55.232826232910156, 129.9258270263672, -275.1231994628906, -89.9996109008789, 389.4710693359375, 154.87185668945312, 458.0047912597656, 159.86610412597656, 20.409950256347656, 34.818397521972656, 418.3255615234375, 494.98291015625, 313.65325927734375, 656.272705078125, 456.68798828125, 231.6717987060547, 74.416259765625, 49.32139205932617, 259.02215576171875, 38.87696075439453, 75.98409271240234, 59.970489501953125, 156.0662841796875, 64.8038558959961, -26.44194984436035, 162.38209533691406, -15.691509246826172, -288.4902648925781, -248.054931640625, 290.355224609375, 69.25713348388672, -144.87222290039062, -66.17586517333984, -21.174758911132812, -138.05755615234375, -25.998336791992188, -40.13963317871094, -21.031213760375977, -27.76195526123047, -187.83291625976562, 436.2995300292969, 205.94561767578125, 232.50509643554688, 15.473976135253906, 87.84161376953125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000266.npy"} +{"epoch": 0.39060205580029367, "step": 267, "batch_size": 64, "mean": 146.06735229492188, "std": 214.38150024414062, "min": -495.1610412597656, "p10": -56.861357498168935, "median": 126.55362319946289, "p90": 345.64320373535156, "max": 760.2772216796875, "pos_frac": 0.796875, "sample": [82.3231430053711, 170.62649536132812, 340.02490234375, -46.6252326965332, 136.26092529296875, -10.178266525268555, 287.3731994628906, 161.53750610351562, 4.258270263671875, 25.332233428955078, -40.76129150390625, -72.49356079101562, 15.4085693359375, 345.1974792480469, 179.3680419921875, 122.81694793701172, -89.81856536865234, 233.88302612304688, 528.8134765625, 743.5308227539062, 91.70362854003906, 82.41907501220703, 140.39710998535156, 6.6216888427734375, -145.6216583251953, 97.5156478881836, 85.99932861328125, 107.66121673583984, 345.834228515625, 218.32618713378906, 574.6083984375, 419.8919372558594, 760.2772216796875, 39.453460693359375, -8.958301544189453, 293.2537841796875, 22.746501922607422, -61.248268127441406, 13.08987045288086, 165.10211181640625, 275.3046875, 19.727081298828125, -6.118877410888672, 587.73583984375, -495.1610412597656, -19.35448455810547, -363.93902587890625, 208.04461669921875, 193.83160400390625, 114.18386840820312, 225.27073669433594, 248.47586059570312, 100.79991149902344, 290.1578369140625, 76.63941192626953, 146.52516174316406, -74.32212829589844, 336.4830627441406, 71.1436538696289, 139.46368408203125, 314.113525390625, 173.40447998046875, 289.6595764160156, 130.29029846191406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000267.npy"} +{"epoch": 0.3920704845814978, "step": 268, "batch_size": 64, "mean": 150.69021606445312, "std": 217.39849853515625, "min": -417.1619873046875, "p10": -48.48125381469726, "median": 136.83394622802734, "p90": 376.008856201172, "max": 834.7050170898438, "pos_frac": 0.828125, "sample": [9.680580139160156, 466.7388916015625, 90.00299835205078, -44.378746032714844, 77.63764953613281, 65.77169036865234, 148.06101989746094, 97.45014953613281, 388.7237548828125, 102.25767517089844, 200.845947265625, -346.62506103515625, 184.03506469726562, 49.166229248046875, 279.9761657714844, -226.63638305664062, 134.1185302734375, 73.55708312988281, -244.25534057617188, 139.5493621826172, 170.22540283203125, 834.7050170898438, 92.56703186035156, 150.191650390625, 74.81089782714844, 301.287841796875, 478.81201171875, 565.5496215820312, -31.695884704589844, 346.34075927734375, -81.85074615478516, 76.08485412597656, -50.239471435546875, -2.214435577392578, 611.115234375, 165.3040008544922, 210.32168579101562, 163.27638244628906, 243.49343872070312, 247.4038543701172, 160.89678955078125, 314.93988037109375, 11.398811340332031, -58.81513977050781, 338.21343994140625, 30.38958740234375, 204.2572479248047, 311.7001953125, 8.100860595703125, 258.50390625, 209.3256378173828, 118.81761169433594, 736.6690063476562, 215.94125366210938, 77.5870361328125, 50.93180847167969, 130.6639862060547, 28.26868438720703, 335.1182861328125, 7.640764236450195, 212.9683837890625, -417.1619873046875, -29.671131134033203, 176.32240295410156], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000268.npy"} +{"epoch": 0.3935389133627019, "step": 269, "batch_size": 64, "mean": 172.00210571289062, "std": 230.6515350341797, "min": -461.2736511230469, "p10": -96.62476730346678, "median": 157.4090576171875, "p90": 454.7308959960938, "max": 702.2151489257812, "pos_frac": 0.75, "sample": [131.1993865966797, 264.98236083984375, 146.77391052246094, 330.98272705078125, -88.93831634521484, 232.02362060546875, 263.78955078125, 168.04420471191406, -109.75651550292969, -49.098045349121094, 314.1315002441406, -40.261085510253906, 498.0959777832031, 17.58441162109375, 376.48175048828125, 306.87054443359375, -27.134376525878906, 194.5541534423828, -99.91896057128906, -461.2736511230469, 305.3946533203125, 17.075672149658203, 233.83584594726562, -211.63510131835938, -84.19779968261719, 430.30572509765625, -5.673179626464844, 638.360107421875, 511.81134033203125, 121.67242431640625, 141.9725341796875, 81.08258056640625, 431.55255126953125, 295.2222900390625, 4.2537994384765625, 342.845703125, 439.4634704589844, 113.7399673461914, 291.43695068359375, 14.6636962890625, 22.90027618408203, 140.9940643310547, -213.96507263183594, 27.678314208984375, 135.54962158203125, 553.5188598632812, 330.7432861328125, 515.136962890625, 338.7759094238281, -41.80632019042969, 309.3341369628906, 262.1252746582031, 0.355865478515625, 702.2151489257812, -128.17640686035156, 461.2740783691406, -18.5155029296875, 15.368705749511719, -25.992473602294922, 237.2620849609375, 436.6671142578125, 439.3267822265625, -174.580810546875, 199.62881469726562], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000269.npy"} +{"epoch": 0.39500734214390604, "step": 270, "batch_size": 64, "mean": 128.63064575195312, "std": 206.1525115966797, "min": -428.5291442871094, "p10": -77.9128646850586, "median": 126.37028503417969, "p90": 362.4680908203125, "max": 785.8900146484375, "pos_frac": 0.765625, "sample": [241.26058959960938, 311.7138977050781, 441.4210205078125, -293.0443420410156, -428.5291442871094, -58.42851257324219, 50.816490173339844, 295.3153381347656, 120.18994903564453, 325.1404724121094, 356.9713134765625, -50.410003662109375, 139.18798828125, 231.75648498535156, 17.65521240234375, 50.9736213684082, 92.17855072021484, 134.2962646484375, -170.43112182617188, 224.91360473632812, 263.8642578125, 142.6193084716797, -27.422386169433594, 192.76162719726562, 474.6627197265625, 271.7133483886719, 6.009707450866699, 101.53165435791016, 219.77139282226562, 130.23626708984375, -256.16021728515625, 377.18218994140625, 785.8900146484375, -78.63294982910156, -110.27093505859375, 298.57745361328125, 273.58380126953125, 36.70526123046875, 103.47196960449219, -2.7162818908691406, 585.566650390625, -223.78369140625, 182.72695922851562, 13.413341522216797, 124.44967651367188, 128.2908935546875, 24.144363403320312, 230.603271484375, -76.232666015625, -12.368297576904297, 247.679443359375, 307.0276184082031, 51.10487365722656, 13.901182174682617, 13.304489135742188, 175.29373168945312, -6.2633819580078125, 57.49412536621094, -33.243263244628906, 364.8238525390625, 165.49935913085938, 140.87765502929688, 57.52280807495117, 464.2027587890625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000270.npy"} +{"epoch": 0.3964757709251101, "step": 271, "batch_size": 64, "mean": 170.47811889648438, "std": 193.81561279296875, "min": -170.56869506835938, "p10": -23.678686141967773, "median": 124.66264724731445, "p90": 450.6236572265625, "max": 749.2081909179688, "pos_frac": 0.796875, "sample": [749.2081909179688, 397.43841552734375, 119.16219329833984, -53.619991302490234, 271.2344970703125, -21.893024444580078, 396.7301330566406, 378.2065734863281, 15.324287414550781, -10.928810119628906, 351.29620361328125, -147.34542846679688, 452.3583984375, -170.56869506835938, 71.8161392211914, -19.86046600341797, 70.44412231445312, -12.913780212402344, 177.69020080566406, -24.4439697265625, -82.61277770996094, 7.7742462158203125, 492.63916015625, 365.25445556640625, 530.9806518554688, 446.575927734375, 77.82353973388672, -56.81828308105469, 63.115821838378906, 126.9026870727539, -5.281154632568359, 251.7760467529297, 155.07269287109375, -13.585350036621094, -50.61444091796875, 144.83921813964844, 105.60409545898438, 34.30133819580078, 419.13543701171875, 122.422607421875, 21.39427947998047, 537.5142211914062, 62.572898864746094, 4.844175338745117, 115.38542175292969, 18.361583709716797, 119.7545166015625, 171.6053924560547, 333.2038879394531, 205.19491577148438, 501.50262451171875, 305.394287109375, 140.63302612304688, 44.97981262207031, 494.61767578125, 20.676612854003906, 226.3814697265625, 355.052001953125, 69.10641479492188, 214.22154235839844, 174.50729370117188, 264.4947204589844, 139.88137817382812, 244.67823791503906], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000271.npy"} +{"epoch": 0.39794419970631423, "step": 272, "batch_size": 64, "mean": 204.4078369140625, "std": 195.80520629882812, "min": -190.6487579345703, "p10": -63.94967269897461, "median": 214.42191314697266, "p90": 460.3670501708985, "max": 677.3529052734375, "pos_frac": 0.78125, "sample": [282.41033935546875, 244.2353515625, -39.98643493652344, -78.78565979003906, -21.919540405273438, 422.58514404296875, 247.25816345214844, 466.82073974609375, 484.64080810546875, 303.500244140625, 352.5870361328125, 389.7976989746094, -85.28675079345703, 200.10635375976562, -144.47544860839844, 64.31379699707031, 88.88542938232422, 217.7841339111328, 186.48214721679688, 177.646728515625, -92.81378173828125, 273.5528564453125, 352.6561584472656, 219.97930908203125, 329.6940002441406, 100.88802337646484, -12.06014633178711, 281.1163330078125, 123.80265045166016, 677.3529052734375, 42.61805725097656, 241.01504516601562, 219.4859619140625, 135.71389770507812, 80.47796630859375, 399.93890380859375, 483.0151062011719, -190.6487579345703, 193.57254028320312, 254.51220703125, 619.5618896484375, -6.05029296875, 539.4905395507812, 161.7591094970703, 352.90673828125, -63.10717010498047, 335.52447509765625, 26.768383026123047, 232.06813049316406, 445.3084411621094, -30.824996948242188, 199.2668914794922, -0.464324951171875, -79.36564636230469, -64.31074523925781, 310.81103515625, 287.97210693359375, 44.122154235839844, 444.56671142578125, 169.4157257080078, 211.0596923828125, 413.736328125, 479.5628662109375, 179.85403442382812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000272.npy"} +{"epoch": 0.39941262848751835, "step": 273, "batch_size": 64, "mean": 192.7875213623047, "std": 211.58041381835938, "min": -329.67181396484375, "p10": -73.88568954467773, "median": 195.17152404785156, "p90": 460.81867370605477, "max": 708.9423217773438, "pos_frac": 0.796875, "sample": [254.72567749023438, 341.9808349609375, 71.97384643554688, 157.39837646484375, 614.340087890625, 466.98291015625, 186.93655395507812, -91.21036529541016, 609.4458618164062, -31.137405395507812, 355.3369445800781, 225.17169189453125, -12.185577392578125, 62.575767517089844, 1.1461505889892578, 142.56246948242188, -6.053882598876953, 207.97769165039062, 227.34927368164062, 349.42230224609375, 214.65318298339844, 41.819305419921875, 89.19143676757812, 152.59254455566406, 113.61167907714844, 44.207313537597656, 344.44549560546875, 208.83193969726562, 149.74533081054688, 203.406494140625, 318.3419189453125, -26.73040008544922, 359.9744567871094, 318.9502868652344, -23.46256446838379, 676.580078125, 547.0921020507812, 251.86595153808594, 205.09097290039062, 307.6649169921875, -169.53781127929688, -133.04930114746094, 309.60272216796875, -70.28954315185547, 264.9183044433594, 298.86859130859375, 415.78643798828125, 46.27162170410156, 173.9219970703125, -329.67181396484375, 708.9423217773438, 174.10122680664062, 90.85841369628906, -107.88726806640625, -85.32656860351562, 318.6493835449219, 20.546066284179688, 126.13075256347656, 354.54840087890625, -75.42689514160156, 349.11151123046875, 98.34452056884766, 446.4354553222656, 479.941162109375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000273.npy"} +{"epoch": 0.4008810572687225, "step": 274, "batch_size": 64, "mean": 145.5963134765625, "std": 198.2542724609375, "min": -248.1638946533203, "p10": -129.47023162841796, "median": 132.72571563720703, "p90": 398.96130371093756, "max": 644.9273071289062, "pos_frac": 0.765625, "sample": [50.83362579345703, 97.6507568359375, 36.98625183105469, -155.84925842285156, 240.57907104492188, 187.14285278320312, 164.56480407714844, -35.64636993408203, 111.89147186279297, -26.659622192382812, -133.0863037109375, 474.7393493652344, 391.1580810546875, 239.7408447265625, 286.8028869628906, 185.4134979248047, -29.33441925048828, 504.35601806640625, -248.1638946533203, 278.7105712890625, -137.7900848388672, 128.16798400878906, -70.51922607421875, 84.09469604492188, 180.52987670898438, 278.22149658203125, 402.3055419921875, -140.53713989257812, 93.89412689208984, 644.9273071289062, -39.09364700317383, 101.35665893554688, 289.8938293457031, 174.67672729492188, 16.688989639282227, 185.6842803955078, 452.65667724609375, -29.588226318359375, 197.41835021972656, 88.55329895019531, 288.0478210449219, 3.0040435791015625, 134.21925354003906, 296.61279296875, 372.0082092285156, -76.18721771240234, 124.61412048339844, 9.246953964233398, 171.31808471679688, 554.5057373046875, 317.0419616699219, 172.2782745361328, 2.4698104858398438, 131.232177734375, -203.93978881835938, 326.9708251953125, -143.76675415039062, -121.03273010253906, 565.243896484375, 178.51315307617188, 124.15289306640625, 371.639892578125, 7.051483154296875, 189.54754638671875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000274.npy"} +{"epoch": 0.4023494860499266, "step": 275, "batch_size": 64, "mean": 160.09921264648438, "std": 197.6660919189453, "min": -265.2099304199219, "p10": -73.65626678466796, "median": 159.21900177001953, "p90": 427.78470764160164, "max": 607.6845703125, "pos_frac": 0.78125, "sample": [195.98805236816406, 62.54988098144531, 329.9980163574219, 5.095500946044922, -6.168848037719727, -74.75553131103516, 403.0414123535156, 161.1547393798828, 522.4774169921875, -130.6414794921875, 245.37078857421875, 396.2940673828125, 572.1036987304688, 82.37928009033203, 170.28170776367188, 296.7982177734375, -38.69601821899414, 22.936019897460938, 153.25253295898438, 207.66539001464844, -132.44940185546875, 227.48129272460938, 450.6007385253906, 13.093055725097656, 9.436683654785156, -198.93228149414062, 327.402099609375, 157.28326416015625, 173.11102294921875, 65.43346405029297, -79.89958953857422, 43.11338806152344, 360.8463134765625, 222.59768676757812, -14.304780960083008, 41.504638671875, -193.85952758789062, -11.562347412109375, -52.17681121826172, 29.273300170898438, 98.29995727539062, 366.88665771484375, -265.2099304199219, 180.02218627929688, 242.60939025878906, 459.4648132324219, 215.69857788085938, 607.6845703125, 238.00515747070312, 499.17889404296875, 438.38897705078125, -24.593469619750977, 258.9211120605469, 364.7020263671875, 62.230873107910156, 313.5560607910156, 155.62413024902344, 366.09814453125, -71.09131622314453, 215.68896484375, 58.557655334472656, 322.4261169433594, 62.020263671875, 66.06356811523438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000275.npy"} +{"epoch": 0.40381791483113066, "step": 276, "batch_size": 64, "mean": 160.9514617919922, "std": 223.380126953125, "min": -293.21759033203125, "p10": -78.48564605712889, "median": 156.62210845947266, "p90": 459.65664062500014, "max": 896.7323608398438, "pos_frac": 0.796875, "sample": [111.88512420654297, 896.7323608398438, 203.51890563964844, 473.2923583984375, 503.42669677734375, 7.093023300170898, -17.555694580078125, 640.2945556640625, 48.38203430175781, 197.4744415283203, 162.5699920654297, 16.15912628173828, -41.6461181640625, 265.74359130859375, 293.1999816894531, 313.8934631347656, 198.56088256835938, -293.21759033203125, 125.82810974121094, -11.427120208740234, 29.794933319091797, 355.47149658203125, 226.82249450683594, 348.919189453125, -88.22950744628906, -248.56106567382812, -20.85171890258789, 77.31214904785156, 216.63076782226562, 344.8362731933594, -55.749969482421875, 171.79298400878906, 37.97875213623047, 150.67422485351562, 354.87017822265625, 267.472900390625, -190.47708129882812, -261.99957275390625, 7.220766067504883, 15.437984466552734, -143.1179656982422, 23.7493896484375, 84.4998779296875, 497.0780334472656, 93.7077407836914, 147.06600952148438, 250.80787658691406, 197.1218719482422, 289.0141296386719, 28.848793029785156, 491.3009033203125, -34.04557800292969, 165.7733154296875, 248.11248779296875, 32.914703369140625, 251.54115295410156, 244.79234313964844, 195.15809631347656, 392.19976806640625, 91.53805541992188, -162.2613525390625, 615.5262451171875, 38.15333557128906, 427.8399658203125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000276.npy"} +{"epoch": 0.4052863436123348, "step": 277, "batch_size": 64, "mean": 164.33425903320312, "std": 219.0990447998047, "min": -277.97503662109375, "p10": -124.62420425415037, "median": 143.84417724609375, "p90": 467.99080200195317, "max": 581.8347778320312, "pos_frac": 0.78125, "sample": [5.107326507568359, 332.56854248046875, -189.91029357910156, 146.935791015625, 34.57699203491211, 291.2998046875, 403.146240234375, 186.85366821289062, -131.47267150878906, 214.19754028320312, 220.51181030273438, -29.339025497436523, 426.5616455078125, -77.8724136352539, 224.18360900878906, 402.41412353515625, 35.852012634277344, 138.42193603515625, 215.92050170898438, 310.44281005859375, -86.3983383178711, 401.5262145996094, 54.29011535644531, 56.27521514892578, -229.6639404296875, -108.64444732666016, 562.50732421875, 9.546905517578125, -30.4005126953125, 172.90811157226562, -277.97503662109375, 581.8347778320312, 166.55422973632812, 357.4470520019531, -133.55303955078125, -208.99826049804688, 553.06103515625, 299.5986633300781, 139.34866333007812, -37.371551513671875, 270.02813720703125, 33.40777587890625, 557.4440307617188, 6.081775665283203, 11.759674072265625, 121.52027130126953, 193.42486572265625, 409.1206359863281, 472.0581970214844, 536.3671875, -166.63967895507812, 278.26300048828125, 79.43592071533203, 87.90750885009766, 13.724777221679688, -2.722789764404297, 275.04241943359375, 140.7525634765625, 458.5002136230469, 489.2029724121094, 108.31130981445312, 179.39071655273438, 115.6843490600586, 447.0335388183594], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000277.npy"} +{"epoch": 0.4067547723935389, "step": 278, "batch_size": 64, "mean": 193.6494598388672, "std": 219.17713928222656, "min": -192.08587646484375, "p10": -64.57553367614744, "median": 154.4925994873047, "p90": 542.2940002441406, "max": 698.8521118164062, "pos_frac": 0.796875, "sample": [-115.82504272460938, 382.1745300292969, -75.57759857177734, 85.62303161621094, 328.7583312988281, 347.19921875, 267.8039245605469, 82.211181640625, 187.56362915039062, 113.75543975830078, 300.3785095214844, 191.14247131347656, 41.61414337158203, -110.45721435546875, 163.80459594726562, 299.614990234375, -17.461029052734375, 340.32427978515625, 547.6553955078125, 10.559684753417969, 9.27308464050293, 698.8521118164062, 447.068359375, 216.5992431640625, 325.3146057128906, 297.14044189453125, 115.60529327392578, -15.697639465332031, 183.348876953125, 531.595458984375, 485.2248840332031, -69.53184509277344, 135.763671875, 69.322021484375, 588.2929077148438, 551.7653198242188, 25.907981872558594, -34.7655029296875, 72.59725189208984, 202.19964599609375, 104.3628158569336, 448.00750732421875, 186.38868713378906, 69.22206115722656, 42.526947021484375, 546.8790893554688, 145.18060302734375, 259.18572998046875, -71.38812255859375, 649.1378784179688, 185.5625457763672, 135.45565795898438, 47.99603271484375, 8.583854675292969, 337.1813049316406, 344.54132080078125, -31.967302322387695, -34.59117889404297, 694.64990234375, -192.08587646484375, 376.16107177734375, 120.21926879882812, -131.37258911132812, -53.010807037353516], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000278.npy"} +{"epoch": 0.40822320117474303, "step": 279, "batch_size": 64, "mean": 191.7390594482422, "std": 251.53948974609375, "min": -407.09808349609375, "p10": -83.80508193969725, "median": 174.37875366210938, "p90": 507.7025787353516, "max": 957.05517578125, "pos_frac": 0.75, "sample": [79.5736083984375, 85.8685073852539, 61.57719802856445, 21.6474609375, -44.0084228515625, -63.97423553466797, 436.3453369140625, 513.7991943359375, -5.813117980957031, -31.227291107177734, -21.28638458251953, 288.38818359375, 177.03411865234375, 57.03936767578125, 524.0447998046875, 640.9852294921875, 52.97639083862305, 171.723388671875, 364.625732421875, 282.5329284667969, 723.6996459960938, 88.5793685913086, 23.14459800720215, -324.6817626953125, 52.335540771484375, 64.77910614013672, -2.377166748046875, 182.67010498046875, 190.46524047851562, 596.8856201171875, 141.98858642578125, 70.61287689208984, 252.51412963867188, -44.83287811279297, 273.3522644042969, 454.00982666015625, 257.31231689453125, 957.05517578125, 473.01251220703125, 368.0992431640625, -3.9429168701171875, -92.30401611328125, -124.64620971679688, -407.09808349609375, 194.9375762939453, 493.4771423339844, 83.63125610351562, 255.4005889892578, 377.09222412109375, 136.17857360839844, 437.47161865234375, 331.424560546875, 81.60367584228516, -157.8084716796875, 538.7215576171875, 235.8753662109375, 439.9707946777344, 313.27374267578125, -107.4295654296875, -167.2891387939453, 438.566162109375, 335.1585998535156, -18.60922622680664, 267.1665954589844], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000279.npy"} +{"epoch": 0.40969162995594716, "step": 280, "batch_size": 64, "mean": 194.39535522460938, "std": 205.38641357421875, "min": -345.3467102050781, "p10": -67.46446228027342, "median": 175.59669494628906, "p90": 460.04845886230476, "max": 606.3220825195312, "pos_frac": 0.8125, "sample": [115.47285461425781, 356.7938232421875, 107.01456451416016, 270.6326904296875, 30.862014770507812, -36.85816192626953, 397.5556335449219, 527.4308471679688, 569.0150146484375, 54.543426513671875, 238.1449432373047, 25.915321350097656, -74.72138977050781, 392.54998779296875, 394.1722717285156, 404.33514404296875, 146.03826904296875, 271.12750244140625, 201.33282470703125, 297.0656433105469, 73.0391845703125, 559.4133911132812, 465.7880554199219, 209.7966766357422, 258.7417907714844, 73.99661254882812, 377.1192626953125, 368.5892333984375, -4.9822998046875, 261.15155029296875, 200.77410888671875, 136.55242919921875, 604.3292236328125, 178.1064453125, -40.072540283203125, 157.9355926513672, -219.2119140625, 390.5301818847656, 254.08653259277344, 99.84697723388672, 233.8923797607422, 112.53582000732422, -100.91349792480469, 606.3220825195312, -107.59883117675781, -345.3467102050781, 138.66323852539062, 446.65606689453125, 143.75924682617188, 121.50798797607422, 228.9440155029297, 490.95916748046875, -50.53163146972656, 148.27207946777344, -2.113372802734375, -102.58318328857422, 173.08694458007812, 123.77964782714844, 153.58389282226562, 353.35400390625, 318.6221008300781, -179.10784912109375, 100.26985168457031, 341.3359375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000280.npy"} +{"epoch": 0.4111600587371512, "step": 281, "batch_size": 64, "mean": 173.53970336914062, "std": 201.3173370361328, "min": -669.559814453125, "p10": -28.226861190795884, "median": 168.77081298828125, "p90": 431.0909973144532, "max": 620.6246948242188, "pos_frac": 0.828125, "sample": [121.63822174072266, 301.2818298339844, 160.94024658203125, 250.4725799560547, 55.96371841430664, 235.29544067382812, 73.32412719726562, 206.21524047851562, 206.17164611816406, 71.05785369873047, 240.10797119140625, 440.3319091796875, 270.57275390625, 79.66661834716797, 273.3289794921875, -84.08590698242188, 187.1467742919922, 620.6246948242188, 81.462646484375, 156.23947143554688, -5.949554443359375, 364.29022216796875, 265.4281005859375, 527.9591064453125, 204.832763671875, 288.6653137207031, 170.67190551757812, 248.2113800048828, -33.99492263793945, 576.1585083007812, 382.73822021484375, 117.2584228515625, 175.54217529296875, 453.84136962890625, 184.16204833984375, -669.559814453125, 84.34938049316406, 87.64305114746094, 105.89916229248047, 58.71986389160156, 509.05780029296875, 0.5975799560546875, -14.768051147460938, 327.4041748046875, 188.74514770507812, 249.44412231445312, 62.839752197265625, 103.77210998535156, 515.8508911132812, 105.65259552001953, -141.71136474609375, 143.93109130859375, -110.37461853027344, 115.62625122070312, -57.17510986328125, -8.0472412109375, -61.92401123046875, 409.52886962890625, 247.8662567138672, 79.95870208740234, 166.86972045898438, 339.63092041015625, -1.5501937866210938, 400.6924133300781], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000281.npy"} +{"epoch": 0.41262848751835535, "step": 282, "batch_size": 64, "mean": 136.37387084960938, "std": 224.1614990234375, "min": -234.52642822265625, "p10": -109.64613723754883, "median": 117.87100219726562, "p90": 384.93354492187507, "max": 1226.73193359375, "pos_frac": 0.765625, "sample": [477.8559265136719, 137.00015258789062, 87.11228942871094, -234.52642822265625, 205.00965881347656, 265.0628662109375, 105.83309936523438, 41.57275390625, 130.0440673828125, 373.77703857421875, 265.56591796875, 214.13677978515625, 349.0873718261719, 101.82665252685547, -37.98411560058594, 214.17681884765625, 1226.73193359375, 157.42678833007812, 213.74935913085938, 124.13043212890625, 170.68475341796875, 202.60067749023438, 28.189998626708984, -129.611083984375, 548.2398071289062, 400.1378173828125, 104.35094451904297, 194.47398376464844, 150.73776245117188, 525.0441284179688, 294.19683837890625, 230.17117309570312, 312.3487854003906, -26.308868408203125, 6.820232391357422, 132.71841430664062, -116.78233337402344, -110.66524505615234, 74.553955078125, -116.43893432617188, -45.38494873046875, 55.52734375, 154.26622009277344, 129.38833618164062, 249.13002014160156, 2.7930526733398438, -107.26821899414062, 389.71490478515625, 198.23532104492188, 111.611572265625, -48.22992706298828, -62.95072937011719, 27.540390014648438, 51.14767074584961, 84.53962707519531, -202.55743408203125, 327.25616455078125, 31.22541618347168, -80.14189910888672, 415.5785827636719, -233.264892578125, 44.15169906616211, -83.45011138916016, 26.016586303710938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000282.npy"} +{"epoch": 0.41409691629955947, "step": 283, "batch_size": 64, "mean": 206.7980499267578, "std": 213.18423461914062, "min": -403.2720947265625, "p10": -34.598454284667966, "median": 195.3777084350586, "p90": 494.38722534179686, "max": 636.0501708984375, "pos_frac": 0.828125, "sample": [-73.85012817382812, -104.6548080444336, 242.72874450683594, 29.62787437438965, 379.4417724609375, 347.5524597167969, -28.5828857421875, 267.94500732421875, 450.6940002441406, 36.243324279785156, 49.37334442138672, 188.4125213623047, -54.495235443115234, 96.87336730957031, 81.64141082763672, 235.05506896972656, -403.2720947265625, 61.59354782104492, 490.59759521484375, -64.0111083984375, 471.2692565917969, 488.26873779296875, 165.30885314941406, 353.61663818359375, 18.990978240966797, 190.93710327148438, -34.53523254394531, 496.0113525390625, 405.30792236328125, 516.2532958984375, 50.967315673828125, 300.3272399902344, -154.11351013183594, 203.08349609375, 457.2342529296875, 61.82621765136719, 64.02200317382812, -17.088397979736328, 82.75961303710938, -17.64299201965332, 544.8151245117188, 218.87326049804688, 346.25616455078125, 230.62762451171875, 282.0472106933594, 186.86764526367188, 81.79732513427734, 232.219482421875, 448.6123046875, 321.7102355957031, 636.0501708984375, 24.973411560058594, 400.06475830078125, 172.23321533203125, 73.33208465576172, 275.0550231933594, 556.0303955078125, 199.8183135986328, 320.2196350097656, 619.367919921875, 497.19378662109375, 117.14498138427734, 152.67172241210938, -34.62554931640625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000283.npy"} +{"epoch": 0.4155653450807636, "step": 284, "batch_size": 64, "mean": 188.37283325195312, "std": 210.116455078125, "min": -198.7493133544922, "p10": -38.715443801879864, "median": 170.8418960571289, "p90": 496.1788879394533, "max": 750.127685546875, "pos_frac": 0.859375, "sample": [-95.21109008789062, 97.88624572753906, 342.40679931640625, 41.28343963623047, 113.94867706298828, 61.405059814453125, 373.611083984375, 17.52142333984375, 280.8156433105469, 443.15008544921875, 341.9765625, -22.75387954711914, 527.5421142578125, 17.61724281311035, 291.0566711425781, 20.343494415283203, 38.45673370361328, 246.6153564453125, 326.78521728515625, 518.905517578125, 73.67135620117188, 700.2017211914062, 166.0794677734375, 314.4291076660156, 152.3458251953125, -0.4459075927734375, 175.6043243408203, 36.36727523803711, 283.03375244140625, -46.09474182128906, 35.16339111328125, 736.5671997070312, 203.3301239013672, -65.66844940185547, 216.75030517578125, 100.52215576171875, -79.59561920166016, 58.216148376464844, 241.6348876953125, 750.127685546875, 84.86784362792969, 182.00308227539062, 211.04852294921875, 14.114809036254883, -159.88299560546875, -198.7493133544922, 306.7628479003906, 318.12744140625, 111.72404479980469, 272.8291320800781, 542.3434448242188, 198.78384399414062, 314.2029113769531, 230.9116668701172, 48.6361083984375, 9.032283782958984, -45.556114196777344, 0.9912185668945312, 363.0333251953125, 238.1398468017578, 15.824012756347656, 529.34130859375, 8.871696472167969, 422.8589782714844], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000284.npy"} +{"epoch": 0.4170337738619677, "step": 285, "batch_size": 64, "mean": 173.7642822265625, "std": 209.08729553222656, "min": -587.0213012695312, "p10": -51.792728805541955, "median": 157.44549560546875, "p90": 405.7916503906251, "max": 682.3765869140625, "pos_frac": 0.875, "sample": [411.75927734375, 237.91323852539062, 48.09204864501953, 391.8671875, 140.27731323242188, 130.06057739257812, 162.75607299804688, 66.9331283569336, 146.1448516845703, 682.3765869140625, 318.24151611328125, 206.1768035888672, 374.45904541015625, 131.25225830078125, 152.2095489501953, 608.583984375, 23.75341796875, 125.2630615234375, 157.125244140625, 244.67532348632812, 468.0902404785156, -241.43655395507812, 83.04607391357422, 325.2181701660156, 432.3327941894531, 19.76581573486328, -66.01986694335938, 111.368896484375, 70.4266357421875, 119.46402740478516, 308.59417724609375, 144.2117462158203, -72.3697280883789, 87.8681640625, 284.1674499511719, 332.17724609375, 38.56421661376953, -587.0213012695312, 252.03488159179688, 49.859771728515625, 225.62489318847656, 34.411163330078125, 604.3650512695312, 282.1131591796875, 3.7082138061523438, 312.2423095703125, 276.5303955078125, 157.7657470703125, 255.51080322265625, 365.84051513671875, 200.42974853515625, -216.0251922607422, 300.2902526855469, 221.73287963867188, -202.4553985595703, -18.596073150634766, 473.7344970703125, 142.73379516601562, 116.09517669677734, 161.8468475341797, -175.81375122070312, 342.81756591796875, 189.51651000976562, 146.23110961914062], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000285.npy"} +{"epoch": 0.4185022026431718, "step": 286, "batch_size": 64, "mean": 187.52935791015625, "std": 229.96539306640625, "min": -355.5393371582031, "p10": -100.17984466552734, "median": 168.93006134033203, "p90": 505.29606628417974, "max": 688.6256103515625, "pos_frac": 0.765625, "sample": [73.13683319091797, -83.00084686279297, 441.4543151855469, 105.3948974609375, 168.53097534179688, 190.308837890625, 114.01470184326172, 285.80712890625, 363.3058166503906, -355.5393371582031, 284.25372314453125, 271.3528137207031, 74.89048767089844, -104.46965789794922, 495.3503112792969, 280.0857238769531, 590.03466796875, -9.790679931640625, 239.5377197265625, 169.3291473388672, 525.4047241210938, 125.37823486328125, -288.5899963378906, 445.04302978515625, 251.4387969970703, -72.41943359375, -63.96088790893555, 380.3511962890625, 331.17425537109375, 602.779541015625, 459.6744384765625, -195.96841430664062, 160.91998291015625, -8.087554931640625, 23.035934448242188, 37.18455505371094, 422.31524658203125, 152.5550079345703, 1.427520751953125, -106.06480407714844, 154.25608825683594, -103.00399017333984, 272.1006164550781, -93.59017181396484, 105.7835693359375, 479.0220947265625, 509.55853271484375, 140.89022827148438, -140.2003173828125, 540.7703857421875, -70.9576187133789, 244.58895874023438, 81.95103454589844, 512.3755493164062, 170.022705078125, 301.6125793457031, 70.3056640625, 263.21466064453125, 314.0249328613281, -34.69537353515625, 688.6256103515625, 403.0902099609375, 297.8221435546875, 116.73221588134766], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000286.npy"} +{"epoch": 0.4199706314243759, "step": 287, "batch_size": 64, "mean": 153.36337280273438, "std": 203.6736297607422, "min": -265.9056091308594, "p10": -93.28328247070313, "median": 128.95438385009766, "p90": 441.1302947998048, "max": 714.4306030273438, "pos_frac": 0.796875, "sample": [156.72972106933594, 11.596771240234375, -9.558755874633789, 52.98772430419922, 154.48509216308594, -31.749664306640625, 311.0992431640625, 596.26025390625, 158.07212829589844, 155.49252319335938, 543.039794921875, 102.85819244384766, 218.65415954589844, 546.50390625, -89.98316955566406, -56.831634521484375, 51.870323181152344, 526.6048583984375, 122.60821533203125, 351.52294921875, 229.27105712890625, 7.1752471923828125, 119.3915786743164, -265.9056091308594, 714.4306030273438, 90.32427215576172, 61.07734680175781, 1.7786865234375, -94.69761657714844, 66.58892059326172, 175.40219116210938, -4.907600402832031, -164.10012817382812, 149.8352813720703, 98.97041320800781, 236.62062072753906, 71.90950012207031, 260.69091796875, 135.30055236816406, 320.48333740234375, -14.232271194458008, 249.2721405029297, 226.61004638671875, 106.10438537597656, 22.166091918945312, 118.86805725097656, 145.38900756835938, 60.66328430175781, 381.25775146484375, 395.088134765625, -172.4882049560547, 51.347618103027344, 229.94541931152344, 61.91478729248047, 452.5054016113281, -232.7449951171875, -145.7406768798828, -101.7278060913086, 248.45870971679688, 337.02386474609375, 471.77191162109375, 414.58837890625, 166.3408203125, 260.9719543457031], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000287.npy"} +{"epoch": 0.42143906020558003, "step": 288, "batch_size": 64, "mean": 187.32608032226562, "std": 205.56857299804688, "min": -325.3940734863281, "p10": -2.7537286758422814, "median": 185.69668579101562, "p90": 435.9263610839845, "max": 758.437255859375, "pos_frac": 0.890625, "sample": [179.67709350585938, 184.288818359375, -4.415889739990234, 169.17886352539062, 310.6802978515625, 409.38690185546875, 1.1246471405029297, 758.437255859375, 37.550506591796875, 390.5669860839844, 274.5243225097656, 125.93960571289062, -137.41680908203125, 16.45355224609375, 92.09056091308594, 159.60325622558594, 134.9321746826172, 401.0188293457031, 228.95440673828125, 328.3829040527344, 201.3349609375, 274.7530517578125, 338.4927978515625, 17.33502197265625, 74.11442565917969, 224.97381591796875, 209.34976196289062, 33.99199676513672, 201.79550170898438, 18.333786010742188, 236.4775390625, 211.0433349609375, 621.7444458007812, 146.72482299804688, 447.3004150390625, 311.148681640625, 199.3621063232422, 20.242889404296875, 207.10073852539062, 44.577735900878906, -325.3940734863281, 601.7196044921875, 87.79415893554688, 319.39947509765625, 66.64628601074219, 187.10455322265625, 42.70014190673828, 50.36463928222656, 491.28125, 93.92565155029297, 238.14002990722656, 579.9425048828125, 250.06698608398438, -299.06524658203125, 112.17813110351562, 515.0968017578125, 324.0622863769531, 60.15214538574219, 45.999542236328125, 381.0778503417969, -26.650123596191406, 357.99609375, -135.9799041748047, -130.84580993652344], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000288.npy"} +{"epoch": 0.42290748898678415, "step": 289, "batch_size": 64, "mean": 214.71755981445312, "std": 229.94888305664062, "min": -170.52496337890625, "p10": -48.5607120513916, "median": 159.30533599853516, "p90": 555.6729736328126, "max": 711.5376586914062, "pos_frac": 0.828125, "sample": [125.83233642578125, -1.489990234375, 607.321533203125, 702.248779296875, -146.7937469482422, 595.5355224609375, 415.0654296875, 290.8305969238281, 83.93621826171875, 571.464599609375, 521.3814697265625, 418.272705078125, 281.6148376464844, 521.2159423828125, 398.51470947265625, 301.9654846191406, 29.400497436523438, 711.5376586914062, -56.099021911621094, 272.7913818359375, 121.24391174316406, 28.782468795776367, 154.15206909179688, -23.26705551147461, -110.10342407226562, 31.841060638427734, 293.28289794921875, 453.3445129394531, 72.57894897460938, 155.2887725830078, 407.6087341308594, 663.1213989257812, 347.4825744628906, -49.35895919799805, 82.48169708251953, 38.62632751464844, 565.4953002929688, 100.28633117675781, -120.593505859375, 17.2420711517334, 93.30014038085938, 532.7542114257812, -46.69813537597656, 252.67425537109375, 183.75311279296875, 398.6927490234375, 438.72247314453125, -41.281341552734375, 135.120849609375, -130.5623016357422, 163.3218994140625, 233.83929443359375, 93.837890625, 46.18894958496094, 513.3201904296875, 192.841796875, 22.435171127319336, 19.26488494873047, 148.918701171875, 218.56295776367188, 206.92721557617188, -170.52496337890625, 280.3996887207031, 82.03047943115234], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000289.npy"} +{"epoch": 0.4243759177679883, "step": 290, "batch_size": 64, "mean": 214.22250366210938, "std": 258.9268493652344, "min": -333.06927490234375, "p10": -65.22724380493163, "median": 205.84259033203125, "p90": 579.0657287597656, "max": 941.0040283203125, "pos_frac": 0.828125, "sample": [532.1793212890625, 190.65713500976562, 122.07469940185547, 160.20941162109375, -333.06927490234375, 376.7663879394531, 455.69598388671875, -50.8271484375, 61.49235534667969, 131.02389526367188, -218.58572387695312, 389.908447265625, 253.373291015625, 46.067169189453125, 271.67120361328125, 234.34060668945312, 535.8052368164062, -32.93682861328125, 233.35287475585938, 30.55417251586914, 35.24714660644531, 33.67808532714844, -173.279296875, 198.89498901367188, 127.87423706054688, -283.2776794433594, 175.5079345703125, 303.0860595703125, 941.0040283203125, 697.2796630859375, 78.39144897460938, 254.8852081298828, 323.775390625, 650.2406005859375, 216.2579345703125, 49.62406921386719, 281.1760559082031, 109.73416137695312, 278.39093017578125, 212.79019165039062, 17.9912109375, 213.67066955566406, 196.07997131347656, 345.594970703125, 55.1656494140625, 291.86663818359375, -68.23580169677734, -58.207275390625, 321.568359375, -103.58468627929688, 699.1774291992188, 465.43048095703125, 234.74539184570312, 568.6884765625, 14.879215240478516, 628.6806030273438, 583.5131225585938, 297.896728515625, -47.6436653137207, 65.74569702148438, 55.26675796508789, 848.5576171875, 251.06671142578125, -68.70782470703125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000290.npy"} +{"epoch": 0.42584434654919234, "step": 291, "batch_size": 64, "mean": 197.35003662109375, "std": 227.34312438964844, "min": -420.2695617675781, "p10": -42.86296081542969, "median": 202.5143051147461, "p90": 469.89605407714845, "max": 668.5720825195312, "pos_frac": 0.78125, "sample": [188.90724182128906, 222.68418884277344, 17.218948364257812, 59.706336975097656, -6.627593994140625, 289.96685791015625, 668.5720825195312, 293.0050964355469, 497.2987060546875, 299.5498046875, 54.30287170410156, 433.34356689453125, 303.2900390625, -32.07714080810547, 615.3367919921875, -30.618438720703125, 432.16387939453125, 244.6149139404297, -20.37003517150879, 239.94334411621094, 457.597412109375, 3.3438568115234375, 283.217041015625, 86.31124877929688, 283.54705810546875, 467.1617431640625, 153.55178833007812, 182.7063446044922, 471.0679016113281, 34.927425384521484, 337.8123474121094, 107.13275909423828, 339.8756103515625, -244.72731018066406, 48.053253173828125, -185.02053833007812, -43.61815643310547, 59.517601013183594, -420.2695617675781, 626.674560546875, 315.03729248046875, 460.3703308105469, 396.38037109375, 228.31182861328125, -106.30961608886719, -36.77580261230469, -83.04361724853516, 376.63482666015625, 67.64144897460938, 173.64962768554688, 100.22755432128906, 179.04098510742188, 347.015869140625, 339.95843505859375, -6.400604248046875, 216.12136840820312, 431.40228271484375, 87.66938781738281, 557.9222412109375, -41.10083770751953, -122.24730682373047, 401.3883056640625, 526.3388061523438, 2.0947265625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000291.npy"} +{"epoch": 0.42731277533039647, "step": 292, "batch_size": 64, "mean": 202.02081298828125, "std": 257.6628723144531, "min": -635.9100952148438, "p10": -59.95448799133299, "median": 191.2347183227539, "p90": 525.6099853515625, "max": 794.8243408203125, "pos_frac": 0.828125, "sample": [224.0745849609375, 48.55636978149414, 11.1871337890625, 197.09786987304688, 298.8122863769531, 171.24188232421875, 132.896728515625, 118.49398040771484, 189.25332641601562, -635.9100952148438, 727.0296630859375, 62.885765075683594, 616.9554443359375, 99.18891906738281, 324.1957092285156, -70.14805603027344, -286.47515869140625, 431.27667236328125, 61.387916564941406, 318.4367370605469, 8.978401184082031, 775.1265869140625, 440.8036193847656, 229.08151245117188, 472.2503356933594, 516.613037109375, 266.7474670410156, 507.5277099609375, 143.27420043945312, 123.15167236328125, -142.59347534179688, 283.41400146484375, 9.880407333374023, 480.1058349609375, -7.476936340332031, 131.01095581054688, -68.02752685546875, 183.1983642578125, 101.82035827636719, 113.19065856933594, 30.408370971679688, 529.4658203125, 198.9283447265625, 193.2161102294922, 209.2211456298828, 208.74896240234375, 485.5193786621094, 320.6925048828125, 794.8243408203125, 0.10510063171386719, 162.91754150390625, 352.56097412109375, 239.7388458251953, -221.83494567871094, 234.03440856933594, -163.361083984375, 666.8927001953125, -9.74190902709961, -41.11739730834961, 586.473388671875, 223.76560974121094, 236.6767578125, 95.97834777832031, -13.295312881469727], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000292.npy"} +{"epoch": 0.4287812041116006, "step": 293, "batch_size": 64, "mean": 155.41049194335938, "std": 247.68438720703125, "min": -282.6646728515625, "p10": -123.67295074462889, "median": 128.75980377197266, "p90": 462.4840270996094, "max": 974.429443359375, "pos_frac": 0.703125, "sample": [38.387393951416016, 53.904052734375, 28.26434898376465, 48.212303161621094, 228.74659729003906, 202.3898468017578, 485.7280578613281, 179.3933868408203, 421.400146484375, 49.281578063964844, 445.5638427734375, -98.00514221191406, 230.0711669921875, -10.26717758178711, 974.429443359375, -282.6646728515625, 372.8114318847656, 163.65591430664062, 43.778900146484375, 278.52545166015625, 351.0076904296875, -173.82388305664062, 266.5108947753906, -98.51079559326172, 46.61396789550781, 75.94837188720703, -89.55115509033203, 165.20379638671875, -5.1259307861328125, 509.55047607421875, 472.00146484375, 318.0644226074219, 309.7409362792969, 407.84326171875, 324.6672668457031, -127.85267639160156, 33.439971923828125, 162.84864807128906, 196.03451538085938, -32.317893981933594, -112.2652816772461, 85.44293975830078, -7.956787109375, 242.88406372070312, 283.64508056640625, 378.3242492675781, 201.3398895263672, 340.5416259765625, 98.57554626464844, -262.92633056640625, -6.089263916015625, -233.2656707763672, -167.7499542236328, 469.73553466796875, 857.134033203125, 95.75196075439453, -113.92025756835938, 158.94406127929688, 64.2525634765625, -95.25848388671875, -148.6790771484375, 371.56463623046875, 525.7536010742188, -45.40796661376953], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000293.npy"} +{"epoch": 0.4302496328928047, "step": 294, "batch_size": 64, "mean": 175.60528564453125, "std": 236.05149841308594, "min": -296.6639099121094, "p10": -109.07910614013666, "median": 101.49251556396484, "p90": 522.4231933593751, "max": 834.739990234375, "pos_frac": 0.859375, "sample": [41.707313537597656, 66.66705322265625, 314.97003173828125, 421.11767578125, 91.7864990234375, 62.05808639526367, -152.82382202148438, 143.705322265625, 5.0067901611328125, 77.99335479736328, 15.560989379882812, 47.68107604980469, -52.07316207885742, 573.7789306640625, 65.29259490966797, 283.1831359863281, 217.35316467285156, 441.1805419921875, 38.939884185791016, 187.6945343017578, 686.1741943359375, -156.34815979003906, 302.6009826660156, 78.64210510253906, 637.10205078125, 464.3879699707031, 332.62786865234375, 154.15431213378906, 183.18206787109375, -133.51022338867188, 225.94534301757812, 281.7846374511719, 306.1146240234375, 834.739990234375, -193.75958251953125, -187.22706604003906, 626.7434692382812, 167.11622619628906, 419.0623474121094, 9.742988586425781, -296.6639099121094, -197.79190063476562, 270.62994384765625, 306.25811767578125, 551.8406982421875, 125.78778076171875, 38.847782135009766, 173.54664611816406, 492.06488037109375, 12.32682991027832, 364.894775390625, 398.8479919433594, -23.840980529785156, 111.19853210449219, 49.8787956237793, 38.3336181640625, 46.4834098815918, 54.110130310058594, 4.745769500732422, 82.82618713378906, 535.4338989257812, 78.81350708007812, 35.326324462890625, 54.783119201660156], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000294.npy"} +{"epoch": 0.43171806167400884, "step": 295, "batch_size": 64, "mean": 194.20144653320312, "std": 257.5389404296875, "min": -339.95867919921875, "p10": -137.04864273071286, "median": 183.87718963623047, "p90": 522.9742126464845, "max": 941.9735107421875, "pos_frac": 0.765625, "sample": [-229.79513549804688, -22.403318405151367, 588.511474609375, 13.671443939208984, -339.95867919921875, 220.0848388671875, -150.6730194091797, 329.9798583984375, 280.9847717285156, -257.5384521484375, 368.55010986328125, -18.828292846679688, 146.4312744140625, 392.27191162109375, 456.8990173339844, 297.4129943847656, 262.232177734375, 297.6666564941406, -105.25843048095703, 111.28316497802734, 331.3984069824219, -37.06798553466797, 162.9921875, 374.2838134765625, 5.509590148925781, 147.698486328125, 571.252685546875, -153.9893341064453, 746.9549560546875, 176.12997436523438, 458.7654113769531, 21.443038940429688, -13.162967681884766, -159.73062133789062, 455.68902587890625, 571.8782348632812, 57.60741424560547, -83.93183135986328, -232.6824493408203, 78.23274230957031, 267.3736877441406, 412.5343933105469, 379.75323486328125, 141.25120544433594, -51.56293487548828, 75.07432556152344, 253.0013885498047, 62.900550842285156, 221.67990112304688, 530.0767822265625, 359.0426025390625, 305.31231689453125, 8.631744384765625, 941.9735107421875, 506.40155029296875, 421.068115234375, 124.76235961914062, 191.62440490722656, 198.88742065429688, 203.38612365722656, 24.82570457458496, 114.23663330078125, 665.486572265625, -49.623313903808594], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000295.npy"} +{"epoch": 0.4331864904552129, "step": 296, "batch_size": 64, "mean": 141.15194702148438, "std": 277.85650634765625, "min": -707.7245483398438, "p10": -237.60516662597652, "median": 153.3268051147461, "p90": 516.5855285644532, "max": 794.7814331054688, "pos_frac": 0.75, "sample": [155.58558654785156, 92.47681427001953, 164.28887939453125, 792.658447265625, -125.6207275390625, 601.598388671875, -264.43902587890625, -109.08625793457031, -54.20337677001953, -707.7245483398438, 125.70169067382812, 212.28765869140625, -315.214111328125, 207.34878540039062, 168.88999938964844, -273.2602844238281, 163.0084991455078, 308.12652587890625, 615.5234375, 416.35345458984375, 230.37896728515625, 271.626220703125, 10.104209899902344, -87.82624816894531, 140.91549682617188, 366.268310546875, 57.128875732421875, -252.7497100830078, 25.560821533203125, -202.2678985595703, 646.15283203125, 0.5566253662109375, 291.5146789550781, 92.6988754272461, 188.85733032226562, 526.0983276367188, 163.9760284423828, 74.74337768554688, -437.85687255859375, 236.69210815429688, -289.50811767578125, 281.3275146484375, -72.88495635986328, -53.794921875, 524.1803588867188, 108.34563446044922, 124.94640350341797, 236.5200958251953, 151.06802368164062, 183.39468383789062, 118.53852081298828, 45.146644592285156, -1.1065292358398438, 794.7814331054688, 233.15928649902344, 225.71444702148438, 103.0202407836914, 75.50511932373047, 498.8642578125, -88.60054016113281, 265.29632568359375, 216.02545166015625, 465.6898193359375, 371.223388671875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000296.npy"} +{"epoch": 0.434654919236417, "step": 297, "batch_size": 64, "mean": 167.111572265625, "std": 205.38864135742188, "min": -267.58685302734375, "p10": -44.13395462036133, "median": 123.6261978149414, "p90": 445.730419921875, "max": 632.4522705078125, "pos_frac": 0.796875, "sample": [153.78189086914062, 239.18959045410156, 390.3396911621094, 551.4744873046875, 55.30076217651367, 11.630304336547852, 354.925048828125, 194.00424194335938, -160.74325561523438, -63.3973274230957, 42.348358154296875, 155.86529541015625, 207.72598266601562, 447.569091796875, 438.5957336425781, -35.160728454589844, 44.63128662109375, 30.73455810546875, -64.5081787109375, -14.658027648925781, 305.78857421875, -155.7604217529297, 109.89509582519531, -9.852067947387695, 538.6527709960938, 98.1842041015625, 328.21636962890625, 81.44183349609375, 220.35906982421875, 363.5022277832031, 182.14892578125, 88.96319580078125, 416.1990966796875, 167.99940490722656, 441.440185546875, 27.18148422241211, 127.26039123535156, 0.7880325317382812, 554.3251342773438, 299.5846862792969, 25.113662719726562, 195.59762573242188, -202.55224609375, 314.68896484375, 102.07401275634766, -43.72400665283203, 136.5714569091797, 57.845489501953125, 632.4522705078125, -44.30964660644531, 119.99200439453125, 475.2173767089844, 259.63671875, -15.13482666015625, -267.58685302734375, 332.9222412109375, 23.77234649658203, 85.33387756347656, 219.24667358398438, 620.0557861328125, 106.84080505371094, 382.91851806640625, -28.645910263061523, 40.84770202636719], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000297.npy"} +{"epoch": 0.43612334801762115, "step": 298, "batch_size": 64, "mean": 209.22708129882812, "std": 312.6819763183594, "min": -631.8372192382812, "p10": -117.77614288330078, "median": 167.9659194946289, "p90": 624.7857421875, "max": 1423.013916015625, "pos_frac": 0.703125, "sample": [127.44385528564453, -126.27206420898438, 16.629390716552734, -631.8372192382812, 64.19210815429688, -144.41021728515625, -20.08894157409668, 626.211181640625, 259.2545471191406, -27.93707275390625, 144.73101806640625, 469.293212890625, -53.88307189941406, -86.33784484863281, -179.49688720703125, 49.597434997558594, 177.26223754882812, 120.55917358398438, 102.0975112915039, 269.01702880859375, 802.6585693359375, -9.203475952148438, 350.2060546875, -175.1761474609375, 660.07421875, 276.79998779296875, -117.99119567871094, 580.9930419921875, 678.992431640625, 681.68994140625, -125.57067108154297, -117.27435302734375, -54.061729431152344, 6.808753967285156, -17.93714141845703, 542.5133056640625, 683.3770751953125, 165.51927185058594, 433.54254150390625, 240.75296020507812, 0.5631084442138672, -12.987258911132812, 1423.013916015625, 273.9872741699219, 621.459716796875, 241.42861938476562, 432.41180419921875, 451.9358215332031, -6.252651214599609, 113.04617309570312, 241.57858276367188, 409.8404541015625, 357.57330322265625, 140.34854125976562, 535.1093139648438, 170.41256713867188, -30.866518020629883, 353.0453796386719, -81.73808288574219, 255.58102416992188, 185.43930053710938, 232.08543395996094, 60.523712158203125, 380.25592041015625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000298.npy"} +{"epoch": 0.43759177679882527, "step": 299, "batch_size": 64, "mean": 181.78480529785156, "std": 260.2923278808594, "min": -381.77484130859375, "p10": -94.04129791259764, "median": 159.27397918701172, "p90": 566.9421386718751, "max": 845.7861938476562, "pos_frac": 0.75, "sample": [-82.28357696533203, -283.77813720703125, -381.77484130859375, 334.3227844238281, 60.900848388671875, -51.36176681518555, 391.65655517578125, 140.3212890625, 55.899757385253906, 106.5040054321289, 612.0823364257812, 577.1072998046875, 777.3419189453125, -74.30025482177734, 184.3195037841797, 759.6583251953125, -263.60443115234375, 366.4627990722656, 152.46791076660156, 171.30613708496094, -110.13321685791016, 291.96942138671875, 139.22357177734375, 166.08004760742188, 456.7062683105469, 124.80039978027344, 845.7861938476562, -11.499248504638672, 204.37911987304688, -2.4501075744628906, -33.45893096923828, 171.81007385253906, -134.05523681640625, 73.14347839355469, 205.73751831054688, 549.9843139648438, 139.6660614013672, 182.9798126220703, -33.7791748046875, 182.11129760742188, 77.80062103271484, 341.9737243652344, 270.3092041015625, -59.05972671508789, 215.47915649414062, -83.14246368408203, -217.1737823486328, 491.00335693359375, 34.03814697265625, 796.648681640625, -98.71222686767578, 63.44452667236328, 246.81256103515625, 314.70098876953125, 266.70721435546875, 310.4566650390625, 89.10589599609375, 574.2097778320312, 239.02938842773438, 129.92283630371094, 45.53277587890625, 351.6976013183594, 27.887351989746094, 243.30599975585938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000299.npy"} +{"epoch": 0.4390602055800294, "step": 300, "batch_size": 64, "mean": 218.35943603515625, "std": 303.1878662109375, "min": -470.64691162109375, "p10": -195.9786224365234, "median": 234.48590087890625, "p90": 637.1824279785156, "max": 819.661376953125, "pos_frac": 0.765625, "sample": [360.0739440917969, 293.66278076171875, 161.54092407226562, 372.723388671875, 644.6649780273438, -403.9309997558594, -158.7166748046875, 637.5784301757812, 411.150146484375, 6.636196136474609, -108.0346908569336, 354.72210693359375, 139.4311981201172, 669.8284912109375, 329.47100830078125, 411.27203369140625, 130.97889709472656, 505.7935791015625, 49.35950469970703, 68.21363067626953, -131.13525390625, 212.71670532226562, 256.2550964355469, -6.430732727050781, 484.3183898925781, 342.16290283203125, -155.533203125, 101.77644348144531, 188.73414611816406, 292.121337890625, 95.19120788574219, 650.1676025390625, 300.8475341796875, -470.64691162109375, 510.55712890625, -275.3310241699219, 563.9313354492188, 337.6220703125, 488.9735107421875, 196.91690063476562, 98.11176300048828, 467.24267578125, 636.2584228515625, 380.28277587890625, -36.33010482788086, 378.6064758300781, -108.98631286621094, -211.94802856445312, 753.5333251953125, 427.4375305175781, 451.85699462890625, -320.468505859375, 299.8721008300781, -278.1534423828125, -12.46187973022461, 190.1751708984375, 503.2748718261719, 80.42202758789062, 685.6873779296875, 819.661376953125, -322.08477783203125, 20.020801544189453, 198.46737670898438, 14.892562866210938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000300.npy"} +{"epoch": 0.44052863436123346, "step": 301, "batch_size": 64, "mean": 210.9517822265625, "std": 265.9326477050781, "min": -233.8741455078125, "p10": -90.76046981811523, "median": 157.4428939819336, "p90": 595.8326049804689, "max": 930.38232421875, "pos_frac": 0.8125, "sample": [191.5420379638672, 326.7794494628906, -92.3899917602539, 374.17071533203125, 368.6170654296875, 328.43865966796875, 782.00537109375, 96.56697082519531, 499.2055969238281, -24.201093673706055, 857.3609619140625, 376.69183349609375, 52.412696838378906, 258.429931640625, -184.63328552246094, 159.4014434814453, 101.49364471435547, 656.1846923828125, 78.52241516113281, 371.84185791015625, 817.0281982421875, 78.58213806152344, -107.17108917236328, 75.79771423339844, 424.2344055175781, 66.69403076171875, 61.933067321777344, 58.94795227050781, 244.08572387695312, 398.7171325683594, 279.952392578125, 281.861572265625, 19.15407943725586, 165.18411254882812, 40.41016387939453, 76.99853515625, 205.76966857910156, -165.3240509033203, 11.336299896240234, -6.73663330078125, 95.79837036132812, 55.88500213623047, 229.76431274414062, -108.34188079833984, 36.79853820800781, 930.38232421875, 303.93402099609375, 195.82827758789062, 630.7749633789062, 58.240234375, 566.8515625, 155.48434448242188, 342.1330871582031, 565.540771484375, 427.1646728515625, 77.98165893554688, -86.958251953125, -219.4534149169922, 608.2530517578125, -233.8741455078125, 96.01173400878906, -74.97677612304688, -22.481014251708984, 264.2760314941406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000301.npy"} +{"epoch": 0.4419970631424376, "step": 302, "batch_size": 64, "mean": 180.32032775878906, "std": 236.17919921875, "min": -227.47555541992188, "p10": -90.0875030517578, "median": 151.78829193115234, "p90": 458.09677734375003, "max": 877.5239868164062, "pos_frac": 0.765625, "sample": [51.31202697753906, 408.56060791015625, 792.1986694335938, -22.14840316772461, 219.3492431640625, -30.253705978393555, 271.7215881347656, 10.27618408203125, 154.2708282470703, -16.485321044921875, 280.9087829589844, 26.669082641601562, 48.19347381591797, 36.96381378173828, 157.8504638671875, 110.64629364013672, 363.194091796875, -6.748233795166016, 431.0589599609375, 351.1597900390625, 199.5746612548828, 181.19039916992188, 149.30575561523438, 251.84909057617188, -90.67242431640625, -113.77007293701172, 424.0574951171875, 536.955810546875, 103.3615951538086, -207.42442321777344, 213.88409423828125, 8.865989685058594, 156.1046905517578, -69.62016296386719, -114.95164489746094, 560.3201904296875, 877.5239868164062, 464.1866455078125, 156.60986328125, -115.9595947265625, -20.7808837890625, 438.48199462890625, -227.47555541992188, 232.58221435546875, 443.8870849609375, 58.140281677246094, 96.07398986816406, 677.7129516601562, 223.35032653808594, 339.0712890625, 74.67432403564453, 261.23699951171875, -159.48001098632812, 270.8763427734375, 117.02546691894531, 277.82574462890625, -55.24992370605469, 427.58782958984375, 73.19059753417969, 635.7025756835938, -88.72268676757812, 67.13356018066406, 85.77310943603516, 81.792724609375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000302.npy"} +{"epoch": 0.4434654919236417, "step": 303, "batch_size": 64, "mean": 162.1261444091797, "std": 237.99510192871094, "min": -413.3664855957031, "p10": -123.8926155090332, "median": 150.59950256347656, "p90": 498.3869079589844, "max": 667.3492431640625, "pos_frac": 0.703125, "sample": [310.83984375, -17.227222442626953, 64.30755615234375, 590.2418823242188, -125.41828918457031, 366.3137512207031, 434.6103515625, 667.3492431640625, -14.169610977172852, -75.94076538085938, 386.1319580078125, 188.37591552734375, 353.87030029296875, -95.89987182617188, 98.80831146240234, 157.90919494628906, 88.41227722167969, 13.72551155090332, -69.23119354248047, 415.75177001953125, -413.3664855957031, -31.452007293701172, -131.26998901367188, 323.1511535644531, 473.2197265625, 548.6250610351562, -120.33271026611328, 315.869384765625, -225.56814575195312, 157.7989959716797, -38.998260498046875, 340.6167907714844, 608.6076049804688, 224.28712463378906, 424.412109375, 180.4249267578125, -167.69671630859375, 150.49542236328125, 139.50186157226562, 649.2760009765625, 501.8349304199219, 31.5018310546875, 190.54052734375, 490.3415222167969, 150.70358276367188, 192.88021850585938, 237.14808654785156, 304.40423583984375, 156.73583984375, -25.05866050720215, 74.29306030273438, -40.590980529785156, 174.31129455566406, 50.44401931762695, 75.6279296875, 559.8675537109375, -144.57025146484375, -33.29756164550781, 45.411808013916016, 46.64473342895508, 269.57916259765625, -46.2076416015625, -178.81137084960938, 145.97640991210938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000303.npy"} +{"epoch": 0.44493392070484583, "step": 304, "batch_size": 64, "mean": 183.21072387695312, "std": 161.67335510253906, "min": -272.32977294921875, "p10": -0.7184829711914046, "median": 173.32848358154297, "p90": 381.9467407226563, "max": 596.1497802734375, "pos_frac": 0.890625, "sample": [10.049026489257812, 254.58522033691406, 291.52703857421875, 96.41509246826172, -10.850128173828125, -1.3989715576171875, -24.630523681640625, 40.05278015136719, 157.87496948242188, 109.2335205078125, 309.8673095703125, 348.64752197265625, 186.576416015625, 308.52655029296875, 61.60176086425781, 264.2702941894531, 269.643798828125, 298.9659423828125, 57.95098114013672, 58.625919342041016, 161.51104736328125, 367.82745361328125, 48.47166442871094, 75.09011840820312, 271.07806396484375, 90.73947143554688, 256.5931396484375, 50.620574951171875, 364.0411376953125, 103.97772216796875, 130.7215576171875, 106.80807495117188, 596.1497802734375, 106.8720474243164, -30.81012725830078, -272.32977294921875, 248.62710571289062, 0.86932373046875, 64.79426574707031, 265.3672180175781, -43.197052001953125, 496.1134033203125, 51.99070739746094, 39.46533203125, 479.93218994140625, 158.07115173339844, 12.004989624023438, 68.367431640625, 213.25946044921875, 381.5091552734375, 252.45913696289062, -57.13655471801758, 185.1459197998047, 423.2554626464844, 382.13427734375, 101.51826477050781, 237.20233154296875, 258.4311828613281, 443.64080810546875, 413.54278564453125, 336.88568115234375, 225.24874877929688, 333.2733459472656, 237.81411743164062], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000304.npy"} +{"epoch": 0.44640234948604995, "step": 305, "batch_size": 64, "mean": 165.08358764648438, "std": 242.84327697753906, "min": -362.44927978515625, "p10": -76.15766296386717, "median": 112.8983154296875, "p90": 460.92209777832034, "max": 1077.3931884765625, "pos_frac": 0.734375, "sample": [462.2345886230469, 281.5319519042969, 97.0294189453125, 212.7586669921875, 193.15982055664062, 667.3582153320312, 253.28732299804688, 40.04655456542969, 247.7881622314453, 438.50970458984375, 153.7306365966797, 82.5665283203125, 76.3769760131836, -56.897247314453125, -62.813720703125, -18.628150939941406, 335.1208801269531, 474.9068908691406, 72.9053726196289, 71.25438690185547, 1077.3931884765625, 294.8666687011719, 19.642986297607422, 304.09539794921875, 288.13543701171875, -40.71771240234375, -17.4320011138916, 239.2477569580078, 246.6371612548828, -81.87649536132812, -46.40923309326172, 301.629638671875, -87.54081726074219, 292.2646484375, 93.8939208984375, 159.1073455810547, 656.60693359375, 302.56744384765625, -23.063627243041992, 164.92672729492188, 128.7672119140625, -8.950492858886719, 84.68519592285156, 80.18163299560547, 606.1079711914062, 532.9915771484375, 274.9354248046875, -6.584381103515625, -249.08856201171875, 140.965087890625, -261.3625183105469, 457.859619140625, 449.9014892578125, 54.30046844482422, -84.0186767578125, 33.80535888671875, -131.8450927734375, 43.73595428466797, 308.6997375488281, -362.44927978515625, 52.849830627441406, 272.75140380859375, -36.55183410644531, 17.459793090820312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000305.npy"} +{"epoch": 0.447870778267254, "step": 306, "batch_size": 64, "mean": 160.84295654296875, "std": 201.8935546875, "min": -365.26385498046875, "p10": -39.1156509399414, "median": 103.82070922851562, "p90": 415.17168273925785, "max": 887.8458251953125, "pos_frac": 0.8125, "sample": [3.430561065673828, 19.845111846923828, 89.64688110351562, 42.984649658203125, 27.93902587890625, -56.78413391113281, 326.0699768066406, 3.8646774291992188, 109.51910400390625, -35.53186798095703, 222.29054260253906, 421.3841857910156, 144.23655700683594, 433.5755615234375, 59.149200439453125, 103.14659881591797, 39.589969635009766, 477.3356628417969, 70.69246673583984, 40.50538635253906, 94.34383392333984, -84.6296615600586, -13.082305908203125, 458.6908874511719, 234.99777221679688, 356.7538757324219, -35.277835845947266, -7.517490386962891, 400.67584228515625, 76.64506530761719, 77.20819091796875, 175.28445434570312, 267.2524108886719, 371.09173583984375, 33.66998291015625, 150.06385803222656, 97.80782318115234, -111.67049407958984, 28.12990951538086, 323.12347412109375, -365.26385498046875, 332.1549072265625, 104.49481964111328, -42.96083068847656, 65.86308288574219, 268.9021911621094, -40.65155792236328, 111.3644790649414, 568.1558837890625, 508.128173828125, 166.239013671875, 294.451416015625, 192.77978515625, -81.24707794189453, 384.8037109375, -12.226673126220703, 45.2949104309082, 40.23497772216797, 265.99017333984375, 148.512451171875, 887.8458251953125, 269.9962158203125, 344.10675048828125, 400.5284729003906], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000306.npy"} +{"epoch": 0.44933920704845814, "step": 307, "batch_size": 64, "mean": 170.90798950195312, "std": 223.5338897705078, "min": -437.5942077636719, "p10": -102.85867462158201, "median": 133.44464111328125, "p90": 489.1997863769532, "max": 806.3878173828125, "pos_frac": 0.796875, "sample": [410.9223937988281, 136.77020263671875, 97.99410247802734, 247.39120483398438, 181.3857421875, 157.5928192138672, -134.01666259765625, 101.37451934814453, -83.27375793457031, 581.357666015625, 130.61886596679688, -9.362052917480469, 85.63630676269531, -164.08328247070312, 444.0296630859375, -437.5942077636719, 506.2505798339844, -172.8072967529297, 233.404296875, -82.07103729248047, 229.15875244140625, 311.1396484375, 121.17943572998047, 159.52891540527344, 500.8509521484375, 263.3790588378906, 36.62507247924805, 119.75506591796875, 92.56015014648438, 555.0511474609375, 101.96709442138672, 509.7856140136719, 66.05817413330078, 118.54258728027344, -109.53416442871094, 106.49087524414062, -115.59646606445312, 42.97499084472656, 462.01373291015625, 416.05242919921875, -38.428001403808594, 381.9810485839844, 134.17034912109375, 333.05938720703125, 196.24534606933594, 53.65489196777344, 392.33331298828125, 257.4876403808594, 4.802911758422852, 81.04081726074219, 233.5000457763672, 16.084842681884766, 352.422119140625, 609.5721435546875, 277.9917907714844, 806.3878173828125, 140.2332763671875, -87.28253173828125, 220.6200408935547, 132.71893310546875, -28.471725463867188, 288.9814758300781, 109.18544006347656, -149.68247985839844], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000307.npy"} +{"epoch": 0.45080763582966227, "step": 308, "batch_size": 64, "mean": 195.95510864257812, "std": 201.2640838623047, "min": -209.521240234375, "p10": -46.686164283752426, "median": 169.1824722290039, "p90": 493.1917510986329, "max": 555.900634765625, "pos_frac": 0.828125, "sample": [160.52835083007812, 518.5501708984375, -206.00094604492188, 253.07839965820312, 33.1644287109375, 555.900634765625, -209.521240234375, 14.600345611572266, 39.17034912109375, 66.3060073852539, 437.21539306640625, -5.464351654052734, 231.365478515625, 97.23846435546875, 409.11572265625, 499.8930358886719, -147.8248291015625, 169.64930725097656, 166.03414916992188, 34.894996643066406, 272.57513427734375, 504.5705871582031, 261.7950439453125, 477.555419921875, -85.80325317382812, 287.28851318359375, 132.27536010742188, 103.72747802734375, 142.67955017089844, -54.01008605957031, 280.9380187988281, 122.86273956298828, 173.366943359375, 233.5226593017578, 302.5389404296875, 448.8721008300781, 505.9249267578125, 204.306640625, -138.9465789794922, 87.05326843261719, 111.31207275390625, 384.29486083984375, 373.0406188964844, 168.71563720703125, 163.14039611816406, 550.0528564453125, 247.77651977539062, 53.27162170410156, 19.635643005371094, 436.55133056640625, 541.514892578125, 295.4434509277344, 150.43551635742188, -7.400571823120117, -29.597013473510742, 67.23019409179688, 383.9275817871094, 387.10968017578125, 297.0316162109375, -0.23885345458984375, 415.007568359375, 276.8398132324219, -139.38409423828125, 14.428764343261719], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000308.npy"} +{"epoch": 0.4522760646108664, "step": 309, "batch_size": 64, "mean": 190.09698486328125, "std": 235.96681213378906, "min": -643.4765014648438, "p10": -87.29917602539062, "median": 176.77841186523438, "p90": 482.05756530761727, "max": 756.4334716796875, "pos_frac": 0.796875, "sample": [181.22222900390625, 71.40531921386719, 113.4050064086914, -121.74972534179688, 294.73260498046875, 172.3345947265625, 134.41404724121094, -17.990093231201172, 537.3421020507812, 33.945709228515625, 236.5753173828125, 433.0311279296875, 115.13191223144531, 515.7642822265625, 60.48535919189453, 40.08136749267578, -81.95161437988281, 320.1969909667969, 493.2252502441406, 326.4281921386719, 259.430419921875, -643.4765014648438, 242.52676391601562, -112.47697448730469, 79.07138061523438, 158.00225830078125, 255.69650268554688, 318.12738037109375, 76.08424377441406, 98.46688842773438, -141.45654296875, 646.294189453125, 66.0792236328125, -34.60481262207031, 152.63995361328125, 452.9969482421875, 756.4334716796875, 455.9996337890625, 139.50198364257812, -17.831069946289062, -25.34719467163086, 84.69117736816406, 68.19184875488281, 450.11181640625, 327.6864929199219, 300.6369934082031, 534.9716796875, 433.8988037109375, 284.34710693359375, -28.49386978149414, 407.3526306152344, -181.4453887939453, 245.43032836914062, 306.9982604980469, -133.76376342773438, 48.38054275512695, 590.5263671875, 392.0046691894531, -89.59098815917969, 190.83154296875, 192.16787719726562, 39.112308502197266, 414.6971740722656, 247.2751007080078], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000309.npy"} +{"epoch": 0.45374449339207046, "step": 310, "batch_size": 64, "mean": 179.7801055908203, "std": 193.92762756347656, "min": -268.11767578125, "p10": -61.02833671569824, "median": 181.81310272216797, "p90": 422.83028869628913, "max": 643.3496704101562, "pos_frac": 0.796875, "sample": [-61.92043685913086, 181.95074462890625, 393.9413146972656, -58.94676971435547, 114.06974792480469, 333.1741027832031, 62.6620979309082, -3.041250228881836, 16.148271560668945, 213.56015014648438, -228.406005859375, 394.97088623046875, 440.4341125488281, -69.1192626953125, 56.708641052246094, 274.09918212890625, 214.19931030273438, 252.54246520996094, 110.2557373046875, 167.86441040039062, -23.999927520751953, 122.09403991699219, 334.30615234375, 87.80789184570312, 164.04293823242188, 570.77685546875, 353.33160400390625, 50.65378189086914, 33.089141845703125, 311.0570983886719, -268.11767578125, 643.3496704101562, -21.512989044189453, 129.488525390625, 590.2210083007812, 196.90982055664062, 272.8790283203125, -35.87147521972656, -71.28980255126953, -20.99646759033203, 85.64474487304688, 195.66757202148438, 339.67645263671875, 239.17857360839844, 100.95636749267578, 262.37359619140625, 427.91162109375, -125.20276641845703, 329.6607666015625, 262.6016540527344, 288.0276184082031, -61.93659210205078, 361.54107666015625, 188.42112731933594, 55.39945983886719, 57.447425842285156, 87.32562255859375, 516.3936767578125, 490.4254455566406, 256.91717529296875, 77.09161376953125, 410.9738464355469, 254.3886260986328, 181.6754608154297], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000310.npy"} +{"epoch": 0.4552129221732746, "step": 311, "batch_size": 64, "mean": 201.69406127929688, "std": 262.2710266113281, "min": -266.076416015625, "p10": -125.69466247558591, "median": 175.19094848632812, "p90": 605.0783569335938, "max": 888.67236328125, "pos_frac": 0.75, "sample": [475.8343505859375, 432.2477111816406, 422.38653564453125, -21.997215270996094, -165.25241088867188, 584.059814453125, -137.729736328125, 289.6619873046875, 163.81906127929688, 614.0863037109375, 278.69244384765625, 70.12759399414062, 372.6774597167969, 756.1048583984375, 323.9507141113281, 153.60089111328125, 497.59442138671875, 186.56283569335938, -55.761390686035156, 615.4349365234375, 205.35853576660156, 232.40069580078125, 538.833740234375, 248.3087921142578, 27.262229919433594, 162.7174530029297, -46.16429138183594, -185.7996826171875, 210.11642456054688, 125.80071258544922, -97.61282348632812, 292.8662414550781, -150.51620483398438, 133.60289001464844, 274.3978271484375, -139.32958984375, -8.123115539550781, 888.67236328125, 4.3028564453125, 246.0181884765625, 38.47400665283203, 93.65729522705078, 228.36572265625, 23.878644943237305, 649.032470703125, 393.2481994628906, 44.75151443481445, -71.12606811523438, -11.883113861083984, 102.13926696777344, -26.350414276123047, 523.8939208984375, 195.23519897460938, 1.6224594116210938, -266.076416015625, 466.1123046875, 152.13087463378906, 227.679443359375, 0.44637298583984375, 246.1337127685547, 629.9842529296875, -188.3332977294922, -50.33452606201172, 686.5238647460938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000311.npy"} +{"epoch": 0.4566813509544787, "step": 312, "batch_size": 64, "mean": 187.60142517089844, "std": 264.7841491699219, "min": -332.0384216308594, "p10": -100.69113006591792, "median": 129.0437240600586, "p90": 507.1061096191406, "max": 1052.96044921875, "pos_frac": 0.796875, "sample": [120.49435424804688, 1052.96044921875, 565.0778198242188, 199.01113891601562, 96.78733825683594, 83.04297637939453, -125.15583038330078, 95.03434753417969, 81.97931671142578, 346.1884765625, -303.6953125, 416.9819030761719, 997.2784423828125, 333.5048828125, 258.74969482421875, 289.83740234375, -194.67733764648438, 233.98046875, 413.2518310546875, 206.61651611328125, 581.2185668945312, 95.58982849121094, 121.75343322753906, 286.1812744140625, 59.857025146484375, 55.30064392089844, 58.062522888183594, 770.2840576171875, 380.9273986816406, 502.388671875, 124.07080078125, 442.1600036621094, 189.180419921875, 90.96980285644531, -118.91629028320312, 115.00941467285156, 141.3679962158203, 509.12786865234375, -22.062820434570312, 43.91462326049805, 297.1681823730469, 136.7078399658203, -47.12842559814453, 276.7081298828125, 156.2602081298828, -1.000274658203125, -38.09454345703125, 659.7862548828125, -58.16575622558594, 8.287237167358398, 245.7081298828125, 134.0166473388672, -179.93380737304688, -21.99334716796875, -123.7328109741211, 109.39646911621094, 146.40550231933594, 8.47119140625, -332.0384216308594, 388.9512939453125, 267.2835388183594, 18.85845947265625, 32.94874954223633, 327.9866943359375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000312.npy"} +{"epoch": 0.4581497797356828, "step": 313, "batch_size": 64, "mean": 197.0161895751953, "std": 260.83905029296875, "min": -424.3407287597656, "p10": -104.51485137939446, "median": 177.58743286132812, "p90": 516.8339172363283, "max": 888.247802734375, "pos_frac": 0.828125, "sample": [57.536590576171875, 310.5455627441406, 115.10891723632812, 338.3460693359375, -146.69171142578125, 22.704574584960938, 480.79583740234375, 146.24923706054688, 389.4408264160156, -35.855770111083984, 37.641571044921875, -168.9987335205078, 415.98321533203125, 180.63246154785156, 22.466886520385742, 269.83154296875, 532.27880859375, 209.01344299316406, 97.7562026977539, 213.06246948242188, 68.07535552978516, 358.0602722167969, -424.3407287597656, -133.94017028808594, 217.15478515625, 649.10302734375, 291.2364501953125, 79.96833801269531, 143.33837890625, 888.247802734375, 345.07073974609375, 151.8414306640625, 10.120668411254883, 174.5424041748047, -249.28948974609375, 141.5285186767578, -28.787683486938477, -387.315185546875, 445.1535949707031, 277.40338134765625, -14.342521667480469, 280.50213623046875, 62.55830001831055, 320.88787841796875, 302.7557067871094, 101.2527847290039, 0.13352203369140625, 349.40521240234375, 270.3663330078125, -144.3111114501953, 450.48187255859375, 90.74436950683594, 205.3492431640625, 8.235774993896484, 225.1835174560547, 593.8993530273438, 633.974853515625, -11.913375854492188, 12.917221069335938, 82.83198547363281, 282.18115234375, 374.9852600097656, 816.0748291015625, 809.8624267578125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000313.npy"} +{"epoch": 0.45961820851688695, "step": 314, "batch_size": 64, "mean": 179.90646362304688, "std": 279.97979736328125, "min": -363.63226318359375, "p10": -153.80068817138672, "median": 138.77600860595703, "p90": 510.5842529296876, "max": 911.3707885742188, "pos_frac": 0.75, "sample": [726.7568359375, -327.54888916015625, 128.15585327148438, 58.00981903076172, 12.33221435546875, 267.6906433105469, 50.4954833984375, 333.2577819824219, 103.00373840332031, -130.7716827392578, 377.1998596191406, 1.5144805908203125, 266.9068603515625, 129.1484832763672, 207.83935546875, -363.63226318359375, -148.23509216308594, 14.733932495117188, 702.331787109375, 792.3345336914062, 37.55195617675781, 4.456731796264648, -156.18594360351562, 31.15621566772461, 157.41876220703125, -137.570556640625, 8.614486694335938, -167.373046875, -42.983116149902344, 447.7422180175781, 466.798828125, 43.747314453125, 269.4700622558594, 494.01641845703125, 285.7225646972656, 161.73533630371094, -125.69182586669922, 148.40353393554688, 25.38751220703125, 782.3914184570312, 440.2897033691406, 362.2751159667969, 911.3707885742188, 527.4678955078125, -81.42373657226562, 355.76971435546875, -165.62057495117188, 469.0474853515625, -29.887588500976562, 261.5445251464844, 379.0032958984375, 52.46875, 311.76739501953125, 367.1958923339844, 486.1419372558594, 517.6847534179688, 76.10389709472656, 254.65380859375, 227.96263122558594, -11.871284484863281, -178.99330139160156, -29.087879180908203, 311.0395202636719, -237.22190856933594], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000314.npy"} +{"epoch": 0.461086637298091, "step": 315, "batch_size": 64, "mean": 160.89739990234375, "std": 221.77073669433594, "min": -186.3019256591797, "p10": -85.66900634765625, "median": 134.78648376464844, "p90": 510.021630859375, "max": 735.2792358398438, "pos_frac": 0.734375, "sample": [593.8519287109375, -49.91632843017578, 281.30706787109375, 15.600738525390625, -51.836910247802734, 63.83979797363281, 43.95545959472656, 146.5791778564453, 277.2034912109375, -34.9375, 282.70440673828125, 39.56451416015625, -81.10719299316406, 292.8368225097656, 104.87725067138672, 605.7091064453125, 302.12042236328125, 189.11154174804688, -160.42477416992188, 347.53302001953125, 210.2362060546875, 504.609619140625, 529.1027221679688, 42.507659912109375, -127.51300048828125, 202.06362915039062, 285.48370361328125, 172.71092224121094, 411.8323669433594, 160.7261199951172, 448.9664611816406, 43.65541076660156, 83.78091430664062, 122.99378967285156, 158.78004455566406, 323.80426025390625, -131.83901977539062, -19.179046630859375, 239.10032653808594, 10.575592041015625, 12.926288604736328, -57.3431396484375, 735.2792358398438, 40.44923400878906, 227.05621337890625, 4.740455627441406, -185.98220825195312, -15.778181076049805, -87.62406921386719, 589.0218505859375, 98.804931640625, 260.9725341796875, 614.4656372070312, 146.98062133789062, 156.7972869873047, -121.71000671386719, 280.9176330566406, -17.819881439208984, 415.5604553222656, 512.341064453125, -186.3019256591797, -61.463104248046875, 70.28602600097656, -16.113014221191406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000315.npy"} +{"epoch": 0.46255506607929514, "step": 316, "batch_size": 64, "mean": 233.23556518554688, "std": 253.80020141601562, "min": -251.30178833007812, "p10": -10.400272178649894, "median": 176.96548461914062, "p90": 615.9892456054688, "max": 936.8214721679688, "pos_frac": 0.875, "sample": [324.1458740234375, 116.34941101074219, 481.6661376953125, 77.56858825683594, 38.92498016357422, 926.675537109375, 193.25048828125, 714.2824096679688, 488.52484130859375, 681.4370727539062, 350.4844665527344, 333.66754150390625, 35.809234619140625, 27.23776626586914, 22.09955596923828, 229.41310119628906, 70.38008117675781, 99.23419189453125, 23.14565658569336, 438.10150146484375, 168.78634643554688, 15.711429595947266, 644.673095703125, 436.04730224609375, 273.99395751953125, 122.54914093017578, 73.010986328125, 626.650634765625, 181.25758361816406, 534.45068359375, -2.4853687286376953, 144.1734619140625, 51.32672119140625, 172.6733856201172, 237.08840942382812, 320.87396240234375, 425.785888671875, 58.88721466064453, 329.2330322265625, 23.084327697753906, 591.1126708984375, 320.0086364746094, 39.17254638671875, 404.12310791015625, 192.53416442871094, 143.44918823242188, -233.84230041503906, 13.752696990966797, 936.8214721679688, 170.4708709716797, 745.07958984375, -251.30178833007812, 203.0619354248047, 212.297607421875, 388.95867919921875, -18.689109802246094, -47.335044860839844, -13.792373657226562, -64.02936553955078, 122.9083480834961, 239.12945556640625, -110.51763153076172, 128.08633422851562, 305.4458923339844], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000316.npy"} +{"epoch": 0.46402349486049926, "step": 317, "batch_size": 64, "mean": 254.49911499023438, "std": 236.30441284179688, "min": -172.82029724121094, "p10": -43.94992294311523, "median": 242.31617736816406, "p90": 585.4386474609375, "max": 715.9205932617188, "pos_frac": 0.8125, "sample": [385.6385498046875, 80.42774200439453, 134.80416870117188, 474.6963195800781, 465.1619567871094, 60.163299560546875, 449.17144775390625, 399.93902587890625, -172.82029724121094, 588.0289306640625, 711.6004638671875, 187.6023712158203, 236.550048828125, -11.0185546875, 152.73199462890625, 435.23175048828125, 195.82322692871094, 2.7674331665039062, 103.1894302368164, 417.3834533691406, -34.94464111328125, 347.28704833984375, 332.7606506347656, 492.45220947265625, 23.784557342529297, 525.1942138671875, -10.78676986694336, 116.23177337646484, 715.9205932617188, 404.25439453125, 608.6953735351562, 452.75567626953125, 150.88133239746094, 590.4910888671875, 456.041748046875, -60.13066101074219, 294.2745666503906, 117.39256286621094, 311.732177734375, 103.00408935546875, 194.00701904296875, 533.4586791992188, -21.352880477905273, -43.58116149902344, 665.6489868164062, 248.08230590820312, 579.3946533203125, 329.2383117675781, 277.0323181152344, -56.86791229248047, 127.21894836425781, 107.3392105102539, 288.62774658203125, -44.10796356201172, -149.05081176757812, 112.72586822509766, 122.73471069335938, -114.1511459350586, 642.03759765625, 444.24664306640625, 104.48880767822266, 429.8543701171875, -134.5323944091797, 411.08612060546875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000317.npy"} +{"epoch": 0.4654919236417034, "step": 318, "batch_size": 64, "mean": 192.4683380126953, "std": 229.1135711669922, "min": -331.56494140625, "p10": -48.472715759277335, "median": 151.2028045654297, "p90": 534.4242797851564, "max": 842.6651000976562, "pos_frac": 0.796875, "sample": [59.214393615722656, -40.41050720214844, 801.197998046875, 117.3123779296875, 232.06857299804688, 106.49394226074219, -17.145151138305664, 200.34730529785156, 123.94658660888672, 631.6787109375, 22.38743019104004, 107.5933837890625, 334.211669921875, 549.0872192382812, 584.5736694335938, 227.04917907714844, -19.725723266601562, 151.02374267578125, -76.29314422607422, -25.850717544555664, 569.5560913085938, 71.84337615966797, -116.62049865722656, 500.21075439453125, 842.6651000976562, 312.48681640625, -0.9004325866699219, -84.67637634277344, 227.39964294433594, 312.2149963378906, 62.76806640625, 315.8912048339844, 72.42595672607422, 403.5152893066406, 234.41563415527344, 142.8935546875, 571.2261962890625, 33.80541229248047, -51.927947998046875, 453.1547546386719, 429.14984130859375, 221.50830078125, 15.922016143798828, -38.348121643066406, 330.89923095703125, 211.47305297851562, 132.014404296875, 206.79345703125, 134.55043029785156, -69.35757446289062, -331.56494140625, 65.32059478759766, 241.75271606445312, 227.91314697265625, 364.8936462402344, 379.00994873046875, 246.7589111328125, 151.38186645507812, 59.13966369628906, 236.25161743164062, 137.55746459960938, 253.295654296875, -247.02117919921875, 17.57115936279297], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000318.npy"} +{"epoch": 0.4669603524229075, "step": 319, "batch_size": 64, "mean": 222.212646484375, "std": 239.9444580078125, "min": -333.9141540527344, "p10": -115.00978240966796, "median": 223.46128845214844, "p90": 528.2407287597657, "max": 945.6641235351562, "pos_frac": 0.828125, "sample": [307.6522521972656, 164.4683074951172, 289.4620666503906, 273.0699157714844, 81.64979553222656, 245.91073608398438, 367.9021301269531, 489.1363525390625, 234.10610961914062, 102.2571029663086, -116.91807556152344, -188.204345703125, 313.2779235839844, 125.60652160644531, 302.06243896484375, 107.72511291503906, 88.33850860595703, 277.51910400390625, 282.5707092285156, 179.5333251953125, 202.71595764160156, 400.7928771972656, 212.81646728515625, 246.47378540039062, 322.91741943359375, 35.5869140625, 354.6337585449219, 786.9801025390625, 445.27325439453125, 348.30645751953125, -115.06610107421875, 172.10333251953125, 186.34999084472656, 535.4317016601562, -264.048583984375, 353.52880859375, -333.9141540527344, -84.81378173828125, -128.507568359375, 378.6587219238281, 549.933837890625, 465.0128479003906, 603.2153930664062, 19.978853225708008, 176.22251892089844, 41.81877136230469, 537.2575073242188, 561.14697265625, 268.76849365234375, 511.4617919921875, 945.6641235351562, -114.87837219238281, 147.26927185058594, 415.756103515625, -29.459583282470703, 38.37395477294922, -146.97418212890625, -29.401254653930664, 339.0714111328125, 173.4762420654297, 140.56414794921875, 211.74151611328125, 256.0247497558594, 156.2187042236328], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000319.npy"} +{"epoch": 0.4684287812041116, "step": 320, "batch_size": 64, "mean": 238.38137817382812, "std": 255.02056884765625, "min": -201.1964111328125, "p10": -72.14968643188476, "median": 223.46613311767578, "p90": 582.1647888183594, "max": 972.0992431640625, "pos_frac": 0.8125, "sample": [150.58447265625, 206.15472412109375, 890.824462890625, 133.5763702392578, 274.0635986328125, -56.398521423339844, 181.25454711914062, -201.1964111328125, 824.3449096679688, 201.4676971435547, 411.99224853515625, 330.4567565917969, -44.23051452636719, 366.1877746582031, 228.6563720703125, 570.2584228515625, 587.2675170898438, 375.5331726074219, 338.6112365722656, -152.89755249023438, 356.40203857421875, 335.76019287109375, -49.608070373535156, 241.4857177734375, 71.03540802001953, 26.79259490966797, 290.0789794921875, 303.1517028808594, -84.53724670410156, 622.526123046875, -75.75758361816406, 215.93092346191406, -43.817420959472656, 179.16506958007812, 453.6219482421875, 130.799072265625, 145.92584228515625, 201.86158752441406, 972.0992431640625, 341.6958923339844, 408.4889831542969, 57.889320373535156, 106.53807067871094, -73.4674072265625, 667.3778076171875, 63.82428741455078, 351.2049255371094, 218.27589416503906, 1.7063159942626953, 10.171798706054688, 384.2563171386719, 330.423095703125, 290.77777099609375, 416.283203125, 445.36627197265625, 613.053466796875, 500.3927001953125, 2.0140514373779297, -99.6602783203125, -69.07500457763672, 50.210968017578125, 257.5442199707031, 249.59674072265625, -177.90939331054688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000320.npy"} +{"epoch": 0.4698972099853157, "step": 321, "batch_size": 64, "mean": 205.57069396972656, "std": 267.8816833496094, "min": -373.4510498046875, "p10": -84.79826049804687, "median": 191.40318298339844, "p90": 543.0921813964844, "max": 996.6962890625, "pos_frac": 0.78125, "sample": [692.6624145507812, 123.64624786376953, 315.08837890625, 252.56329345703125, -42.783409118652344, 250.70779418945312, 325.03948974609375, 301.4053955078125, 201.62445068359375, 490.0408935546875, 481.2040710449219, 375.9202880859375, 124.48477172851562, 53.190059661865234, -250.0653533935547, 313.85809326171875, 414.64959716796875, 252.49588012695312, 174.60629272460938, 541.4921875, 794.0873413085938, 140.74203491210938, -373.4510498046875, 289.46466064453125, 206.5793914794922, 288.2261657714844, 697.690673828125, -299.5314636230469, -97.44288635253906, 161.68568420410156, 78.73561096191406, 543.7778930664062, -28.16290283203125, 340.599365234375, 162.55926513671875, 417.06744384765625, 222.50364685058594, -307.8550720214844, 996.6962890625, 101.44376373291016, -78.86654663085938, 65.96395111083984, 15.142190933227539, 97.00560760498047, -47.84357452392578, 189.94293212890625, -70.577880859375, 27.013378143310547, 192.86343383789062, 277.98095703125, 643.899658203125, 659.6617431640625, -3.6188011169433594, -279.8741149902344, 181.7301025390625, 177.2657470703125, 155.49862670898438, 153.0332489013672, -16.354093551635742, 451.59259033203125, 235.42474365234375, 265.9491271972656, 223.78485107421875, -87.34042358398438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000321.npy"} +{"epoch": 0.4713656387665198, "step": 322, "batch_size": 64, "mean": 230.3541259765625, "std": 241.87794494628906, "min": -309.2044982910156, "p10": -51.89152374267578, "median": 252.42283630371094, "p90": 520.6351196289063, "max": 849.9012451171875, "pos_frac": 0.78125, "sample": [-141.68157958984375, 830.4906005859375, 338.5879211425781, 849.9012451171875, 94.9475326538086, -68.34658813476562, 523.1983032226562, 409.8811950683594, 318.6795959472656, -77.49496459960938, -48.47978210449219, -34.98027801513672, 380.234619140625, 270.2362060546875, 297.776123046875, 367.62408447265625, 22.258338928222656, 250.30377197265625, -24.33350372314453, -103.55691528320312, 567.076171875, 560.8375854492188, 243.41455078125, 563.4616088867188, -53.35369873046875, 265.3270263671875, 362.4136962890625, 513.95751953125, 11.513191223144531, 514.6543579101562, -29.723752975463867, 241.5179443359375, -14.682781219482422, 368.888427734375, 1.51165771484375, 17.135311126708984, 257.05401611328125, 188.1250457763672, 13.923465728759766, 309.579345703125, 459.55706787109375, 433.0970458984375, 382.92510986328125, -110.94457244873047, 514.2974243164062, 138.68943786621094, 297.7335205078125, -29.558055877685547, 254.54190063476562, -6.1681365966796875, 132.0933837890625, 673.7915649414062, 235.42990112304688, 293.1182861328125, 379.037841796875, 366.2008056640625, 93.13348388671875, 87.94181060791016, 63.684722900390625, -309.2044982910156, 11.276031494140625, 346.90167236328125, 436.1662902832031, 241.04498291015625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000322.npy"} +{"epoch": 0.47283406754772395, "step": 323, "batch_size": 64, "mean": 191.61865234375, "std": 247.1456756591797, "min": -457.420654296875, "p10": -76.88332443237304, "median": 172.04446411132812, "p90": 526.411376953125, "max": 806.4495239257812, "pos_frac": 0.796875, "sample": [464.7924499511719, 245.02719116210938, 611.5843505859375, -457.420654296875, 582.050537109375, -323.8115539550781, 571.9268798828125, 806.4495239257812, 5.541156768798828, 100.99028778076172, 273.45367431640625, 38.3572998046875, 169.16790771484375, 140.88914489746094, 17.217567443847656, 307.70294189453125, 97.89466094970703, 309.1879577636719, -52.740089416503906, 629.64453125, -13.451545715332031, 132.70399475097656, 482.42919921875, -98.20055389404297, -74.37147521972656, 320.18017578125, 145.7558135986328, 136.84181213378906, 405.4581298828125, 32.691070556640625, 501.9911804199219, 280.47265625, 416.321533203125, -34.663543701171875, 290.54608154296875, 24.727874755859375, -17.034988403320312, 174.9210205078125, 314.79315185546875, 235.64385986328125, 299.5953369140625, 49.8692626953125, 214.06451416015625, 529.666259765625, -297.7855529785156, 151.03770446777344, 300.58013916015625, 593.024169921875, 409.0023498535156, -77.95983123779297, 255.4801025390625, 518.816650390625, -92.77819061279297, 294.9759826660156, -260.74713134765625, 246.1456298828125, 120.18255615234375, 258.88214111328125, 76.50594329833984, 282.81219482421875, 21.29033660888672, -7.471866607666016, 147.633544921875, 35.110015869140625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000323.npy"} +{"epoch": 0.47430249632892807, "step": 324, "batch_size": 64, "mean": 192.1376953125, "std": 299.3754577636719, "min": -422.9287109375, "p10": -128.0442398071289, "median": 164.63864135742188, "p90": 527.3049407958986, "max": 1030.7677001953125, "pos_frac": 0.71875, "sample": [-236.78945922851562, 644.2335815429688, -38.828857421875, 186.3194580078125, -3.663125991821289, 224.046142578125, 242.4969482421875, 309.58721923828125, 83.63267517089844, 197.78518676757812, 907.28125, -92.58218383789062, 126.68399047851562, 98.85044860839844, -35.3929328918457, 776.693115234375, -225.59275817871094, -374.8548583984375, 331.9209899902344, 264.3655700683594, 24.402862548828125, 158.60311889648438, -137.5111083984375, 184.33993530273438, 354.3839111328125, 943.2323608398438, 466.2730712890625, -105.95487976074219, 82.37057495117188, 491.1333312988281, 506.0490417480469, 111.84748840332031, 72.48973846435547, 92.47711944580078, 384.6617126464844, 331.88134765625, 140.1931915283203, 328.7966613769531, 188.930419921875, -16.827478408813477, 504.98419189453125, -422.9287109375, -71.26905059814453, 422.70599365234375, 536.4146118164062, 378.9317932128906, 170.67416381835938, -320.0893249511719, -215.49072265625, 100.4275894165039, 256.5392761230469, 87.77950286865234, 229.4974822998047, -18.82442855834961, 86.01963806152344, 108.91357421875, 391.36541748046875, -102.35276794433594, -39.16647720336914, 1030.7677001953125, 682.6409912109375, 219.53538513183594, -34.111114501953125, 325.8827209472656], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000324.npy"} +{"epoch": 0.47577092511013214, "step": 325, "batch_size": 64, "mean": 161.921630859375, "std": 229.2771453857422, "min": -354.2310791015625, "p10": -83.13988723754882, "median": 137.43584442138672, "p90": 463.4220214843752, "max": 896.0621337890625, "pos_frac": 0.796875, "sample": [-320.7704162597656, -354.2310791015625, 254.576416015625, -55.612754821777344, 336.5830993652344, 278.17913818359375, 147.73788452148438, -93.95838928222656, 170.91506958007812, 292.74847412109375, 348.29937744140625, 515.9576416015625, 320.1727294921875, 153.83592224121094, 138.66273498535156, 123.04456329345703, 41.172035217285156, 113.94075012207031, 54.45890808105469, 251.49020385742188, -190.69036865234375, 321.392333984375, 287.97674560546875, 607.6095581054688, 25.52579116821289, 74.33370208740234, 122.64942932128906, 76.32170867919922, 124.46797180175781, 12.411994934082031, 416.4289245605469, -53.32167053222656, 896.0621337890625, -45.69283676147461, 542.2012329101562, 39.78478240966797, 520.575439453125, -94.74879455566406, 235.52377319335938, 213.3480224609375, -75.21239471435547, 125.89128875732422, 397.6395568847656, -39.067901611328125, 246.97674560546875, 307.3616638183594, 127.53118133544922, 273.1136474609375, 136.20895385742188, 185.7889404296875, 323.5946350097656, -312.43536376953125, 23.43010711669922, 652.142822265625, 30.582275390625, -46.203636169433594, 483.5619201660156, 45.89441680908203, -86.53738403320312, 254.72802734375, 154.84930419921875, 44.7809944152832, 62.490257263183594, 196.5115966796875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000325.npy"} +{"epoch": 0.47723935389133626, "step": 326, "batch_size": 64, "mean": 160.74095153808594, "std": 205.0279998779297, "min": -395.5402526855469, "p10": -28.59437713623046, "median": 137.82896423339844, "p90": 425.8518676757813, "max": 644.2683715820312, "pos_frac": 0.84375, "sample": [87.75198364257812, 9.402957916259766, 109.21095275878906, 411.83349609375, 152.41355895996094, -15.07574462890625, 156.9525146484375, 114.2181396484375, 360.1338806152344, -304.63433837890625, -118.36642456054688, -395.5402526855469, 547.1909790039062, 59.784812927246094, -114.75264739990234, 178.23959350585938, 192.38255310058594, 89.02470397949219, 254.82305908203125, 243.68417358398438, 31.455230712890625, 187.51431274414062, 231.58863830566406, 222.76300048828125, -32.14984130859375, 123.24436950683594, 626.432861328125, 227.93284606933594, 100.03813171386719, 89.95310974121094, 431.8597412109375, 203.60284423828125, 222.52601623535156, 122.486328125, 267.32342529296875, 18.294189453125, 37.027099609375, 324.41357421875, -61.573795318603516, 51.435272216796875, 107.33341979980469, 37.36993408203125, 55.59364700317383, 179.29833984375, 35.477691650390625, 100.22601318359375, 332.9147644042969, 379.1474914550781, 262.10205078125, 204.2752227783203, -20.298294067382812, 454.3489074707031, 644.2683715820312, -8.077186584472656, 315.7115478515625, 183.34732055664062, 548.6990966796875, 374.5821228027344, 11.803976058959961, -339.54974365234375, 456.19256591796875, 358.9556884765625, 76.40501403808594, 92.44774627685547], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000326.npy"} +{"epoch": 0.4787077826725404, "step": 327, "batch_size": 64, "mean": 245.57040405273438, "std": 247.11785888671875, "min": -282.67388916015625, "p10": -83.33023262023923, "median": 254.88333129882812, "p90": 571.9814880371096, "max": 783.86474609375, "pos_frac": 0.8125, "sample": [210.81295776367188, 207.8091583251953, 481.05059814453125, 200.88851928710938, 182.91697692871094, 387.6316223144531, 81.41019439697266, 315.8119812011719, 350.86578369140625, 172.3001251220703, 393.35601806640625, 343.443359375, 128.35890197753906, 434.73748779296875, -168.60647583007812, 528.73046875, -47.55727767944336, 519.4534912109375, 418.6914367675781, 355.486083984375, -241.09072875976562, 666.8275146484375, 596.6527099609375, 396.4429931640625, 84.971923828125, 590.5176391601562, 314.103759765625, -119.44892883300781, 242.7999267578125, 332.09503173828125, 406.57177734375, -165.3209228515625, -18.839385986328125, 174.4524688720703, 305.797119140625, 157.7690887451172, 266.96673583984375, 270.8173828125, 269.3697814941406, 190.48678588867188, 41.87312316894531, 71.29263305664062, -43.0145263671875, 491.1802673339844, 241.91726684570312, -98.6614990234375, -23.021129608154297, 370.63623046875, 458.34320068359375, 123.17404174804688, 720.1071166992188, 96.73829650878906, 783.86474609375, -282.67388916015625, 494.3202209472656, -29.008913040161133, 118.42698669433594, 634.7294311523438, 58.25373840332031, 438.4495849609375, 354.2081604003906, 614.37158203125, 133.77517700195312, -272.309814453125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000327.npy"} +{"epoch": 0.4801762114537445, "step": 328, "batch_size": 64, "mean": 221.25289916992188, "std": 280.9493713378906, "min": -452.6372985839844, "p10": -85.65718727111813, "median": 199.12356567382812, "p90": 552.0859558105469, "max": 998.0625, "pos_frac": 0.71875, "sample": [181.8033905029297, -125.11080932617188, 241.41366577148438, 174.35427856445312, -26.2852783203125, 423.08404541015625, 317.3519287109375, 172.3009490966797, 49.538482666015625, -452.6372985839844, 42.14435577392578, 476.4972229003906, 519.8701171875, 441.1711120605469, 347.05694580078125, 532.7531127929688, -2.3393173217773438, -140.61448669433594, -49.893707275390625, 181.64849853515625, -51.018795013427734, -34.274078369140625, 358.201904296875, 998.0625, 223.77468872070312, 410.7627258300781, 430.0072326660156, 219.29119873046875, 678.5121459960938, 381.25335693359375, -105.01409912109375, 267.2818298339844, 263.7562255859375, -100.50221252441406, 78.14485168457031, 106.79202270507812, 321.9151306152344, 430.2981872558594, 143.51654052734375, 187.55868530273438, -8.393701553344727, 227.31137084960938, -111.82344055175781, 210.68844604492188, -9.355598449707031, 494.6903076171875, 92.09866333007812, 269.2655944824219, 283.84930419921875, 216.87808227539062, 789.8424072265625, 560.3714599609375, 54.55461120605469, -18.492462158203125, -24.96627426147461, 856.2171020507812, 934.369140625, 69.48077392578125, 656.8152465820312, -28.937191009521484, -41.16564178466797, 301.6298828125, 33.404571533203125, -160.57461547851562], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000328.npy"} +{"epoch": 0.48164464023494863, "step": 329, "batch_size": 64, "mean": 229.90365600585938, "std": 297.9076232910156, "min": -809.1728515625, "p10": -70.35043334960938, "median": 209.86463928222656, "p90": 618.6922912597656, "max": 957.3305053710938, "pos_frac": 0.78125, "sample": [211.64999389648438, 400.52752685546875, 629.7684326171875, 326.188232421875, 370.2697448730469, -131.2790069580078, 572.8463134765625, 58.657752990722656, 519.362548828125, 211.68753051757812, 57.94537353515625, 450.8634948730469, 80.88666534423828, -73.25270080566406, 81.67384338378906, 167.15914916992188, 244.91712951660156, 197.50100708007812, 38.58067321777344, 868.2301025390625, 193.67608642578125, 112.12147521972656, 201.78883361816406, 146.5692138671875, -809.1728515625, 382.0015563964844, -5.474891662597656, 957.3305053710938, 619.86279296875, -63.57847595214844, 275.6539611816406, -45.77811813354492, 441.1479187011719, 162.2289581298828, 272.93572998046875, 432.1222229003906, 647.0545654296875, 394.46075439453125, 749.719482421875, 214.4512176513672, -5.716793060302734, -48.87410354614258, -181.4071807861328, 606.107666015625, 486.9238586425781, -123.3251953125, 333.7283630371094, -100.51887512207031, 347.0866394042969, 150.19447326660156, 208.07928466796875, -30.87000846862793, 264.50396728515625, 264.9567565917969, 75.62979888916016, 51.269432067871094, 274.1749267578125, 463.95526123046875, 740.7946166992188, -47.63922119140625, -359.0661926269531, 155.02166748046875, 615.9611206054688, 9.559089660644531], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000329.npy"} +{"epoch": 0.4831130690161527, "step": 330, "batch_size": 64, "mean": 240.72105407714844, "std": 273.0813903808594, "min": -279.54815673828125, "p10": -97.95620117187498, "median": 231.06687927246094, "p90": 525.6467468261719, "max": 975.4610595703125, "pos_frac": 0.8125, "sample": [517.812255859375, -37.38385009765625, -123.09337615966797, -107.46710205078125, 343.1560974121094, 531.39697265625, 166.7595672607422, 192.23004150390625, -46.38123321533203, 73.91020965576172, -144.1387481689453, 156.53408813476562, 104.55776977539062, 460.9915771484375, 517.8478393554688, 48.601722717285156, 279.0657653808594, 298.5487365722656, 273.8561706542969, 448.3732604980469, -44.72578430175781, 116.786376953125, 379.8603820800781, 47.77341842651367, 179.144287109375, 345.27374267578125, -258.70843505859375, 374.14166259765625, 260.4311218261719, 201.70263671875, 480.3584899902344, 23.165393829345703, -279.54815673828125, 289.2364807128906, 528.9891357421875, 180.19989013671875, 51.10630798339844, 872.9109497070312, 20.090492248535156, 187.0473175048828, -22.67481231689453, 469.711181640625, 140.38784790039062, 975.4610595703125, -222.7628631591797, 876.218505859375, 737.8754272460938, 374.11083984375, 427.8192138671875, 79.53289794921875, 366.828857421875, 415.3306884765625, 174.59353637695312, 714.6971435546875, 51.623130798339844, 335.11737060546875, 264.08746337890625, 480.9893798828125, -180.78707885742188, 416.8714599609375, 12.2159423828125, 369.99383544921875, 314.2569580078125, -75.76409912109375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000330.npy"} +{"epoch": 0.4845814977973568, "step": 331, "batch_size": 64, "mean": 234.03463745117188, "std": 203.91505432128906, "min": -187.35940551757812, "p10": 19.685723114013683, "median": 212.0544662475586, "p90": 463.1373687744142, "max": 865.1395263671875, "pos_frac": 0.90625, "sample": [161.94419860839844, 752.9655151367188, 292.14483642578125, -56.92449951171875, 290.6425476074219, 354.97320556640625, 69.06625366210938, 42.41630554199219, 228.4623565673828, 126.24798583984375, 737.880859375, 380.8653869628906, 865.1395263671875, 14.167366027832031, 177.4381866455078, 353.4843444824219, 657.2174072265625, 141.40599060058594, 155.96173095703125, 171.12017822265625, 476.4553527832031, 432.06207275390625, 507.96624755859375, -65.64463806152344, 74.02845764160156, 172.9890899658203, 236.58424377441406, 272.88525390625, 316.3901672363281, -57.42047119140625, 205.81629943847656, -119.44923400878906, 335.72283935546875, 219.44082641601562, 77.65850067138672, 154.2464599609375, -187.35940551757812, 218.29263305664062, 371.2481994628906, 641.5982055664062, 292.4607238769531, 201.68443298339844, 112.74933624267578, 360.7156066894531, 99.79230499267578, 118.91485595703125, 253.54861450195312, 160.37310791015625, 302.6729736328125, 112.94645690917969, -79.57965087890625, 221.6839599609375, 90.49897766113281, 101.96235656738281, 333.4123840332031, 290.5626220703125, 316.0602722167969, 105.29813385009766, 147.64613342285156, 378.9088439941406, 181.46717834472656, 295.6987609863281, 346.04541015625, 32.5618896484375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000331.npy"} +{"epoch": 0.48604992657856094, "step": 332, "batch_size": 64, "mean": 162.06460571289062, "std": 243.02284240722656, "min": -268.7896423339844, "p10": -104.13819580078123, "median": 121.29008865356445, "p90": 465.31867980957037, "max": 969.83251953125, "pos_frac": 0.734375, "sample": [782.885986328125, 187.487060546875, 120.44737243652344, 280.02294921875, 42.247314453125, -7.8236236572265625, 41.11045837402344, 447.35009765625, 426.5992431640625, 177.23094177246094, -31.824234008789062, 72.55934143066406, -80.8887939453125, 969.83251953125, 162.4634552001953, 93.39031219482422, 122.35830688476562, 127.71623229980469, 55.58454132080078, -3.058603286743164, 122.13280487060547, 67.36515808105469, 473.0195007324219, -212.31239318847656, 343.01776123046875, 53.10066604614258, -136.34255981445312, 106.5904769897461, 598.0580444335938, 224.76535034179688, 154.17965698242188, 240.27450561523438, 302.8741760253906, -66.91338348388672, -73.61206817626953, 87.82755279541016, 533.1207275390625, 518.7266845703125, -110.26904296875, 269.2037353515625, 176.70211791992188, -268.7896423339844, 312.614501953125, 68.37193298339844, 319.1024475097656, 688.3622436523438, 99.91627502441406, 436.20819091796875, -89.8328857421875, 87.86613464355469, -236.14517211914062, 219.473388671875, 356.0476379394531, -148.71380615234375, 415.26031494140625, 16.062564849853516, -136.81153869628906, -4.027381896972656, -86.01388549804688, 161.548583984375, 324.169677734375, 127.0438461303711, 82.34233093261719, -29.12054443359375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000332.npy"} +{"epoch": 0.48751835535976507, "step": 333, "batch_size": 64, "mean": 218.767822265625, "std": 312.4811706542969, "min": -315.92523193359375, "p10": -203.93765869140626, "median": 204.29244232177734, "p90": 613.6195678710939, "max": 960.8076171875, "pos_frac": 0.703125, "sample": [-49.46903991699219, 302.1581726074219, 960.8076171875, -18.493911743164062, 55.139015197753906, -16.135330200195312, 307.6009521484375, 255.7563018798828, 30.57562255859375, 14.991863250732422, 843.8173217773438, -204.71197509765625, 184.11288452148438, -220.71005249023438, 593.6748657226562, -240.57492065429688, 706.3222045898438, -134.42605590820312, 687.03076171875, 312.1536865234375, -45.690799713134766, 309.7050476074219, -19.327987670898438, 371.8518371582031, 458.21923828125, 537.550048828125, -202.13092041015625, 117.37744140625, 74.69580078125, 430.03045654296875, 224.4720001220703, 5.28010368347168, -87.427001953125, 515.33642578125, 622.1672973632812, 453.1274719238281, -262.1780700683594, 10.885910034179688, 545.4207763671875, 445.1399230957031, 488.72210693359375, -263.1041259765625, -17.720687866210938, 543.9691772460938, 510.0848388671875, -62.994667053222656, 176.4167022705078, 174.3404998779297, 757.9164428710938, 312.00482177734375, 506.940673828125, 318.8082275390625, 591.322265625, 135.6201171875, 245.84609985351562, -135.27052307128906, 415.35406494140625, -25.79049301147461, 343.61614990234375, -315.92523193359375, 666.2536010742188, -300.3327941894531, 0.5006866455078125, 60.43745422363281], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000333.npy"} +{"epoch": 0.4889867841409692, "step": 334, "batch_size": 64, "mean": 203.112060546875, "std": 306.5610046386719, "min": -559.8861694335938, "p10": -120.17344284057616, "median": 167.80025482177734, "p90": 678.2039001464846, "max": 846.1075439453125, "pos_frac": 0.78125, "sample": [635.3429565429688, 42.525634765625, -199.37002563476562, 497.78460693359375, 222.45440673828125, 42.684593200683594, -102.95555877685547, 70.13739776611328, 246.96951293945312, 32.95896911621094, 207.80282592773438, -16.84076690673828, 387.1338195800781, 221.90103149414062, -127.55253601074219, 203.4407501220703, 65.53941345214844, 482.717041015625, -559.8861694335938, 696.5728759765625, 45.464481353759766, 234.85623168945312, -1.9499664306640625, -273.8548278808594, 368.72564697265625, 246.84768676757812, 461.20111083984375, -414.98175048828125, -241.92332458496094, -0.0067596435546875, 36.35107421875, -41.19259262084961, 141.4404754638672, 90.79095458984375, 779.0701293945312, 790.388427734375, 319.7962341308594, 38.152565002441406, 124.35371398925781, 525.8408203125, 59.15923309326172, 319.19488525390625, 600.607666015625, 419.36761474609375, -76.78651428222656, 75.16537475585938, 846.1075439453125, 84.38778686523438, 732.66015625, 552.331298828125, -78.06226348876953, -357.23321533203125, 46.52911376953125, 306.8207092285156, 211.95497131347656, 741.552490234375, 441.9380798339844, 111.36140441894531, 56.23014831542969, 402.75689697265625, 72.00314331054688, 750.5267333984375, 207.70655822753906, 194.1600341796875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000334.npy"} +{"epoch": 0.49045521292217326, "step": 335, "batch_size": 64, "mean": 157.77536010742188, "std": 278.9571228027344, "min": -340.3330383300781, "p10": -204.45661315917968, "median": 116.54946517944336, "p90": 467.4569519042969, "max": 994.8466186523438, "pos_frac": 0.703125, "sample": [185.98309326171875, 448.6775817871094, 454.74700927734375, -1.728607177734375, 191.76412963867188, -82.57965850830078, 29.46848487854004, 463.30462646484375, 72.29786682128906, 59.17279815673828, -267.4669189453125, 34.84489440917969, 326.009033203125, 281.82977294921875, -21.097679138183594, 660.03125, 39.85106658935547, 112.02515411376953, -285.2900390625, 221.96405029296875, -122.826171875, 84.84544372558594, 224.62277221679688, 171.38888549804688, 725.7534790039062, 597.6765747070312, 408.1090393066406, 285.82794189453125, 46.18095397949219, 622.4020385742188, 994.8466186523438, -289.1625061035156, 150.06961059570312, -340.3330383300781, 461.2774963378906, -12.033517837524414, 465.44244384765625, 7.78886604309082, 76.93759155273438, 518.6509399414062, -130.72015380859375, 333.9293212890625, -94.75215911865234, 40.5535888671875, 262.33599853515625, -96.92557525634766, 245.5094757080078, -27.974288940429688, -251.54257202148438, -111.046142578125, 121.07377624511719, -180.6277618408203, 421.57940673828125, -220.383056640625, -204.63394165039062, 412.04986572265625, 284.3260498046875, 468.3203125, 10.476821899414062, -204.0428466796875, 334.1076354980469, 358.6470031738281, 229.5897216796875, 96.49871826171875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000335.npy"} +{"epoch": 0.4919236417033774, "step": 336, "batch_size": 64, "mean": 248.54905700683594, "std": 333.004150390625, "min": -614.5650634765625, "p10": -78.66241302490234, "median": 230.70498657226562, "p90": 729.7027099609377, "max": 1180.617431640625, "pos_frac": 0.84375, "sample": [293.16693115234375, 61.245277404785156, 891.8026123046875, 19.78850746154785, 494.4700622558594, 236.869140625, 261.0315856933594, -71.50032806396484, -88.35983276367188, 899.7073974609375, 269.00054931640625, 320.6766357421875, 191.04537963867188, 422.3286437988281, 1180.617431640625, -30.063966751098633, 748.0274658203125, 382.6197204589844, 285.49420166015625, 398.10272216796875, -79.47859191894531, 84.23114013671875, 122.51484680175781, 288.0257873535156, 127.0257568359375, 346.5384826660156, 119.34475708007812, 90.33828735351562, 746.8612670898438, 229.14944458007812, 190.20152282714844, 59.893943786621094, 82.97358703613281, 232.26052856445312, 107.80207824707031, 81.702880859375, -321.1339111328125, 669.7484741210938, -76.75799560546875, -163.248046875, 689.6660766601562, 43.07520294189453, 841.449462890625, 67.54887390136719, 429.24322509765625, 61.32501220703125, -614.5650634765625, 83.8026351928711, 311.74957275390625, 331.63720703125, 139.46461486816406, 108.2985610961914, 219.24807739257812, 11.603189468383789, 303.5419006347656, 502.560791015625, 447.89324951171875, 1063.303955078125, -385.0799560546875, 332.90576171875, -396.5262451171875, 354.75030517578125, 314.0090637207031, 542.1694946289062], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000336.npy"} +{"epoch": 0.4933920704845815, "step": 337, "batch_size": 64, "mean": 241.88287353515625, "std": 266.20941162109375, "min": -261.39617919921875, "p10": -105.042854309082, "median": 223.87167358398438, "p90": 590.0349487304688, "max": 995.340087890625, "pos_frac": 0.859375, "sample": [924.1685791015625, 278.899658203125, -261.39617919921875, 73.9796371459961, 555.6934204101562, 500.2061462402344, 2.3326034545898438, 307.59124755859375, 63.62580490112305, 467.85595703125, 7.504310607910156, 282.47052001953125, 278.01202392578125, 411.63470458984375, 457.5186767578125, -133.335205078125, 221.73143005371094, 192.60032653808594, -39.91569519042969, 511.18817138671875, 226.0119171142578, 242.6927490234375, 38.02411651611328, 612.3208618164062, 159.52134704589844, -119.97576904296875, 995.340087890625, 83.710205078125, -177.56729125976562, -178.06564331054688, 205.61337280273438, 186.07029724121094, 270.48834228515625, -186.89990234375, 189.2422637939453, 161.3356170654297, 480.5914611816406, 14.604568481445312, 287.0855712890625, 373.1241760253906, 54.9558219909668, 676.8987426757812, 80.69140625, 191.99261474609375, -255.30686950683594, 147.57681274414062, 775.334716796875, 532.90869140625, 374.58673095703125, 85.00409698486328, 189.71798706054688, 70.9271469116211, 604.7527465820312, 379.18115234375, 102.12666320800781, 615.40869140625, 376.8258056640625, 231.76797485351562, 29.455284118652344, 344.4754943847656, 341.8649597167969, 261.2040100097656, -70.19938659667969, 372.71881103515625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000337.npy"} +{"epoch": 0.4948604992657856, "step": 338, "batch_size": 64, "mean": 232.34637451171875, "std": 231.36647033691406, "min": -214.32733154296875, "p10": 3.0608558654785156, "median": 192.2887725830078, "p90": 531.9264953613281, "max": 832.986328125, "pos_frac": 0.90625, "sample": [46.17247009277344, 551.427734375, 77.12275695800781, -214.32733154296875, 200.83419799804688, 153.90110778808594, -140.909912109375, 256.1297607421875, 631.4933471679688, 272.8701477050781, 94.94139099121094, 75.94215393066406, -126.96182250976562, 526.0069580078125, 109.50469970703125, 355.37530517578125, 60.878631591796875, 532.213623046875, 406.3078308105469, 490.5499267578125, 705.7684936523438, 5.064689636230469, 240.51629638671875, 234.5470733642578, 268.0252685546875, 18.247329711914062, 429.00140380859375, -21.67894744873047, -33.21784210205078, 2.9809188842773438, 221.01797485351562, 486.8855895996094, 500.49835205078125, 71.93482971191406, 251.4613037109375, 364.3787841796875, 153.16421508789062, 60.6174201965332, 399.5300598144531, 154.84056091308594, 415.74041748046875, 46.25688171386719, 61.363616943359375, 57.411834716796875, 528.3585205078125, 48.66864013671875, 210.21279907226562, 127.78365325927734, 388.38427734375, 156.33363342285156, 3.24737548828125, 30.444263458251953, 531.2565307617188, 832.986328125, 7.385566711425781, 305.6121520996094, 510.47430419921875, 183.74334716796875, 344.240478515625, 157.92355346679688, 628.8837280273438, -198.82156372070312, 74.9623031616211, 544.2579956054688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000338.npy"} +{"epoch": 0.49632892804698975, "step": 339, "batch_size": 64, "mean": 229.5679168701172, "std": 243.6851348876953, "min": -257.531982421875, "p10": -31.59123420715332, "median": 208.43653106689453, "p90": 622.467822265625, "max": 826.3442993164062, "pos_frac": 0.828125, "sample": [76.05511474609375, -119.16373443603516, -30.754032135009766, -49.42388916015625, 625.35107421875, 54.37098693847656, 83.80168151855469, 191.18296813964844, 368.0511474609375, 40.40277862548828, 826.3442993164062, 83.97828674316406, 85.9091796875, 134.86798095703125, 274.6712646484375, 388.18267822265625, 27.154375076293945, 4.569511413574219, 80.53307342529297, 615.740234375, 273.28057861328125, -0.32613372802734375, 238.20962524414062, -10.941749572753906, 230.40060424804688, 160.86004638671875, 655.3580322265625, 28.878204345703125, 380.6812744140625, 684.7089233398438, -179.5816650390625, 271.5118713378906, -257.531982421875, 157.26882934570312, 521.1243896484375, 355.9228515625, 176.56610107421875, -11.803308486938477, 228.82803344726562, 463.8802795410156, 161.18028259277344, 294.56390380859375, 439.85076904296875, 525.385009765625, 117.55469512939453, 663.6887817382812, -184.64572143554688, 642.6181640625, 218.28628540039062, -173.1431427001953, 230.272705078125, 186.19866943359375, 56.45323181152344, 238.52798461914062, 55.70261764526367, 198.58677673339844, 246.9219512939453, -31.950035095214844, 505.4034423828125, 317.6770324707031, 543.912109375, 628.6439208984375, 392.9512939453125, 288.585693359375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000339.npy"} +{"epoch": 0.4977973568281938, "step": 340, "batch_size": 64, "mean": 201.13601684570312, "std": 269.2003479003906, "min": -453.57550048828125, "p10": -98.59641265869136, "median": 179.64993286132812, "p90": 542.3779907226562, "max": 956.81640625, "pos_frac": 0.796875, "sample": [188.47906494140625, 287.1086730957031, 87.15702056884766, 185.97296142578125, 178.45591735839844, 538.7438354492188, 642.3829345703125, -453.57550048828125, 254.74981689453125, -44.72270965576172, 367.104736328125, 956.81640625, 109.76651000976562, -243.66806030273438, 581.4268798828125, -53.910606384277344, 292.2358093261719, 122.98798370361328, -28.63532257080078, 82.81588745117188, 180.8439483642578, -186.30718994140625, -342.4540710449219, 502.65045166015625, -117.39601135253906, 147.78854370117188, 10.348104476928711, 440.0670471191406, -6.554290771484375, 320.9786071777344, 532.9180297851562, 69.41529846191406, 843.55517578125, 668.6928100585938, 48.55638122558594, 143.1033935546875, 106.80731201171875, 269.9656982421875, 172.8970489501953, 215.1829833984375, 235.7723388671875, 127.80160522460938, 222.25869750976562, 8.06106185913086, -134.34352111816406, 291.5616760253906, 203.3433837890625, -43.83586883544922, 543.9354858398438, 45.06455993652344, 149.39907836914062, 444.3248291015625, 518.8662109375, 612.1492919921875, 373.895751953125, 250.17239379882812, 117.85760498046875, 46.795230865478516, 374.89056396484375, 186.87289428710938, -256.48504638671875, 429.842041015625, -54.730682373046875, 106.48355865478516], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000340.npy"} +{"epoch": 0.49926578560939794, "step": 341, "batch_size": 64, "mean": 233.8590850830078, "std": 262.9945068359375, "min": -291.9263916015625, "p10": -72.4373565673828, "median": 225.91905975341797, "p90": 542.5728454589845, "max": 1153.7083740234375, "pos_frac": 0.78125, "sample": [1153.7083740234375, -75.57318115234375, -72.70381164550781, -179.10037231445312, 443.6187744140625, 549.9546508789062, 691.81005859375, 525.3486328125, 249.9429473876953, 248.52908325195312, 11.25074577331543, 213.8924560546875, 130.1278076171875, 266.6142272949219, -3.0493335723876953, -54.08387756347656, -71.81562805175781, 356.58319091796875, 101.6875228881836, 288.2068176269531, -153.30001831054688, 376.25946044921875, 451.60467529296875, 249.62860107421875, 78.07836151123047, 580.381591796875, 179.222900390625, 190.70205688476562, -40.24205017089844, -69.52641296386719, 129.48097229003906, 413.29150390625, 648.630126953125, 288.2032775878906, 291.2864990234375, 449.3022766113281, 114.88438415527344, 13.293832778930664, -63.99311065673828, 449.13153076171875, 489.9569091796875, 303.25347900390625, 95.79731750488281, 94.2714614868164, -291.9263916015625, 514.2598266601562, 64.02815246582031, 237.94566345214844, -244.16751098632812, 344.4348449707031, 212.51235961914062, 281.8536071777344, 441.75152587890625, -73.7132339477539, 713.7001953125, -17.274368286132812, 390.74591064453125, 485.509765625, 441.53961181640625, 84.15673828125, 207.84164428710938, 128.47732543945312, 566.6343383789062, 144.12271118164062], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000341.npy"} +{"epoch": 0.5007342143906021, "step": 342, "batch_size": 64, "mean": 203.56809997558594, "std": 280.9884338378906, "min": -668.1551513671875, "p10": -95.76732482910155, "median": 171.6006317138672, "p90": 473.82757263183595, "max": 1065.0675048828125, "pos_frac": 0.765625, "sample": [708.363525390625, -668.1551513671875, -214.466552734375, 538.0494384765625, 100.46026611328125, 54.439422607421875, 174.77566528320312, 382.5181884765625, 354.38055419921875, 309.8771667480469, 144.9195098876953, 96.88658142089844, 208.78335571289062, 192.7527313232422, 464.4383239746094, 286.74456787109375, 92.63917541503906, 476.8086853027344, -199.5272216796875, -36.853729248046875, 427.7498779296875, -69.3617172241211, 87.84329223632812, -99.09404754638672, -97.92301177978516, 318.4436950683594, 1000.6763916015625, 394.0743713378906, 141.28126525878906, -191.76475524902344, -65.13044738769531, 1065.0675048828125, 176.81460571289062, 167.96932983398438, 5.0176544189453125, -90.73738861083984, 548.04296875, 466.87164306640625, 556.652099609375, 182.33192443847656, -26.71846580505371, 182.74969482421875, 384.1378479003906, -39.829925537109375, 453.0435485839844, 402.0649108886719, 172.5670166015625, 393.7374572753906, 126.5118408203125, 444.455322265625, 456.0993347167969, 99.80585479736328, -150.53070068359375, 136.68397521972656, 399.15814208984375, -85.35686492919922, 168.54537963867188, -71.0883560180664, 170.63424682617188, 80.56207275390625, 246.50790405273438, 431.8770446777344, 159.64859008789062, 100.43306732177734], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000342.npy"} +{"epoch": 0.5022026431718062, "step": 343, "batch_size": 64, "mean": 244.30438232421875, "std": 217.9193572998047, "min": -352.58624267578125, "p10": 7.132903671264652, "median": 223.41146850585938, "p90": 503.37319335937525, "max": 878.4220581054688, "pos_frac": 0.921875, "sample": [339.7925720214844, 188.37753295898438, 157.73484802246094, 159.09796142578125, 263.33563232421875, 10.321395874023438, 308.9543762207031, 341.65472412109375, 229.72027587890625, 658.3193969726562, 77.83497619628906, -121.3175048828125, 245.90264892578125, 43.759918212890625, 199.39328002929688, 110.87860107421875, 96.18071746826172, 242.23130798339844, 222.84249877929688, 716.6940307617188, 200.3710174560547, 273.521240234375, 104.69477844238281, 332.59503173828125, 440.45758056640625, 363.49456787109375, 67.42451477050781, 109.41012573242188, 878.4220581054688, 176.73643493652344, 5.203437805175781, 93.1285400390625, 338.5796203613281, 120.8253173828125, 247.02999877929688, 343.0290222167969, 255.19046020507812, 129.1797332763672, 5.766407012939453, 435.0855407714844, -352.58624267578125, 345.2236633300781, 183.45257568359375, 410.0171813964844, 280.0993957519531, 736.2759399414062, 397.63531494140625, 346.2431640625, 218.95303344726562, 346.0489807128906, 185.76089477539062, 404.6831970214844, 68.85330200195312, -102.01434326171875, 448.56640625, 526.86181640625, 188.2768096923828, 172.44578552246094, 168.0447998046875, 223.98043823242188, -39.41681671142578, 606.109130859375, 637.3713989257812, -177.26007080078125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000343.npy"} +{"epoch": 0.5036710719530103, "step": 344, "batch_size": 64, "mean": 224.7209014892578, "std": 302.7225036621094, "min": -263.05633544921875, "p10": -59.81120147705075, "median": 173.9539566040039, "p90": 569.3598510742188, "max": 1604.7716064453125, "pos_frac": 0.8125, "sample": [47.68901824951172, -71.29462432861328, -27.743789672851562, -12.503036499023438, 76.94874572753906, 55.464141845703125, 197.8525390625, 127.33586883544922, 85.09086608886719, 247.9268035888672, 5.062898635864258, 904.2203369140625, 142.09152221679688, 259.82379150390625, 117.08744812011719, 221.54629516601562, 192.24427795410156, -263.05633544921875, -246.95809936523438, 314.32684326171875, 224.0281524658203, 64.74632263183594, 180.46112060546875, 3.496936798095703, 229.6478271484375, 905.1346435546875, 595.7521362304688, 446.7032775878906, 103.1418228149414, -26.556976318359375, 443.5158386230469, 276.26513671875, 327.2982482910156, 65.2275390625, 257.1333312988281, -15.153961181640625, 371.00958251953125, -71.49557495117188, 286.09710693359375, 46.38847351074219, 577.3543701171875, 297.42706298828125, 154.13877868652344, 59.45146942138672, 87.52506256103516, 1604.7716064453125, 500.54632568359375, 292.4067687988281, 9.007875442504883, 555.6660766601562, -33.01654815673828, 515.5062255859375, 167.44679260253906, 573.035400390625, 103.55970764160156, -152.26319885253906, -180.85458374023438, 463.40234375, 560.7835693359375, -204.22006225585938, 281.766845703125, 379.74053955078125, 91.75205993652344, 591.20654296875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000344.npy"} +{"epoch": 0.5051395007342144, "step": 345, "batch_size": 64, "mean": 239.21319580078125, "std": 274.2142333984375, "min": -432.2490539550781, "p10": -45.40426731109617, "median": 198.6644058227539, "p90": 611.1788024902346, "max": 933.2581787109375, "pos_frac": 0.828125, "sample": [23.87187957763672, 665.582275390625, 113.98947143554688, 400.2601623535156, 139.0122833251953, -219.26239013671875, 708.741943359375, -175.0713348388672, 122.54342651367188, 469.2479248046875, -26.64284324645996, 304.5220947265625, 182.875, 403.0232849121094, 552.581787109375, 34.499176025390625, -338.4014587402344, 329.2945861816406, 65.51197814941406, 114.28218078613281, 433.6622009277344, 419.51812744140625, 95.62043762207031, 403.8163146972656, 259.3572998046875, 537.361083984375, 113.1943588256836, 319.59710693359375, 107.02175903320312, 198.38499450683594, 31.84564208984375, 875.3828125, 423.47308349609375, 69.97764587402344, 110.65664672851562, 415.2848205566406, 433.05877685546875, 507.1064453125, 374.6283264160156, 247.23374938964844, 422.7735595703125, 933.2581787109375, 88.84915161132812, 165.17727661132812, 447.302001953125, 268.3750305175781, 2.090747833251953, 709.2655639648438, 221.41006469726562, 440.37982177734375, -53.44487762451172, -127.5993423461914, 198.94381713867188, 636.2918090820312, -10.571645736694336, -155.64035034179688, -25.38346290588379, 119.15286254882812, 668.3157958984375, 234.13177490234375, 177.628662109375, 141.44754028320312, -432.2490539550781, -6.9035186767578125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000345.npy"} +{"epoch": 0.5066079295154186, "step": 346, "batch_size": 64, "mean": 225.88394165039062, "std": 270.5721435546875, "min": -606.3065795898438, "p10": -85.39322776794431, "median": 225.67848205566406, "p90": 610.8619689941407, "max": 794.021728515625, "pos_frac": 0.796875, "sample": [296.4570617675781, 220.55848693847656, 31.12389373779297, 369.5992431640625, 713.1484375, 557.9545288085938, 710.841796875, 230.75462341308594, 168.87461853027344, 734.3892822265625, -606.3065795898438, 103.33012390136719, 416.3541564941406, 22.17264175415039, 357.5002746582031, 637.5965576171875, -12.328460693359375, 348.7186279296875, 794.021728515625, 106.02481079101562, 349.96112060546875, 196.21771240234375, -157.74179077148438, 430.8170166015625, 276.2726135253906, 350.23004150390625, 340.29876708984375, 217.0120849609375, 398.5209045410156, 71.36959838867188, 23.799894332885742, -95.73442840576172, 206.36753845214844, 68.47560119628906, 627.539306640625, 220.6023406982422, 242.8893280029297, 123.94100952148438, 291.2474670410156, -187.92247009277344, -25.649147033691406, 649.808349609375, 298.3440246582031, 556.8231201171875, 147.6925506591797, 375.0897216796875, 416.9764404296875, 357.79644775390625, -0.7139472961425781, -61.26375961303711, 257.1513366699219, -44.8331184387207, 30.689544677734375, -193.78981018066406, 109.25541687011719, 571.9481811523438, 129.65951538085938, 140.90866088867188, 478.9632263183594, -295.3650817871094, -37.75439453125, -176.43661499023438, 259.9228210449219, 316.39892578125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000346.npy"} +{"epoch": 0.5080763582966226, "step": 347, "batch_size": 64, "mean": 227.87588500976562, "std": 299.1210021972656, "min": -333.5632629394531, "p10": -143.12954940795896, "median": 187.78137969970703, "p90": 643.7849914550784, "max": 921.137451171875, "pos_frac": 0.78125, "sample": [-333.5632629394531, 245.082275390625, 485.7725830078125, 343.0133056640625, 128.7335205078125, -27.28028106689453, 213.46432495117188, 298.52813720703125, 731.8497314453125, 30.44048309326172, 399.962890625, 33.23004150390625, 160.6432647705078, -158.947021484375, 431.5880126953125, -307.71832275390625, -269.51165771484375, -77.27108764648438, 802.080078125, 119.5984115600586, 522.1026611328125, 40.93533706665039, 263.0377197265625, 310.7583923339844, -226.759521484375, 542.710693359375, 580.52099609375, 345.92291259765625, 9.537544250488281, 761.7218627929688, 361.9449462890625, 705.423583984375, 342.26043701171875, -85.2941665649414, 227.36703491210938, -87.43822479248047, 519.00341796875, 157.7142333984375, 126.12115478515625, -93.42982482910156, 206.40737915039062, 310.6585998535156, 111.77306365966797, 350.10748291015625, 160.60977172851562, 91.30203247070312, 127.552978515625, -17.23682403564453, 921.137451171875, 88.88772583007812, 388.4945068359375, 670.8981323242188, 162.75025939941406, 875.104736328125, -106.22211456298828, 92.80023956298828, 357.8811950683594, -266.7559509277344, 428.5832214355469, 9.1092529296875, 514.1543579101562, -216.10423278808594, 169.15538024902344, 579.1513671875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000347.npy"} +{"epoch": 0.5095447870778267, "step": 348, "batch_size": 64, "mean": 192.81918334960938, "std": 271.5542907714844, "min": -343.6005859375, "p10": -106.91587982177734, "median": 198.87191009521484, "p90": 438.40244750976564, "max": 1163.3363037109375, "pos_frac": 0.765625, "sample": [327.684326171875, 82.6383285522461, 183.07553100585938, 433.4937744140625, 392.6402282714844, -93.54483795166016, 64.70951080322266, 334.6592102050781, 380.970703125, -343.6005859375, 130.63160705566406, 77.6568603515625, 681.80908203125, 314.3605041503906, 150.19554138183594, 330.7320556640625, 14.992317199707031, 339.2146301269531, 132.59442138671875, 46.41127395629883, 321.8514404296875, 314.9859619140625, -98.70049285888672, 1163.3363037109375, 428.1044921875, 228.93887329101562, -74.6727523803711, 250.98355102539062, -11.863283157348633, 40.97472381591797, -88.14521789550781, -85.99809265136719, -220.87173461914062, 911.8019409179688, 584.5479736328125, 342.33905029296875, 314.36431884765625, 279.109130859375, 247.55210876464844, 44.75929260253906, -257.18206787109375, 431.3072509765625, -36.69403839111328, 326.6773376464844, 270.0251159667969, 238.91229248046875, -179.277587890625, 22.8019962310791, 551.6683349609375, 288.10546875, 174.17889404296875, -108.53642272949219, 440.50616455078125, -251.47186279296875, 340.1978759765625, 214.6682891845703, 102.03772735595703, 38.54281997680664, 34.622802734375, 558.5791015625, 128.20028686523438, 392.86505126953125, -151.89427185058594, -103.13461303710938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000348.npy"} +{"epoch": 0.5110132158590308, "step": 349, "batch_size": 64, "mean": 218.7605438232422, "std": 316.20001220703125, "min": -495.8793029785156, "p10": -106.73574447631835, "median": 152.08968353271484, "p90": 580.948095703125, "max": 1255.34912109375, "pos_frac": 0.796875, "sample": [165.91909790039062, -414.62310791015625, 455.2026062011719, 155.1892547607422, 77.44120788574219, 136.08248901367188, 516.56201171875, -111.78642272949219, 282.6723327636719, 449.5760192871094, 148.9901123046875, 518.7847900390625, 141.36883544921875, -34.080711364746094, 38.57646942138672, 76.21634674072266, -11.214117050170898, 21.226341247558594, 217.31423950195312, -229.18685913085938, 558.5905151367188, 366.3647766113281, 418.7178649902344, 28.51365089416504, 366.8927917480469, -55.0950813293457, 425.89202880859375, -94.9508285522461, 95.83956146240234, 134.8525390625, 812.6370239257812, 7.793067932128906, -35.0880241394043, 471.3360595703125, 179.942626953125, -495.8793029785156, -170.4207763671875, 123.54058074951172, 572.7433471679688, 50.484867095947266, 764.686767578125, 0.787322998046875, -276.86529541015625, -79.46449279785156, 637.333984375, 645.8305053710938, 888.693603515625, 180.24185180664062, 584.4644165039062, 179.51412963867188, 441.6927795410156, 529.0133056640625, 0.13799285888671875, 308.38885498046875, 294.8056640625, 303.0386047363281, -160.14505004882812, 29.138626098632812, 12.382884979248047, 495.3271179199219, 465.39892578125, 57.5576171875, 80.427734375, 1255.34912109375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000349.npy"} +{"epoch": 0.5124816446402349, "step": 350, "batch_size": 64, "mean": 213.15390014648438, "std": 303.5501403808594, "min": -602.7247314453125, "p10": -174.58988494873046, "median": 221.34002685546875, "p90": 546.1952636718752, "max": 987.1338500976562, "pos_frac": 0.8125, "sample": [839.8751220703125, -437.3167724609375, 138.52862548828125, 417.5102844238281, 326.8404235839844, 215.58932495117188, 442.7919921875, 102.15335083007812, -138.4835968017578, 113.27479553222656, 110.42330932617188, 451.5971374511719, -501.5849609375, 159.1471405029297, 211.60183715820312, 8.455963134765625, 739.3158569335938, 483.46112060546875, 987.1338500976562, 297.0801086425781, 151.40780639648438, 313.2479248046875, -35.76966094970703, 497.5780334472656, 317.8682861328125, 727.4951171875, 158.34820556640625, -225.74481201171875, 125.1668472290039, 80.6189956665039, 566.4981079101562, 141.76231384277344, 171.316650390625, 202.38174438476562, 363.2583923339844, 306.73394775390625, 124.1251449584961, 275.341552734375, -256.1546630859375, 692.4994506835938, 196.0354461669922, 498.82196044921875, 339.32733154296875, -165.93557739257812, 136.76004028320312, 249.71499633789062, 602.7461547851562, 448.9864196777344, 227.09072875976562, 237.3126220703125, -602.7247314453125, 434.09906005859375, 68.18638610839844, 261.6070251464844, 36.21973419189453, 230.158203125, -343.688232421875, -101.95101928710938, -12.659217834472656, 236.80319213867188, 414.44390869140625, 264.7694396972656, -178.2988739013672, 498.65045166015625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000350.npy"} +{"epoch": 0.5139500734214391, "step": 351, "batch_size": 64, "mean": 261.2742004394531, "std": 321.85040283203125, "min": -397.9166259765625, "p10": -101.68721923828122, "median": 219.9789810180664, "p90": 719.2077575683594, "max": 1160.498291015625, "pos_frac": 0.765625, "sample": [80.4036865234375, 311.0939636230469, 579.5253295898438, 1101.0953369140625, 1160.498291015625, 148.18125915527344, 404.4502258300781, 113.10708618164062, 284.01409912109375, 296.83795166015625, 81.4906005859375, 240.0418243408203, 717.697998046875, 857.0736083984375, 364.44085693359375, 360.4585876464844, 524.3688354492188, 611.5016479492188, 720.2056884765625, -10.691564559936523, -119.49813079833984, 380.169921875, 109.03213500976562, 244.91506958007812, 186.95278930664062, 302.3406066894531, 521.2274169921875, 217.4725341796875, -397.9166259765625, -53.28483581542969, -340.7544250488281, 178.52850341796875, -124.65095520019531, 118.16695404052734, -4.48919677734375, 911.7794189453125, 28.621376037597656, 144.40280151367188, 240.9486083984375, 45.665916442871094, -156.17660522460938, 373.6198425292969, 12.136798858642578, 738.4937744140625, 136.51443481445312, -8.483339309692383, 200.6446075439453, -21.319732666015625, -33.1248779296875, -183.0448455810547, -60.12842559814453, 695.2996215820312, 183.6431427001953, 397.05670166015625, 719.8547973632812, -128.82301330566406, 320.40057373046875, 682.31982421875, 324.8439636230469, 276.3469543457031, 150.07139587402344, 349.2930908203125, 222.4854278564453, -5.800254821777344], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000351.npy"} +{"epoch": 0.5154185022026432, "step": 352, "batch_size": 64, "mean": 157.98960876464844, "std": 278.6183166503906, "min": -547.1092529296875, "p10": -141.8188934326172, "median": 125.08262634277344, "p90": 491.79888916015625, "max": 991.6100463867188, "pos_frac": 0.6875, "sample": [408.9987487792969, 382.12646484375, 146.12088012695312, 480.0289001464844, -379.6293029785156, 50.540245056152344, 104.01493072509766, 408.30712890625, 57.33451843261719, 248.89846801757812, 652.15283203125, 467.91558837890625, -57.88007736206055, 160.326171875, 260.8703918457031, -93.53285217285156, -39.478057861328125, 991.6100463867188, 252.46922302246094, -153.04025268554688, 188.00839233398438, 744.673828125, 90.12309265136719, 430.72674560546875, 33.30360412597656, -114.40473175048828, -55.542415618896484, -266.5264892578125, -18.69835662841797, 99.45501708984375, 49.647796630859375, 491.001220703125, 16.866615295410156, 5.217430114746094, 556.3175659179688, 136.01773071289062, -547.1092529296875, 62.803619384765625, 178.97433471679688, 366.0284729003906, 293.6861572265625, 126.87762451171875, 182.6542510986328, -200.55947875976562, -81.85107421875, -25.895111083984375, -216.72891235351562, 584.1189575195312, 492.1407470703125, -144.34226989746094, 274.37890625, -90.9547348022461, -135.93101501464844, 432.023681640625, 251.32754516601562, 222.262939453125, 411.3658142089844, -53.72191619873047, 607.4483032226562, 118.18517303466797, -28.676483154296875, -60.10215759277344, 235.30264282226562, 123.28762817382812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000352.npy"} +{"epoch": 0.5168869309838473, "step": 353, "batch_size": 64, "mean": 204.60816955566406, "std": 279.2625732421875, "min": -383.1664733886719, "p10": -97.14072418212888, "median": 147.42830657958984, "p90": 604.739178466797, "max": 917.505859375, "pos_frac": 0.71875, "sample": [379.18719482421875, 454.1217956542969, 58.868431091308594, 465.2523193359375, 193.20278930664062, 433.0051574707031, 13.679803848266602, -55.65888214111328, 581.2891845703125, 148.6907958984375, 323.75543212890625, -75.97132110595703, 261.58917236328125, 545.2572021484375, 21.26519775390625, 381.9801330566406, -132.9915313720703, -4.6168212890625, 598.3671875, 671.7266845703125, 142.02183532714844, 296.8470764160156, -33.27681350708008, 110.6640625, -383.1664733886719, 374.4869384765625, -217.00808715820312, -71.4877700805664, -106.21332550048828, 308.4668884277344, -27.080488204956055, 146.1658172607422, 331.044677734375, 361.52197265625, -4.556640625, 31.722049713134766, 2.684356689453125, -160.73678588867188, 783.57275390625, 1.4520397186279297, -21.653472900390625, 607.4700317382812, -65.2985610961914, 38.87451934814453, 102.9690170288086, 271.96917724609375, 344.97125244140625, 171.18382263183594, 7.111907958984375, 627.40576171875, 285.6903991699219, -74.19993591308594, 609.9620361328125, 376.4832458496094, 917.505859375, 756.3709106445312, -114.21830749511719, 381.8544921875, 568.4351806640625, -106.84658813476562, 34.47136688232422, 8.230037689208984, -60.85760498046875, 277.91461181640625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000353.npy"} +{"epoch": 0.5183553597650514, "step": 354, "batch_size": 64, "mean": 226.20755004882812, "std": 231.23719787597656, "min": -192.515869140625, "p10": -41.27126922607421, "median": 216.7962646484375, "p90": 496.1581146240234, "max": 908.4703369140625, "pos_frac": 0.796875, "sample": [332.55120849609375, 497.50213623046875, 219.31773376464844, 48.5166015625, 459.5279541015625, 211.75335693359375, 665.191650390625, -29.738544464111328, 260.61785888671875, -133.7545623779297, -64.8543930053711, -172.33807373046875, 248.6499481201172, 908.4703369140625, 424.12188720703125, 218.32864379882812, -45.406761169433594, 368.6614685058594, 466.2337951660156, 247.35791015625, 16.410621643066406, 353.4320373535156, 150.15426635742188, 98.13198852539062, -192.515869140625, 129.8241729736328, 386.3275451660156, 205.2109375, 102.62252044677734, 348.05633544921875, 513.2935791015625, -96.98497009277344, 312.2606506347656, 260.4341125488281, 222.29132080078125, 147.23695373535156, -113.27115631103516, 493.0220642089844, 197.2481689453125, 58.26179504394531, 413.7253723144531, -31.621788024902344, 420.34991455078125, 215.26388549804688, 19.05792999267578, 296.38250732421875, 109.31127166748047, 669.3831787109375, 409.30224609375, -19.1307373046875, -22.32877540588379, 394.79266357421875, 349.90032958984375, 180.4123077392578, -18.788917541503906, 707.7696533203125, 51.166465759277344, 355.8160400390625, 425.8215637207031, -6.280364990234375, 576.6381225585938, 105.53646850585938, 133.20571899414062, 19.44149398803711], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000354.npy"} +{"epoch": 0.5198237885462555, "step": 355, "batch_size": 64, "mean": 219.6388702392578, "std": 224.483642578125, "min": -340.27740478515625, "p10": -41.37560577392577, "median": 201.57805633544922, "p90": 512.0375610351564, "max": 934.8204345703125, "pos_frac": 0.828125, "sample": [133.57545471191406, 532.1546020507812, 417.37579345703125, 527.6783447265625, 27.316307067871094, 244.90037536621094, 194.2261962890625, 934.8204345703125, -232.8826904296875, 249.47486877441406, 248.18411254882812, 545.3427734375, 24.807518005371094, -18.91065216064453, 441.5242004394531, 487.59942626953125, -80.23394775390625, 559.6056518554688, 177.3849639892578, 87.59741973876953, 402.41241455078125, 256.44110107421875, 18.179733276367188, -47.04678726196289, 284.30999755859375, 438.4635009765625, 474.16461181640625, 319.7992248535156, 169.10015869140625, -3.129638671875, -28.14284896850586, 249.72018432617188, 90.06510925292969, -340.27740478515625, 208.92991638183594, -163.17001342773438, 296.041259765625, 522.5110473632812, 374.8232727050781, 616.7147216796875, 44.42554473876953, 192.47308349609375, 130.6771240234375, 176.0150146484375, 165.50924682617188, 236.09664916992188, 57.313560485839844, 380.7024230957031, 93.69036865234375, 188.60781860351562, 465.7315673828125, 449.971923828125, 300.38330078125, 144.09408569335938, 134.01756286621094, -8.043182373046875, 116.90571594238281, -106.60832214355469, 287.15045166015625, -50.01150894165039, 85.71453857421875, 349.03179931640625, 317.1455993652344, 264.4425048828125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000355.npy"} +{"epoch": 0.5212922173274597, "step": 356, "batch_size": 64, "mean": 124.59414672851562, "std": 298.2035217285156, "min": -622.5684814453125, "p10": -261.09606323242184, "median": 96.53136825561523, "p90": 513.646160888672, "max": 908.397216796875, "pos_frac": 0.671875, "sample": [192.00265502929688, 51.239097595214844, 384.1583251953125, 99.79802703857422, -6.260700225830078, 1.0838584899902344, -80.4493408203125, 201.98983764648438, -156.624267578125, 231.45855712890625, 321.42999267578125, -0.5369453430175781, 756.8926391601562, 520.2852172851562, -121.05941772460938, 547.0272216796875, -63.72048568725586, -217.6848602294922, -622.5684814453125, -272.9519958496094, 63.225257873535156, 759.4307250976562, -129.49610900878906, 122.12777709960938, 347.8745422363281, 498.155029296875, -32.890525817871094, 451.3684997558594, -283.0964660644531, 79.99300384521484, -288.55889892578125, 665.9554443359375, 97.91926574707031, -233.43222045898438, 421.098388671875, 73.24871826171875, 305.6270446777344, 365.243408203125, -280.3448486328125, 70.23158264160156, -54.65156173706055, -95.76934051513672, 120.75662231445312, 228.4080810546875, 8.959714889526367, 211.92108154296875, 277.95013427734375, -201.35162353515625, 695.6531982421875, 249.91897583007812, 184.97418212890625, 12.708953857421875, -310.1612548828125, -23.97949981689453, 152.05648803710938, 86.82779693603516, 123.21360778808594, 229.78704833984375, -413.0367736816406, 908.397216796875, 313.78045654296875, 331.5921630859375, 1.7376937866210938, 95.14347076416016], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000356.npy"} +{"epoch": 0.5227606461086637, "step": 357, "batch_size": 64, "mean": 209.94137573242188, "std": 248.00267028808594, "min": -550.6087646484375, "p10": -48.39230804443358, "median": 187.96109771728516, "p90": 591.0841857910157, "max": 791.883056640625, "pos_frac": 0.828125, "sample": [398.9571228027344, 586.4456176757812, 90.26127624511719, 655.8161010742188, 509.3966369628906, 7.2985076904296875, 68.28968811035156, 424.948486328125, 125.04241943359375, 578.2012939453125, 247.3284149169922, 135.14443969726562, -128.2406005859375, 634.4521484375, -55.36907958984375, 180.16024780273438, 211.50375366210938, 194.17559814453125, 308.38916015625, -13.878433227539062, 656.0438232421875, 101.3675765991211, 365.8683166503906, -32.11317443847656, -550.6087646484375, 34.396480560302734, 253.43016052246094, 119.69034576416016, -85.49176788330078, 596.1444702148438, 316.0862121582031, -147.60440063476562, 508.9054870605469, -184.42510986328125, 372.7615966796875, 9.805667877197266, 373.557861328125, 791.883056640625, 267.5322265625, 291.9686279296875, 33.03814697265625, 3.4715023040771484, 243.19375610351562, 593.0721435546875, 154.96234130859375, 18.09628677368164, 108.24314880371094, 206.78939819335938, -9.847782135009766, 206.49203491210938, 53.389251708984375, 272.15924072265625, 170.51248168945312, 12.506240844726562, 291.51837158203125, 234.48098754882812, 101.10598754882812, 319.52947998046875, -64.96969604492188, 312.02801513671875, 97.89073181152344, 689.071533203125, -9.754058837890625, 181.74659729003906], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000357.npy"} +{"epoch": 0.5242290748898678, "step": 358, "batch_size": 64, "mean": 192.1194610595703, "std": 254.79104614257812, "min": -385.13165283203125, "p10": -59.41862564086914, "median": 153.97461700439453, "p90": 454.39652709960944, "max": 890.0552978515625, "pos_frac": 0.734375, "sample": [65.57449340820312, 620.2293701171875, 15.414199829101562, 297.0348815917969, 237.70960998535156, 430.2763366699219, 93.03630065917969, 316.341552734375, 95.52656555175781, -18.487686157226562, 29.978294372558594, -29.2886962890625, -57.443450927734375, -21.688919067382812, -26.194704055786133, 757.7742919921875, -60.26512908935547, 460.13525390625, 92.37203979492188, 151.90426635742188, -73.8294448852539, -26.058090209960938, 18.82056427001953, 50.777381896972656, 181.228515625, -385.13165283203125, 367.3742370605469, -167.06243896484375, 890.0552978515625, 418.88262939453125, 441.00616455078125, 192.80599975585938, 361.32537841796875, 0.9738998413085938, 586.1859130859375, 204.27984619140625, -47.02275848388672, 71.82781982421875, -29.095605850219727, 370.0677795410156, 303.41937255859375, 434.3154602050781, -211.382568359375, 319.2118835449219, 414.21368408203125, -60.405364990234375, 384.3778076171875, 754.1546630859375, -50.37766647338867, 108.08363342285156, 352.95977783203125, 112.5937271118164, 94.9495849609375, -28.18768310546875, 406.07354736328125, 838.5653076171875, 99.44475555419922, 185.35208129882812, 271.87188720703125, -106.26504516601562, 196.4052276611328, 218.294677734375, 224.58192443847656, 156.0449676513672], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000358.npy"} +{"epoch": 0.5256975036710719, "step": 359, "batch_size": 64, "mean": 225.62379455566406, "std": 282.52362060546875, "min": -276.47894287109375, "p10": -105.84959182739257, "median": 184.73291015625, "p90": 584.6568115234376, "max": 1046.1168212890625, "pos_frac": 0.765625, "sample": [-276.47894287109375, 220.58212280273438, 254.19317626953125, 415.4466857910156, -163.50778198242188, 705.2784423828125, -194.51907348632812, 898.870361328125, 104.05010223388672, -54.70307922363281, -146.54054260253906, -106.2067642211914, 390.40179443359375, 289.75927734375, -178.51593017578125, 68.98023986816406, 369.0688171386719, -105.01618957519531, 497.29620361328125, -103.12284088134766, 300.5412902832031, 215.5189208984375, 512.5716552734375, 94.59929656982422, -8.596569061279297, 153.24769592285156, 25.68961524963379, 385.08123779296875, 84.5394287109375, 30.87242889404297, 473.06939697265625, 281.0859375, 1046.1168212890625, 411.5711669921875, -79.50141906738281, 56.53626251220703, 218.55255126953125, 103.66861724853516, -126.55445098876953, 677.189453125, 115.30238342285156, 479.3478698730469, 241.49485778808594, 334.12213134765625, -38.2447509765625, 180.62342834472656, 474.9568786621094, 273.149658203125, 188.44476318359375, 692.638916015625, 69.24890899658203, -51.81571960449219, -46.17350769042969, 326.01483154296875, 215.687744140625, 587.5963134765625, 66.2357177734375, 171.66368103027344, 854.979248046875, 516.33984375, 118.94359588623047, 577.7979736328125, 169.43150329589844, 181.02105712890625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000359.npy"} +{"epoch": 0.527165932452276, "step": 360, "batch_size": 64, "mean": 184.01300048828125, "std": 204.1316375732422, "min": -190.6436767578125, "p10": -54.02759017944336, "median": 158.1941680908203, "p90": 427.22727050781253, "max": 647.0943603515625, "pos_frac": 0.796875, "sample": [66.62106323242188, 516.7529907226562, 46.318931579589844, 129.94158935546875, -31.8759708404541, 338.1795654296875, -35.050315856933594, 214.66195678710938, 364.2220764160156, 206.28492736816406, 153.57028198242188, 403.3021545410156, -86.13119506835938, 304.85321044921875, 647.0943603515625, 128.8739013671875, 35.99886703491211, 20.515396118164062, 356.93048095703125, 102.24105834960938, 400.3215637207031, 202.5257568359375, -58.931846618652344, 262.087158203125, 41.02279281616211, 348.04156494140625, 416.11767578125, 72.97251892089844, 19.915943145751953, 496.1315612792969, 563.3054809570312, 380.0920715332031, 544.466552734375, -66.05459594726562, 48.471107482910156, 180.69239807128906, 199.88739013671875, 168.1202392578125, 431.988525390625, -55.294036865234375, 24.57480239868164, 634.218017578125, -31.21343994140625, -46.23678970336914, 3.634796142578125, 105.17889404296875, -190.6436767578125, 178.21942138671875, 40.93663787841797, -108.87013244628906, -37.40771484375, 370.12872314453125, 21.004491806030273, 414.6045837402344, 359.1590270996094, 289.389892578125, 292.9312744140625, -51.072547912597656, 162.81805419921875, 403.189697265625, 109.29643249511719, -72.23554229736328, 40.173545837402344, 385.8681945800781], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000360.npy"} +{"epoch": 0.5286343612334802, "step": 361, "batch_size": 64, "mean": 179.164306640625, "std": 261.31640625, "min": -436.6070861816406, "p10": -130.46442184448242, "median": 170.96131896972656, "p90": 542.3219726562501, "max": 785.7476196289062, "pos_frac": 0.734375, "sample": [33.473228454589844, 209.74691772460938, 27.385601043701172, 344.71826171875, -51.81928253173828, -239.80746459960938, -43.12323760986328, 174.9563446044922, 533.1555786132812, 136.72352600097656, 167.74267578125, 246.07553100585938, -436.6070861816406, 423.699951171875, -38.07124328613281, -33.84693145751953, 191.57554626464844, 141.5602264404297, -292.3591613769531, -23.114564895629883, 213.47824096679688, -152.55062866210938, 42.741058349609375, -15.07876205444336, 217.8124542236328, 744.9029541015625, 320.43017578125, 431.31256103515625, 63.57171630859375, 479.27880859375, -57.031646728515625, -53.34819412231445, 162.2531280517578, 170.98141479492188, 508.2818603515625, 553.80712890625, -158.62464904785156, 785.7476196289062, 724.12255859375, 514.2748413085938, 252.93104553222656, 231.21275329589844, 569.724365234375, 412.9260559082031, 181.36611938476562, 214.82708740234375, 62.79705047607422, 222.3324432373047, -36.782379150390625, -118.32064056396484, 23.71417236328125, 3.6668968200683594, 546.2504272460938, 8.11856460571289, 388.560791015625, 328.21636962890625, 164.86233520507812, -135.6688995361328, 268.9316711425781, 42.70899963378906, 170.94122314453125, -262.9398498535156, 555.3643798828125, 402.3472900390625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000361.npy"} +{"epoch": 0.5301027900146843, "step": 362, "batch_size": 64, "mean": 280.1239929199219, "std": 272.0365295410156, "min": -141.07125854492188, "p10": -28.538698387145992, "median": 247.90323638916016, "p90": 620.3296630859375, "max": 1302.1602783203125, "pos_frac": 0.84375, "sample": [120.78900146484375, 153.48745727539062, 556.80419921875, 551.8018188476562, 332.9197692871094, -25.56414031982422, 1302.1602783203125, 292.0130920410156, 118.61514282226562, 642.6431884765625, 525.661376953125, 99.21373748779297, 72.9054946899414, 132.603759765625, 242.3830108642578, 514.950439453125, 286.2993469238281, -82.0518569946289, 66.91961669921875, 368.3602294921875, 536.3186645507812, 79.81893920898438, 24.348791122436523, -22.142059326171875, -0.3776702880859375, -29.813508987426758, 150.21530151367188, 273.13873291015625, -91.62615203857422, 681.1650390625, 620.6102905273438, 88.0517578125, -58.584197998046875, 272.4535217285156, 294.861083984375, 158.8829803466797, 607.591796875, -50.375282287597656, 201.53892517089844, 619.6748657226562, 260.3522644042969, 97.88375854492188, 742.1298828125, 430.5974426269531, 136.44229125976562, 166.87454223632812, 425.8481750488281, 188.60638427734375, 55.24720764160156, 793.7300415039062, 230.40777587890625, 731.1128540039062, 514.8995361328125, 378.62310791015625, 253.4234619140625, 498.8250427246094, 525.3501586914062, -138.15786743164062, 166.1107940673828, 381.9859924316406, 281.4745788574219, 304.4462585449219, 14.125244140625, -141.07125854492188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000362.npy"} +{"epoch": 0.5315712187958884, "step": 363, "batch_size": 64, "mean": 283.072509765625, "std": 279.84912109375, "min": -329.0406494140625, "p10": -26.195782470703115, "median": 266.241455078125, "p90": 662.7653259277345, "max": 1267.388427734375, "pos_frac": 0.828125, "sample": [-329.0406494140625, 358.8299255371094, 572.1541748046875, 264.4808044433594, 177.17794799804688, 218.8648681640625, 280.6099853515625, 261.5577392578125, 278.00250244140625, 131.2978973388672, 331.35028076171875, 259.38787841796875, 341.3205871582031, 31.75507354736328, 393.2696533203125, 888.050048828125, -32.21305847167969, 642.6611328125, 71.44990539550781, 707.385498046875, 487.08013916015625, 91.5323715209961, 287.94268798828125, 394.6576843261719, 333.4571228027344, 457.717529296875, 1267.388427734375, 582.9254150390625, -44.04254150390625, 646.6968994140625, 186.6005401611328, 243.57496643066406, -6.91839599609375, 285.76953125, 455.9371032714844, 42.277748107910156, 268.0021057128906, -37.061065673828125, 669.6517944335938, 460.60943603515625, 422.0674743652344, -146.71151733398438, 45.0194091796875, 448.1087646484375, 10.335458755493164, 323.68011474609375, 681.9994506835938, -0.0655517578125, 234.51779174804688, 453.3908386230469, -10.075935363769531, 119.57051086425781, -161.10830688476562, 452.27239990234375, 249.5528564453125, -30.263328552246094, 22.734642028808594, 253.81350708007812, 63.017147064208984, 32.47132110595703, 732.4600219726562, 700.6395874023438, -16.70484161376953, 313.76690673828125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000363.npy"} +{"epoch": 0.5330396475770925, "step": 364, "batch_size": 64, "mean": 215.07342529296875, "std": 292.4775695800781, "min": -547.7803344726562, "p10": -105.84927902221678, "median": 196.11620330810547, "p90": 569.804656982422, "max": 992.48486328125, "pos_frac": 0.75, "sample": [443.3258361816406, 260.24017333984375, -227.24510192871094, 94.69808197021484, -547.7803344726562, 592.058837890625, -282.5691833496094, 325.0939025878906, 251.46273803710938, 913.7478637695312, 161.00392150878906, 664.3036499023438, 440.8804931640625, 354.99359130859375, -74.62883758544922, 74.19374084472656, 63.905487060546875, -286.339599609375, 340.3857116699219, -31.933486938476562, -115.54437255859375, 467.6692810058594, 228.26571655273438, 484.70928955078125, 350.54901123046875, 22.424592971801758, -11.22250747680664, 530.049560546875, 217.09999084472656, 230.96633911132812, -32.19273376464844, 695.7696533203125, 139.73980712890625, -9.939361572265625, 120.77338409423828, 107.06501770019531, 474.9736328125, 362.9388122558594, 308.7137145996094, -129.72396850585938, 107.3640365600586, 586.2880859375, 655.40673828125, 115.59425354003906, 373.58294677734375, -17.08808135986328, -16.88309669494629, 384.08038330078125, 531.3433227539062, 404.6296081542969, 38.23615646362305, 175.13241577148438, -18.403947830200195, 159.6243896484375, 992.48486328125, 92.56796264648438, 362.5888671875, 320.4226379394531, -83.2273941040039, 270.232177734375, 46.01068115234375, 164.79550170898438, 527.6251831054688, -380.5913391113281], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000364.npy"} +{"epoch": 0.5345080763582967, "step": 365, "batch_size": 64, "mean": 222.5535430908203, "std": 277.9830627441406, "min": -313.2007141113281, "p10": -68.68314361572264, "median": 150.1745147705078, "p90": 563.7040161132813, "max": 1342.556396484375, "pos_frac": 0.84375, "sample": [192.67095947265625, 112.8410415649414, 113.34381103515625, 142.2091827392578, 121.86265563964844, 132.35601806640625, 344.78680419921875, 99.80115509033203, 225.47286987304688, 121.60708618164062, 307.82061767578125, 472.90045166015625, -74.08445739746094, 212.88946533203125, 54.65296936035156, -56.080078125, 93.74040222167969, 777.7628784179688, 603.6006469726562, 308.3989562988281, 130.95330810546875, 55.82727813720703, 64.73848724365234, 21.59259033203125, 160.95591735839844, -171.6533203125, 106.39703369140625, 41.516300201416016, 200.41259765625, 155.9588623046875, -113.62556457519531, -41.45760726928711, 144.39016723632812, 40.11957550048828, 441.9278564453125, 688.8158569335938, 417.7015686035156, 75.85484313964844, -146.38311767578125, 546.1395874023438, 255.55665588378906, -200.96505737304688, 238.40664672851562, 30.73075294494629, 9.936361312866211, 339.7677307128906, 288.60479736328125, 286.6103515625, 365.8560485839844, -109.65704345703125, 84.9224624633789, 424.9686279296875, -10.950902938842773, 298.3621826171875, 279.521240234375, 281.5997009277344, 1342.556396484375, 571.2316284179688, 478.03594970703125, -313.2007141113281, 831.2603759765625, 782.5634765625, 116.78788757324219, 442.1854553222656], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000365.npy"} +{"epoch": 0.5359765051395007, "step": 366, "batch_size": 64, "mean": 228.12368774414062, "std": 294.1232604980469, "min": -305.2411193847656, "p10": -111.96355667114258, "median": 197.9096221923828, "p90": 637.2090454101562, "max": 944.7164916992188, "pos_frac": 0.78125, "sample": [-157.28115844726562, 194.329345703125, 341.6214294433594, 118.73609924316406, -305.2411193847656, 37.2237548828125, -98.86724090576172, 49.61678695678711, -71.44390869140625, 505.64691162109375, 82.94935607910156, -273.1566467285156, 501.1954345703125, 679.3710327148438, 159.28761291503906, -92.61665344238281, 370.553955078125, 14.438159942626953, 115.84093475341797, -80.51344299316406, 329.60076904296875, 605.9642944335938, 426.3818359375, 48.319862365722656, 637.519775390625, 944.7164916992188, 371.015380859375, 129.19607543945312, 624.53564453125, 214.57565307617188, 201.48989868164062, -67.2645263671875, -224.95236206054688, 645.2571411132812, 350.68377685546875, 409.23907470703125, 874.0346069335938, 329.83917236328125, 208.20135498046875, 128.5353546142578, 251.37892150878906, 142.34295654296875, 75.0643310546875, -154.5277099609375, 547.0940551757812, -112.1778793334961, 295.51275634765625, 32.57735061645508, 514.5755615234375, 636.4840087890625, 406.6419677734375, 321.4967041015625, 745.761962890625, 341.1974182128906, 778.56298828125, 262.8291015625, -81.35418701171875, 39.12803649902344, -111.46347045898438, 110.07162475585938, 93.57073211669922, 158.08914184570312, 305.49365234375, -277.0137939453125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000366.npy"} +{"epoch": 0.5374449339207048, "step": 367, "batch_size": 64, "mean": 182.48397827148438, "std": 238.42276000976562, "min": -519.1842041015625, "p10": -57.81589889526367, "median": 128.81985473632812, "p90": 500.1575256347657, "max": 757.352783203125, "pos_frac": 0.796875, "sample": [88.75416564941406, 118.72137451171875, 444.10589599609375, 56.382991790771484, 661.5924072265625, 2.215818405151367, 365.63653564453125, 85.65997314453125, 516.8246459960938, 52.93567657470703, 27.15728759765625, 7.157258987426758, -56.24371337890625, 543.341064453125, 482.7777099609375, 285.0794677734375, 469.64801025390625, 134.52383422851562, 235.39869689941406, -62.34803009033203, 216.63299560546875, 415.8556823730469, -58.48969268798828, -1.9377517700195312, 302.6357421875, 290.6293029785156, 757.352783203125, -82.51036071777344, 507.60601806640625, 60.83233642578125, 444.4112548828125, 193.2154998779297, 9.68753433227539, 347.4677734375, 88.58393859863281, 223.97943115234375, 316.76544189453125, 289.3489990234375, 395.28924560546875, 81.73741149902344, 180.1370849609375, -242.75604248046875, -208.15858459472656, -44.73846435546875, -52.0849609375, 23.024747848510742, 94.72988891601562, 394.3678894042969, 131.7863006591797, 77.44552612304688, -205.45925903320312, -519.1842041015625, 84.1352310180664, 118.84349060058594, 235.45928955078125, 399.26629638671875, 444.39459228515625, 328.94049072265625, 557.9786376953125, 16.431167602539062, 125.85340881347656, -28.150901794433594, 524.60498046875, -16.306365966796875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000367.npy"} +{"epoch": 0.5389133627019089, "step": 368, "batch_size": 64, "mean": 291.8758544921875, "std": 227.24330139160156, "min": -236.53314208984375, "p10": 4.872182273864768, "median": 314.49571228027344, "p90": 534.73837890625, "max": 1065.434326171875, "pos_frac": 0.890625, "sample": [313.2810363769531, 352.3883361816406, 215.89938354492188, 520.707275390625, 376.802978515625, 568.060546875, 159.28082275390625, 38.58360290527344, 498.3105163574219, 156.48410034179688, -6.458797454833984, 380.60577392578125, 467.36279296875, 31.617305755615234, 315.71038818359375, 436.1629638671875, 266.68951416015625, 228.6002655029297, -34.063453674316406, 438.2452087402344, 223.75442504882812, 211.60964965820312, 604.81884765625, 341.38714599609375, 361.3516540527344, 132.23712158203125, 424.29217529296875, -4.470184326171875, 64.80168151855469, 34.44801330566406, 139.7646942138672, 426.32061767578125, 437.55804443359375, 416.06085205078125, 368.8515625, 536.6554565429688, 431.82659912109375, 26.671037673950195, 187.12091064453125, -21.079917907714844, 362.05157470703125, 530.2651977539062, 405.1813659667969, 645.9405517578125, 173.80653381347656, 343.939697265625, 649.7703247070312, 27.693069458007812, -236.53314208984375, 141.931396484375, 108.27875518798828, 517.6659545898438, 482.3282165527344, 505.5495910644531, 379.1744079589844, 54.131378173828125, 190.65972900390625, 1065.434326171875, 166.45111083984375, 267.13897705078125, 299.1467590332031, -17.391265869140625, -107.29515075683594, 656.4857788085938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000368.npy"} +{"epoch": 0.540381791483113, "step": 369, "batch_size": 64, "mean": 153.23370361328125, "std": 280.3700256347656, "min": -710.6107177734375, "p10": -143.7051559448242, "median": 135.9088897705078, "p90": 496.92633972167977, "max": 819.9000244140625, "pos_frac": 0.75, "sample": [-168.246337890625, -124.85778045654297, 819.9000244140625, -60.39020538330078, -251.0099334716797, 14.076383590698242, -66.75337982177734, 322.7132263183594, 27.02578353881836, 704.990966796875, 242.12118530273438, -111.42156982421875, 138.89486694335938, 291.77239990234375, 322.23797607421875, 811.191162109375, 509.8447570800781, 175.64218139648438, 60.785404205322266, 409.2671813964844, 292.5504150390625, 123.30073547363281, 123.06558227539062, 228.75399780273438, 224.76327514648438, 148.56695556640625, -453.53125, 251.61660766601562, 140.37515258789062, 123.13811492919922, 277.4095458984375, 323.6943359375, 106.00200653076172, 122.57081604003906, 184.48202514648438, 3.1077194213867188, 340.7362060546875, 96.27837371826172, -136.2616424560547, 335.3569641113281, 473.4183654785156, -199.4027862548828, 693.4642333984375, 64.51527404785156, 483.7342834472656, 191.88372802734375, 393.81549072265625, -396.5310974121094, -10.27700424194336, 607.9385375976562, 200.5814208984375, 123.882568359375, -710.6107177734375, 85.24304962158203, 243.37518310546875, -146.89523315429688, -118.35646057128906, 41.58702850341797, 132.92291259765625, 502.580078125, 224.22909545898438, -42.321266174316406, 100.85031127929688, -56.431175231933594], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000369.npy"} +{"epoch": 0.5418502202643172, "step": 370, "batch_size": 64, "mean": 148.69046020507812, "std": 286.7040100097656, "min": -406.13482666015625, "p10": -241.893635559082, "median": 141.8382568359375, "p90": 527.486572265625, "max": 845.5579833984375, "pos_frac": 0.71875, "sample": [85.3572769165039, 312.8019104003906, 176.4484100341797, -406.13482666015625, 717.0624389648438, 445.21044921875, 127.96289825439453, 289.55511474609375, 39.986236572265625, -98.27677154541016, 433.492919921875, 531.0791015625, 46.117523193359375, 140.88388061523438, -93.84315490722656, 519.10400390625, 268.4892272949219, 139.83395385742188, 232.54327392578125, 845.5579833984375, 737.0198364257812, 205.9677734375, 288.7707214355469, -150.67666625976562, 258.7700500488281, -147.86651611328125, 19.25506591796875, 294.830322265625, -216.28921508789062, 184.18438720703125, 532.8849487304688, 135.98028564453125, 7.4472198486328125, 312.07135009765625, -24.221420288085938, 346.53802490234375, 238.30136108398438, -167.72482299804688, -396.4879150390625, 26.11726188659668, 225.11090087890625, 235.256591796875, -252.86695861816406, 77.400634765625, 463.2314453125, 111.19488525390625, 225.28033447265625, 314.14404296875, 232.0601348876953, -371.37042236328125, -101.58308410644531, -51.494964599609375, 122.0089111328125, 142.79263305664062, -315.5577697753906, -325.0780944824219, 9.353973388671875, -311.5898132324219, -129.69427490234375, 495.61517333984375, 650.6739501953125, 255.43902587890625, -23.417236328125, 601.1765747070312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000370.npy"} +{"epoch": 0.5433186490455213, "step": 371, "batch_size": 64, "mean": 216.20925903320312, "std": 301.5441589355469, "min": -535.1423950195312, "p10": -115.42123794555664, "median": 187.53652954101562, "p90": 620.5942810058594, "max": 964.7633666992188, "pos_frac": 0.75, "sample": [404.5845642089844, 247.95407104492188, 512.9766235351562, 70.95391082763672, -535.1423950195312, 190.48233032226562, 599.356689453125, 201.05824279785156, 858.4147338867188, 282.63629150390625, 520.4755859375, 32.565330505371094, 337.08502197265625, 230.27642822265625, -319.3819274902344, 137.1059112548828, 17.524871826171875, 195.14877319335938, 470.91357421875, -21.4874267578125, -60.99632263183594, -240.7305450439453, 629.6961059570312, -117.47047424316406, -224.56622314453125, 113.52913665771484, 890.4962768554688, 688.1764526367188, -204.43370056152344, 13.600065231323242, 678.1165771484375, -26.059249877929688, 380.5559997558594, 283.51763916015625, -121.49394989013672, -53.33447265625, 572.211669921875, -110.63968658447266, 299.02880859375, 428.9652099609375, -7.061494827270508, 111.89461517333984, 742.590087890625, 87.75530242919922, 22.548168182373047, 272.4674377441406, 247.77200317382812, 457.12115478515625, 472.44549560546875, -38.82273864746094, 396.949462890625, 125.53948211669922, 123.89079284667969, 347.96533203125, 179.0216827392578, 184.59072875976562, 125.74263000488281, 70.26979064941406, -3.9085540771484375, 507.76300048828125, 255.92559814453125, 964.7633666992188, -77.36289978027344, 15.861557006835938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000371.npy"} +{"epoch": 0.5447870778267254, "step": 372, "batch_size": 64, "mean": 255.91798400878906, "std": 333.7793273925781, "min": -423.56805419921875, "p10": -145.29068756103516, "median": 193.72489166259766, "p90": 732.3536437988281, "max": 1330.793701171875, "pos_frac": 0.78125, "sample": [338.00750732421875, 296.74053955078125, 731.087158203125, -296.41015625, 45.29618835449219, 652.2216186523438, 560.2267456054688, 221.0445556640625, 314.9844665527344, -17.215469360351562, 1330.793701171875, 302.63177490234375, 14.3134765625, 181.2781982421875, -48.135406494140625, 79.8254623413086, 89.79225158691406, 61.551963806152344, 801.672607421875, -192.10995483398438, 125.74017333984375, 207.2384490966797, 678.90771484375, -149.12832641601562, 504.7355041503906, 160.337158203125, 138.71524047851562, 199.98794555664062, 162.63229370117188, 155.75808715820312, 196.50994873046875, 365.6048583984375, 357.82012939453125, -152.40289306640625, 165.77947998046875, -132.7576904296875, 371.2723083496094, 736.1829833984375, 17.0207576751709, 1029.1837158203125, 599.7266235351562, 342.8073425292969, 190.93983459472656, -160.66378784179688, 481.661865234375, 503.7026062011719, 732.8964233398438, 276.5758056640625, -8.407241821289062, 354.65594482421875, 114.75198364257812, 275.0579528808594, 113.51927185058594, -47.683311462402344, 819.7701416015625, 141.1847381591797, -46.356475830078125, 229.29898071289062, -136.33619689941406, 880.4107055664062, -204.44940185546875, -423.56805419921875, 552.1580810546875, 190.36276245117188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000372.npy"} +{"epoch": 0.5462555066079295, "step": 373, "batch_size": 64, "mean": 239.9990234375, "std": 286.50140380859375, "min": -505.4219055175781, "p10": -71.93740921020508, "median": 193.1185302734375, "p90": 640.0226196289063, "max": 846.1212768554688, "pos_frac": 0.84375, "sample": [117.7387466430664, -181.6464385986328, -256.62322998046875, 226.985595703125, 21.008790969848633, 812.5193481445312, 499.073486328125, 84.67839050292969, 235.44223022460938, 152.49887084960938, 647.894287109375, 188.2998046875, -35.993804931640625, 251.78924560546875, 390.09320068359375, 15.435861587524414, 24.92978286743164, 755.4597778320312, 311.6872863769531, -73.97126007080078, -389.9192199707031, -26.224273681640625, 236.85635375976562, -505.4219055175781, 130.83642578125, 59.14332580566406, 160.03448486328125, 129.1798095703125, 560.2308349609375, 484.11083984375, 170.54542541503906, 508.292724609375, 323.96917724609375, 560.7445068359375, 442.8410949707031, -87.25619506835938, -120.84256744384766, 621.6553955078125, 290.03887939453125, 10.607131958007812, 846.1212768554688, 135.87933349609375, 407.0704345703125, 197.937255859375, 406.4803161621094, 454.18646240234375, 310.7142333984375, 279.925048828125, 576.1929931640625, 754.64990234375, 175.79998779296875, 46.360992431640625, 88.075927734375, 172.93760681152344, 9.855300903320312, 263.65478515625, 96.79518127441406, 114.34262084960938, 778.3782348632812, 416.63079833984375, 441.7803039550781, 656.2808227539062, -67.19175720214844, 50.35796356201172], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000373.npy"} +{"epoch": 0.5477239353891337, "step": 374, "batch_size": 64, "mean": 218.50360107421875, "std": 349.6275329589844, "min": -742.0427856445312, "p10": -109.66552734374997, "median": 144.3060760498047, "p90": 561.9406555175782, "max": 1306.065185546875, "pos_frac": 0.734375, "sample": [-170.99063110351562, 138.86679077148438, -50.42732238769531, 1306.065185546875, 282.23223876953125, 1164.9166259765625, 279.253173828125, 563.3363037109375, 438.75347900390625, 26.309776306152344, 149.745361328125, 363.2662658691406, 41.0111083984375, 214.17340087890625, 68.69363403320312, -49.115257263183594, 555.32177734375, 79.78860473632812, 571.7366333007812, 326.7786865234375, -5.311431884765625, -3.1767005920410156, 416.0138244628906, 52.43827819824219, -88.94868469238281, 874.0423583984375, -8.065999984741211, 74.99467468261719, -194.67088317871094, 351.7837829589844, 298.6977844238281, 286.9772644042969, -63.596527099609375, 917.1425170898438, -411.56134033203125, 133.6648406982422, -21.644119262695312, 533.514892578125, 117.13263702392578, -41.332855224609375, 169.70130920410156, 79.24011993408203, 83.47058868408203, 94.15726470947266, 217.45045471191406, 197.05087280273438, -149.41505432128906, 533.412109375, 268.50628662109375, 469.36468505859375, 110.60368347167969, 558.6841430664062, -118.54417419433594, 432.980712890625, 316.6933288574219, -23.49864959716797, -149.10589599609375, 195.01983642578125, 32.41935729980469, 107.55055236816406, 1121.5338134765625, -742.0427856445312, 162.06326293945312, 499.1250305175781], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000374.npy"} +{"epoch": 0.5491923641703378, "step": 375, "batch_size": 64, "mean": 212.50088500976562, "std": 296.7629699707031, "min": -532.6074829101562, "p10": -151.5289741516113, "median": 170.42767333984375, "p90": 558.4417236328126, "max": 1010.762939453125, "pos_frac": 0.78125, "sample": [658.3956298828125, 300.3669128417969, -28.5133056640625, 290.53179931640625, -80.37275695800781, -170.86541748046875, 462.84625244140625, 269.59716796875, 321.86431884765625, 13.442424774169922, 15.157003402709961, -25.725906372070312, 5.264091491699219, -532.6074829101562, 167.4365234375, 861.12744140625, 351.75738525390625, 48.332923889160156, 323.91070556640625, 136.36236572265625, 31.980438232421875, 250.47207641601562, -472.28167724609375, 61.206459045410156, 444.3956298828125, -201.89295959472656, 571.2980346679688, 95.75096130371094, 93.22564697265625, 96.9346694946289, -260.4043884277344, 111.97959899902344, 541.0692138671875, 675.4703369140625, 28.047595977783203, 250.97702026367188, 337.008056640625, 467.56634521484375, 461.4058837890625, 664.2227783203125, 378.4193115234375, 173.4188232421875, 135.29122924804688, -314.2677001953125, 462.68475341796875, 494.8009338378906, 270.0196838378906, -185.91317749023438, 1010.762939453125, 509.02960205078125, 369.1844482421875, -0.8613319396972656, -2.92205810546875, 369.0336608886719, 133.73806762695312, 515.3824462890625, 120.44758605957031, 565.8870849609375, -106.41060638427734, 163.4710693359375, 433.48968505859375, 399.05938720703125, 104.34811401367188, -34.77720260620117], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000375.npy"} +{"epoch": 0.5506607929515418, "step": 376, "batch_size": 64, "mean": 265.6058349609375, "std": 312.7511291503906, "min": -531.6541137695312, "p10": -99.03695907592773, "median": 227.0864486694336, "p90": 674.9961730957033, "max": 858.7758178710938, "pos_frac": 0.8125, "sample": [0.6823348999023438, 807.8614501953125, 241.7724609375, 53.44589614868164, 849.1835327148438, 120.35539245605469, 470.88092041015625, -242.13235473632812, 44.02140808105469, -5.0562286376953125, 424.4366455078125, 0.4925365447998047, 608.2709350585938, 108.39141082763672, -190.77212524414062, 447.19219970703125, 586.0538940429688, -184.92105102539062, -77.99066162109375, 241.6380157470703, 94.25712585449219, 23.101667404174805, -140.2054443359375, 858.7758178710938, 744.8641967773438, 73.76177215576172, 456.643310546875, 294.48394775390625, 512.8303833007812, -99.75990295410156, 709.758544921875, 545.6886596679688, 158.64527893066406, 567.7825317382812, 145.864990234375, 613.5982666015625, 556.662841796875, 224.57223510742188, 435.1063232421875, 242.88031005859375, 219.03492736816406, 509.1988220214844, -56.22148132324219, 249.052734375, 583.48095703125, 89.08876037597656, 829.40869140625, 22.533424377441406, -162.70082092285156, 75.14582061767578, 13.643264770507812, -1.685495376586914, 624.0728759765625, 689.8936157226562, 309.96087646484375, 633.6788330078125, 185.68197631835938, 342.04815673828125, 229.6006622314453, 174.13113403320312, -97.35009002685547, 105.37638854980469, 640.2354736328125, -531.6541137695312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000376.npy"} +{"epoch": 0.5521292217327459, "step": 377, "batch_size": 64, "mean": 248.51858520507812, "std": 282.04315185546875, "min": -423.49560546875, "p10": -85.43736572265624, "median": 234.0575714111328, "p90": 640.5532104492188, "max": 866.7171630859375, "pos_frac": 0.765625, "sample": [361.6578063964844, 784.0282592773438, 154.00897216796875, -69.76303100585938, 287.781494140625, -42.31597900390625, 241.04489135742188, -124.91439819335938, 272.6273498535156, 69.9384536743164, 730.504150390625, 762.1243896484375, -27.15550994873047, 866.7171630859375, 109.23087310791016, 183.0860595703125, 225.2556610107422, 459.1827087402344, 227.07025146484375, -28.556806564331055, 246.15939331054688, 568.0165405273438, 489.3572692871094, -24.84061050415039, 533.2679443359375, 839.2621459960938, 454.8740539550781, -214.13169860839844, 645.3841552734375, 144.02151489257812, 348.7152099609375, 223.89271545410156, 12.181327819824219, 216.70712280273438, 52.2418098449707, 431.1705627441406, 129.99232482910156, 324.23974609375, 407.88983154296875, 596.6657104492188, 258.43182373046875, -55.7064208984375, -423.49560546875, 266.7022705078125, -97.32235717773438, 102.9347152709961, 507.58929443359375, 343.912841796875, -211.4982452392578, 329.8846435546875, 106.64836120605469, 629.281005859375, -92.15493774414062, -62.27069091796875, -49.49188995361328, 502.9677429199219, 192.1868896484375, 669.0889892578125, 389.3204345703125, 0.0813140869140625, 367.4176025390625, 174.90081787109375, 303.9522399902344, -114.79003143310547], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000377.npy"} +{"epoch": 0.55359765051395, "step": 378, "batch_size": 64, "mean": 262.3010559082031, "std": 268.68267822265625, "min": -501.985107421875, "p10": -28.802073669433593, "median": 237.34703826904297, "p90": 608.21572265625, "max": 1097.3211669921875, "pos_frac": 0.84375, "sample": [734.993408203125, 295.6792297363281, 615.4617919921875, 551.9834594726562, 472.8868408203125, 141.94461059570312, 203.86749267578125, 395.32464599609375, 168.17449951171875, 232.6739501953125, 340.23431396484375, 355.98870849609375, 593.686279296875, 197.8257293701172, 160.98046875, -28.350357055664062, 301.71917724609375, 664.8363647460938, 222.23855590820312, 292.7138366699219, 235.71510314941406, -23.893789291381836, -12.76162338256836, 50.324806213378906, 308.71844482421875, 168.21066284179688, -53.88847351074219, 498.27838134765625, 139.29342651367188, 457.08111572265625, 509.4969482421875, 408.8499450683594, 211.13339233398438, 49.487762451171875, 9.242469787597656, 490.9196472167969, 178.69406127929688, 1097.3211669921875, 32.82948303222656, -297.25238037109375, 276.3176574707031, -63.910911560058594, 236.17189025878906, 614.442626953125, 215.64035034179688, -28.99566650390625, 394.6660461425781, 347.3409423828125, 495.2625427246094, 363.9384765625, 98.48991394042969, 333.38592529296875, 423.8837890625, 240.13754272460938, -501.985107421875, 207.03271484375, 238.52218627929688, 432.2236633300781, -66.7353744506836, 698.8734130859375, 78.33573913574219, -380.53521728515625, 145.53099060058594, 616.56982421875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000378.npy"} +{"epoch": 0.5550660792951542, "step": 379, "batch_size": 64, "mean": 264.68035888671875, "std": 318.6306457519531, "min": -462.29254150390625, "p10": -105.58756942749022, "median": 222.4285430908203, "p90": 686.7882263183594, "max": 1356.24072265625, "pos_frac": 0.796875, "sample": [243.883544921875, 439.46142578125, 79.57540893554688, 232.1693115234375, -113.10372161865234, 140.99502563476562, 195.85275268554688, 681.830078125, 212.68777465820312, 162.97283935546875, 416.4728088378906, 851.1589965820312, 121.61820983886719, 511.2337951660156, 352.10150146484375, 208.65768432617188, -462.29254150390625, 883.3720092773438, 57.16883850097656, 348.9329833984375, 476.5679931640625, 435.2652282714844, 304.66851806640625, -30.320323944091797, 166.24395751953125, -57.79156494140625, -310.6327209472656, 168.86111450195312, 70.73453521728516, 401.27081298828125, 734.70458984375, 412.7828369140625, -244.97918701171875, 151.45147705078125, 478.7778015136719, 688.9131469726562, 337.3955078125, 375.4725036621094, 720.908447265625, 134.36883544921875, -44.134788513183594, 593.5036010742188, 154.40591430664062, -71.27305603027344, 431.23614501953125, -160.04180908203125, 165.04107666015625, 423.8573303222656, 293.13104248046875, 241.33763122558594, -88.04988098144531, 392.99322509765625, 848.6190795898438, 198.91468811035156, 1356.24072265625, 282.4598388671875, 150.989990234375, 505.6614990234375, 87.0426254272461, -208.93017578125, 64.79258728027344, 566.0826416015625, -188.880615234375, -34.86747741699219], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000379.npy"} +{"epoch": 0.5565345080763583, "step": 380, "batch_size": 64, "mean": 283.1421813964844, "std": 326.8377380371094, "min": -818.4915771484375, "p10": -95.2658363342285, "median": 286.23480224609375, "p90": 668.1856018066406, "max": 1130.665771484375, "pos_frac": 0.859375, "sample": [144.0636749267578, 80.83778381347656, 64.89836120605469, 1130.665771484375, 27.6164493560791, 104.36409759521484, 127.4744873046875, 380.8912353515625, 319.23992919921875, 333.16839599609375, 130.02008056640625, 432.4574890136719, -58.93254852294922, 613.0302734375, 876.40087890625, 532.5952758789062, 447.4363708496094, 517.9443359375, 520.3280029296875, 186.1524658203125, 367.1701354980469, -244.0457763671875, 225.2078857421875, 272.05682373046875, 73.91522216796875, -80.79222869873047, 493.5438232421875, 350.7538146972656, 192.21937561035156, 605.9556884765625, 371.071533203125, 658.3368530273438, 595.678955078125, 672.406494140625, 616.936767578125, 419.64215087890625, 105.84553527832031, -101.46881103515625, 265.9103698730469, 60.83647918701172, 815.288818359375, 133.9740753173828, -263.19903564453125, 169.84585571289062, -316.574951171875, 213.4600372314453, 300.41278076171875, 750.09521484375, 68.8313980102539, -324.69879150390625, 409.52569580078125, 43.221832275390625, 731.8728637695312, 426.8599853515625, 267.88385009765625, -117.88105010986328, 478.34783935546875, 375.635498046875, 212.27899169921875, 126.23358154296875, -818.4915771484375, 411.63623046875, 801.9765625, 392.72943115234375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000380.npy"} +{"epoch": 0.5580029368575624, "step": 381, "batch_size": 64, "mean": 203.40545654296875, "std": 343.60833740234375, "min": -767.554443359375, "p10": -134.42196044921874, "median": 186.79959106445312, "p90": 586.0140686035157, "max": 1068.325439453125, "pos_frac": 0.703125, "sample": [257.8440856933594, 42.332122802734375, 60.0948486328125, 632.804931640625, 567.8712768554688, 692.2022705078125, 528.6795043945312, -56.21794128417969, 156.08428955078125, 557.7283935546875, 448.0231018066406, 98.46189880371094, 435.06719970703125, 375.52734375, 990.50927734375, -139.42306518554688, 419.18853759765625, 380.1332702636719, 681.7908325195312, 218.08920288085938, 965.6245727539062, 593.78955078125, -222.8748779296875, -109.69624328613281, -97.23909759521484, 149.94479370117188, 566.494384765625, 60.33359909057617, -337.2832946777344, -79.09586334228516, 152.00135803222656, 212.85357666015625, -23.190383911132812, -1.8054466247558594, 294.81829833984375, -108.93496704101562, 303.1605224609375, 90.94348907470703, 190.90628051757812, -122.75271606445312, 26.671348571777344, -2.807586669921875, 1068.325439453125, 219.79324340820312, 476.40997314453125, 400.7911376953125, 44.76422119140625, -221.4769287109375, 557.4532470703125, -122.06727600097656, -28.05381202697754, 394.8626708984375, -90.84939575195312, 270.7060546875, 182.69290161132812, -237.48062133789062, 102.35302734375, 330.80462646484375, 417.5810546875, 171.61956787109375, 207.14414978027344, -767.554443359375, -621.66015625, 413.13800048828125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000381.npy"} +{"epoch": 0.5594713656387665, "step": 382, "batch_size": 64, "mean": 251.5926513671875, "std": 266.831298828125, "min": -323.32403564453125, "p10": -83.8532157897949, "median": 211.79473114013672, "p90": 642.1873474121094, "max": 1015.148193359375, "pos_frac": 0.84375, "sample": [1015.148193359375, 674.9032592773438, 148.5631103515625, 163.08544921875, 155.83473205566406, 804.4349365234375, -94.43863677978516, 120.2018814086914, 334.8988037109375, -171.46002197265625, 148.41114807128906, 248.292724609375, 383.844482421875, 480.5068359375, 447.30126953125, 680.3280029296875, 214.34510803222656, 74.06513977050781, 419.368408203125, 113.95957946777344, 433.7551574707031, -165.1873016357422, 0.0806427001953125, 296.9483947753906, 692.41552734375, -99.3932113647461, 297.292724609375, -59.153900146484375, 560.1397094726562, 359.74102783203125, 647.7196044921875, -101.9498519897461, 9.726810455322266, 268.70379638671875, 442.3526916503906, -323.32403564453125, -145.43972778320312, 460.9385986328125, -3.5803680419921875, 264.3363952636719, 206.44692993164062, 209.24435424804688, 352.26885986328125, 49.410789489746094, 234.76856994628906, 152.1260223388672, 148.9713897705078, 316.7082824707031, 221.1885528564453, 189.05274963378906, 629.2787475585938, 562.3583984375, 246.62945556640625, -51.213233947753906, 137.904296875, 725.2282104492188, 73.66801452636719, 22.61739730834961, 485.5810546875, 567.435302734375, 143.23753356933594, 137.0426025390625, 20.273117065429688, 123.98599243164062], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000382.npy"} +{"epoch": 0.5609397944199707, "step": 383, "batch_size": 64, "mean": 240.79074096679688, "std": 366.77288818359375, "min": -1032.6982421875, "p10": -163.44906463623045, "median": 240.2686767578125, "p90": 758.0403991699219, "max": 1028.156494140625, "pos_frac": 0.734375, "sample": [-88.9037094116211, -111.10244750976562, 234.32290649414062, -287.1659851074219, 654.3801879882812, -280.08642578125, -181.44012451171875, 648.889404296875, 354.8285827636719, 0.9077033996582031, 283.9998474121094, 193.1409149169922, 65.208251953125, -25.936752319335938, -390.2874755859375, 246.21444702148438, 902.8101806640625, 36.689491271972656, 220.969482421875, 289.48248291015625, 880.9444580078125, -112.3835678100586, -16.26766586303711, 756.6498413085938, 222.42779541015625, -49.423561096191406, 769.470947265625, 59.79811096191406, -80.0634765625, 432.35772705078125, 1028.156494140625, 233.21336364746094, 689.3173828125, 297.0168762207031, 409.64324951171875, 428.69171142578125, 452.013671875, 24.408920288085938, 177.70310974121094, 288.63934326171875, -176.029052734375, -61.09564208984375, 262.3413391113281, 35.16981887817383, 345.1580505371094, 758.6363525390625, -144.33795166015625, 604.9522094726562, 8.892776489257812, 285.9591369628906, 788.1570434570312, 135.56295776367188, -171.63954162597656, 328.2850341796875, 439.400634765625, -33.290855407714844, 805.6641845703125, 563.527099609375, 268.64202880859375, 599.2235717773438, -1032.6982421875, 118.19058227539062, 478.87872314453125, 543.821533203125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000383.npy"} +{"epoch": 0.5624082232011748, "step": 384, "batch_size": 64, "mean": 199.46575927734375, "std": 245.71929931640625, "min": -341.9618225097656, "p10": -95.71852340698241, "median": 184.11284637451172, "p90": 475.95051879882817, "max": 969.408203125, "pos_frac": 0.8125, "sample": [185.13516235351562, 0.413055419921875, 969.408203125, -113.50428771972656, 4.098968505859375, 133.3120574951172, -120.70414733886719, -42.84861755371094, 746.4944458007812, 113.72249603271484, 484.6278991699219, 549.6718139648438, 183.0905303955078, -279.2389831542969, 377.45953369140625, 82.42050170898438, -96.58689880371094, 70.29875183105469, 361.725341796875, 337.5238037109375, 152.38027954101562, 309.7328796386719, 57.24613952636719, 423.90277099609375, 359.9740905761719, 316.95513916015625, 147.91119384765625, -260.47564697265625, -341.9618225097656, 88.47400665283203, 342.6527099609375, 30.341381072998047, 55.305908203125, 188.89520263671875, 194.64527893066406, 19.011436462402344, 428.21026611328125, 377.1114807128906, 337.28143310546875, 66.44393920898438, -17.310047149658203, 167.1820831298828, 339.9818115234375, 481.0267333984375, 415.61090087890625, 351.90887451171875, 279.2794189453125, 216.88284301757812, 265.5887756347656, -182.5638427734375, 219.489013671875, -11.709251403808594, 80.06957244873047, 464.10601806640625, 138.18685913085938, -93.69231414794922, -84.25653076171875, 512.22509765625, 454.3929138183594, 50.5551872253418, 659.7933959960938, 407.1780090332031, 122.60617065429688, 288.7196350097656], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000384.npy"} +{"epoch": 0.5638766519823789, "step": 385, "batch_size": 64, "mean": 242.71099853515625, "std": 248.2821807861328, "min": -369.3056945800781, "p10": -10.91765289306639, "median": 223.53985595703125, "p90": 587.2443969726564, "max": 772.3198852539062, "pos_frac": 0.890625, "sample": [6.623779296875, 12.684028625488281, 14.095512390136719, 262.7408752441406, 443.8212585449219, 305.9937438964844, 444.0108947753906, 143.69900512695312, -195.85464477539062, 528.0054931640625, 137.88771057128906, 39.36250305175781, 174.21694946289062, 149.5938720703125, 525.9490966796875, 249.743408203125, 639.1058959960938, 306.1361083984375, 88.6106948852539, 101.85892486572266, 348.24212646484375, 142.5314483642578, -170.73561096191406, 330.1410217285156, -157.546142578125, 306.4639892578125, 174.1977996826172, 553.2088623046875, 153.01368713378906, 53.407501220703125, 391.91168212890625, 210.4438934326172, 191.2257080078125, 772.3198852539062, 771.5309448242188, 162.09007263183594, 206.46493530273438, 19.548233032226562, 601.8310546875, 260.52911376953125, -18.435409545898438, 102.67903137207031, 511.1180725097656, 137.74644470214844, 245.78134155273438, 658.9611206054688, 417.2706298828125, -223.3680419921875, 701.213623046875, 336.20416259765625, -369.3056945800781, 88.70088195800781, 230.77255249023438, -217.18850708007812, 253.8640899658203, 358.8493347167969, 466.28143310546875, 96.60491180419922, 309.6826477050781, 710.597412109375, 100.49128723144531, 411.080322265625, 216.30715942382812, 308.48858642578125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000385.npy"} +{"epoch": 0.5653450807635829, "step": 386, "batch_size": 64, "mean": 258.03277587890625, "std": 334.20794677734375, "min": -462.8966064453125, "p10": -121.39803695678708, "median": 184.3561248779297, "p90": 733.6669555664063, "max": 1029.7510986328125, "pos_frac": 0.828125, "sample": [-77.05766296386719, 425.6558532714844, -90.83330535888672, 459.3121032714844, 729.67724609375, 233.10166931152344, 90.8683090209961, 438.652099609375, 160.98605346679688, 499.14794921875, 385.3162536621094, 1029.7510986328125, -347.55975341796875, -57.289703369140625, 735.3768310546875, 438.04376220703125, 293.0587158203125, 419.8319091796875, 28.70213508605957, 48.01972198486328, 103.59600067138672, 341.13446044921875, 192.70233154296875, 161.5078125, 276.9269104003906, 253.52691650390625, 137.93667602539062, -223.50753784179688, 1000.1637573242188, 639.6401977539062, -71.5008544921875, 159.11557006835938, 61.4862060546875, -226.69647216796875, 85.85160827636719, 812.108154296875, 951.4922485351562, 405.8447265625, 829.0677490234375, 137.92282104492188, 226.88284301757812, 672.9168701171875, -280.8865966796875, 161.7753448486328, 58.471160888671875, 176.00991821289062, 156.51588439941406, 175.2506561279297, -134.49720764160156, 447.2552185058594, -462.8966064453125, 870.0045776367188, 63.658912658691406, 27.54747772216797, 111.21011352539062, 133.22105407714844, 517.8447265625, 319.27459716796875, -435.7965087890625, 491.9617004394531, 564.4235229492188, 245.9467010498047, 375.3135681152344, 161.6097412109375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000386.npy"} +{"epoch": 0.566813509544787, "step": 387, "batch_size": 64, "mean": 218.617919921875, "std": 270.9674377441406, "min": -437.46734619140625, "p10": -109.45860443115234, "median": 206.50653076171875, "p90": 551.6919067382813, "max": 898.0712890625, "pos_frac": 0.8125, "sample": [284.50341796875, -57.713279724121094, 129.4777069091797, 115.83091735839844, 341.6225280761719, 898.0712890625, 150.33700561523438, 114.00527954101562, 343.657470703125, 470.0882873535156, -305.1032409667969, 314.2666320800781, 89.15310668945312, 46.2122917175293, -437.46734619140625, 217.76809692382812, 616.18505859375, 565.27783203125, -177.58993530273438, 222.42300415039062, 775.2172241210938, 94.67939758300781, 262.9905090332031, 775.064697265625, 355.5537109375, 195.24496459960938, 161.65036010742188, 187.44332885742188, 113.5533676147461, 186.03240966796875, 118.93921661376953, 104.71022033691406, 474.5857238769531, 46.07819747924805, 521.5135498046875, 238.0810546875, 387.03509521484375, 46.55824279785156, 324.9499816894531, 379.0233154296875, 146.76861572265625, 49.27748489379883, 729.348876953125, 471.1279602050781, 390.295166015625, 96.4420166015625, 483.8051452636719, -31.103923797607422, 564.62548828125, -110.44331359863281, 401.37420654296875, -297.6600646972656, 63.521141052246094, 363.1292419433594, -186.31344604492188, 232.74342346191406, -9.142074584960938, 451.85357666015625, -107.16094970703125, -31.738983154296875, 281.48956298828125, -306.9861755371094, 286.8368225097656, 369.5465087890625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000387.npy"} +{"epoch": 0.5682819383259912, "step": 388, "batch_size": 64, "mean": 294.65716552734375, "std": 345.7556457519531, "min": -595.1141967773438, "p10": -54.42901382446287, "median": 229.6042938232422, "p90": 669.7711730957033, "max": 1461.252685546875, "pos_frac": 0.828125, "sample": [1461.252685546875, 574.3995361328125, -88.52162170410156, 516.4913940429688, 533.6713256835938, 878.85009765625, 173.09828186035156, -242.94876098632812, 196.95895385742188, 293.86187744140625, 193.37686157226562, 953.640869140625, 581.3306274414062, 231.58169555664062, -595.1141967773438, 77.68921661376953, 336.58843994140625, 251.191650390625, 522.0641479492188, -26.255271911621094, 105.43785858154297, 770.9662475585938, 159.82406616210938, -31.653053283691406, 163.0234832763672, 227.62689208984375, 115.1081771850586, -22.096481323242188, 157.5537109375, 340.0284118652344, 166.86370849609375, 96.03074645996094, 317.5528564453125, 99.19699096679688, 492.13134765625, 173.274169921875, 111.18302917480469, 148.51593017578125, 616.33984375, 132.58993530273438, 439.8293151855469, 90.05354309082031, 416.35711669921875, 132.8756103515625, 187.2088623046875, -17.690025329589844, 494.46136474609375, 350.405517578125, -246.04434204101562, 402.6389465332031, 340.6341857910156, 1327.500244140625, -177.47071838378906, 411.68841552734375, 261.9447021484375, 27.36602783203125, -109.25885009765625, 570.0344848632812, 682.4041137695312, 804.0032958984375, 640.2943115234375, 498.6247863769531, 231.6812286376953, -64.19013977050781], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000388.npy"} +{"epoch": 0.5697503671071953, "step": 389, "batch_size": 64, "mean": 246.35403442382812, "std": 322.8013610839844, "min": -397.6398620605469, "p10": -137.1652107238769, "median": 256.0776138305664, "p90": 616.1999816894532, "max": 1023.013427734375, "pos_frac": 0.734375, "sample": [51.383697509765625, 532.5082397460938, 395.505859375, -397.6398620605469, 89.54374694824219, 381.90057373046875, 178.0131072998047, 601.4559326171875, 415.98114013671875, 226.781005859375, 320.9877624511719, -84.7985610961914, 330.26177978515625, -162.97189331054688, 173.97935485839844, 538.228515625, 145.96841430664062, 479.49285888671875, 462.9389953613281, -56.380523681640625, -309.986328125, -24.586315155029297, 622.5188598632812, 16.963680267333984, 304.1200866699219, 657.8025512695312, -385.27447509765625, -29.719833374023438, 123.70623016357422, 131.79078674316406, 340.7789611816406, -154.42831420898438, 163.30967712402344, 300.77471923828125, -358.31146240234375, 393.11676025390625, 243.40126037597656, 423.2584533691406, 390.2583923339844, 1007.3863525390625, 461.9638671875, -27.144697189331055, 471.7895812988281, 477.38250732421875, 268.75396728515625, 399.163330078125, -52.84442138671875, 95.04490661621094, 499.9751892089844, 215.61053466796875, 105.3095474243164, 1023.013427734375, -87.54182434082031, -96.88463592529297, 241.8488311767578, 809.2542724609375, -48.7049560546875, 991.3875732421875, 322.598388671875, -28.698284149169922, 285.2452087402344, -184.13319396972656, 292.72967529296875, 851.5188598632812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000389.npy"} +{"epoch": 0.5712187958883994, "step": 390, "batch_size": 64, "mean": 241.1983642578125, "std": 372.67144775390625, "min": -578.0916748046875, "p10": -149.40523376464841, "median": 265.226806640625, "p90": 571.2216979980469, "max": 2110.766845703125, "pos_frac": 0.765625, "sample": [256.0103454589844, 321.15484619140625, 697.703857421875, 526.5288696289062, -578.0916748046875, -106.67838287353516, -275.1470642089844, 151.6275177001953, 364.8280029296875, 265.10858154296875, 503.9231872558594, 33.69660949707031, 331.11309814453125, 429.5475769042969, 276.1239013671875, 142.98989868164062, -156.6839599609375, 2110.766845703125, -132.42153930664062, 430.87359619140625, 78.45233154296875, 485.4653015136719, 571.7677001953125, 147.95497131347656, 186.2595672607422, -0.6104087829589844, 126.58602142333984, -60.762969970703125, 221.03599548339844, 280.192138671875, 152.2212677001953, -60.61859130859375, -227.51251220703125, -105.3910903930664, 174.17587280273438, 502.70489501953125, 795.433837890625, 586.2117919921875, -435.02520751953125, 306.6083068847656, 569.9476928710938, 636.4593505859375, 435.2099304199219, 116.94127655029297, 609.26953125, 236.1392364501953, 134.80078125, 59.23484802246094, 505.7422790527344, 413.96337890625, 530.3076171875, 278.712890625, 326.9518737792969, 265.34503173828125, 350.4617614746094, -35.49535369873047, -11.158843994140625, -273.7943115234375, -502.6589660644531, 267.82635498046875, 469.33526611328125, 107.7493896484375, 297.1153564453125, 330.1654357910156], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000390.npy"} +{"epoch": 0.5726872246696035, "step": 391, "batch_size": 64, "mean": 142.20448303222656, "std": 279.0213928222656, "min": -565.9033813476562, "p10": -172.37402038574217, "median": 108.57180786132812, "p90": 483.901025390625, "max": 989.559326171875, "pos_frac": 0.734375, "sample": [24.652202606201172, 11.288055419921875, 87.77819061279297, -93.19972229003906, 174.53884887695312, 333.4363708496094, 44.555702209472656, 112.32422637939453, 9.448322296142578, 352.4273376464844, -110.05452728271484, 199.29908752441406, -213.2506103515625, -27.16881561279297, 80.05900573730469, 32.19165802001953, 355.9563903808594, 152.36993408203125, 53.51525115966797, -176.03070068359375, 247.52818298339844, 485.7889709472656, 591.806640625, 497.1729736328125, -118.8382339477539, 21.846481323242188, 989.559326171875, -217.6834259033203, -149.78530883789062, 323.76983642578125, 394.48760986328125, -79.56737518310547, 82.93940734863281, 7.525241851806641, -68.41768646240234, 242.4032745361328, 107.67829132080078, -34.863182067871094, 479.4958190917969, 159.1658935546875, 109.46532440185547, -208.61917114257812, 160.10589599609375, 191.38638305664062, 682.4317626953125, -263.2710876464844, 613.92578125, -89.54564666748047, -465.12091064453125, 28.21575164794922, 53.63789367675781, 294.3858947753906, -163.84176635742188, 406.8226318359375, 431.709228515625, 89.64387512207031, 790.5775756835938, 280.2242431640625, 291.62664794921875, -565.9033813476562, 280.6669616699219, 248.09030151367188, 292.983154296875, 245.3406524658203], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000391.npy"} +{"epoch": 0.5741556534508077, "step": 392, "batch_size": 64, "mean": 275.17437744140625, "std": 267.1435852050781, "min": -176.2922821044922, "p10": -19.411466217040985, "median": 225.27154541015625, "p90": 621.9507934570315, "max": 1105.177734375, "pos_frac": 0.890625, "sample": [14.073402404785156, 205.51837158203125, -153.29779052734375, 44.59048080444336, 191.3545684814453, -71.4085464477539, 654.32666015625, 245.47547912597656, 440.7699279785156, 387.50067138671875, 648.1217651367188, -141.36695861816406, 252.7434844970703, 149.86660766601562, 195.82752990722656, 23.988920211791992, 560.8851928710938, 827.6951904296875, 436.23406982421875, 223.04737854003906, -126.79766082763672, 372.12066650390625, 461.8553466796875, 25.41126823425293, 340.30084228515625, 317.2017822265625, 218.13113403320312, 1105.177734375, 11.50518798828125, 214.0609130859375, 255.9453887939453, 306.452880859375, 435.1617126464844, 171.28216552734375, 533.6119995117188, 227.49571228027344, 357.093994140625, 178.62002563476562, 197.20668029785156, 142.1254119873047, 384.2330322265625, 415.4674377441406, 32.510284423828125, -54.08019256591797, 541.0366821289062, 853.13671875, 346.94866943359375, 421.5434265136719, 402.6536560058594, -176.2922821044922, 138.0783233642578, 88.94529724121094, 91.67025756835938, 302.1407775878906, 165.6111297607422, 134.4827880859375, 313.890380859375, 49.39033126831055, 953.499755859375, 74.65443420410156, -32.661460876464844, 84.04827880859375, 399.5190734863281, 800.82275390625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000392.npy"} +{"epoch": 0.5756240822320118, "step": 393, "batch_size": 64, "mean": 214.08514404296875, "std": 337.7460632324219, "min": -299.0036926269531, "p10": -211.5011199951172, "median": 196.25698852539062, "p90": 595.7366882324221, "max": 1506.598876953125, "pos_frac": 0.703125, "sample": [270.38629150390625, 760.0759887695312, -6.019901275634766, -142.01097106933594, 413.47564697265625, 446.570556640625, -241.583984375, 420.076171875, 332.0628662109375, 246.95791625976562, 108.8760986328125, 202.68649291992188, 186.54766845703125, 357.2952575683594, 305.84881591796875, 75.45233917236328, 1029.386962890625, 955.464599609375, 649.7869873046875, -7.911369323730469, 120.95179748535156, 545.200439453125, -221.17288208007812, -216.88206481933594, -97.52940368652344, -16.710643768310547, -219.1032257080078, 279.4391174316406, 324.1794128417969, 207.2442626953125, 125.47994995117188, 438.8548583984375, 500.29608154296875, 189.82748413085938, 518.9092407226562, 337.8350830078125, 250.655029296875, 134.8582763671875, 382.29376220703125, -67.89265441894531, 24.377395629882812, 176.3189239501953, 617.3950805664062, -211.47482299804688, -213.70867919921875, 494.0867919921875, 282.8099670410156, -211.51239013671875, 24.452980041503906, 187.10873413085938, 119.29810333251953, 219.63815307617188, -192.57081604003906, -121.5052719116211, -90.90277099609375, 355.7952575683594, 378.4310302734375, -299.0036926269531, 320.38818359375, 45.80529022216797, -146.61102294921875, 1506.598876953125, 687.409912109375, -131.3348388671875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000393.npy"} +{"epoch": 0.5770925110132159, "step": 394, "batch_size": 64, "mean": 234.58958435058594, "std": 292.0586242675781, "min": -331.77001953125, "p10": -87.57334976196289, "median": 196.43480682373047, "p90": 663.4596801757814, "max": 1066.419921875, "pos_frac": 0.796875, "sample": [80.10446166992188, 338.66021728515625, -224.5980987548828, 404.5583801269531, 181.11337280273438, -78.27154541015625, 81.01002502441406, 119.31474304199219, 567.4291381835938, 94.64370727539062, 591.0140380859375, -40.585304260253906, -331.77001953125, 31.795394897460938, -19.225296020507812, -91.5598373413086, -124.62555694580078, 756.984375, 696.8232421875, -201.56822204589844, 639.8465576171875, -161.8714599609375, 288.23291015625, 310.9678649902344, 193.28004455566406, 356.31658935546875, 251.84388732910156, 88.95329284667969, -45.065879821777344, 599.27880859375, 557.2828369140625, 728.4457397460938, -110.70075988769531, 398.2021484375, 135.5185546875, 50.16217041015625, 152.33291625976562, -64.42517852783203, 149.1873779296875, 673.57958984375, 47.60114288330078, 477.8304748535156, 37.26381301879883, 239.7515411376953, 66.83592224121094, 7.80877685546875, 493.4515686035156, 828.2688598632812, 233.60548400878906, 217.51156616210938, 383.79412841796875, 310.217041015625, 1066.419921875, 854.6658325195312, 334.40496826171875, 39.40416717529297, 76.64305114746094, -64.00143432617188, 199.58956909179688, 379.1211242675781, 221.80203247070312, 42.477294921875, 206.8798828125, 289.7721862792969], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000394.npy"} +{"epoch": 0.57856093979442, "step": 395, "batch_size": 64, "mean": 248.63189697265625, "std": 318.1784362792969, "min": -479.2042541503906, "p10": -77.55717163085936, "median": 206.88184356689453, "p90": 717.9365539550785, "max": 1007.108642578125, "pos_frac": 0.765625, "sample": [504.8277893066406, -90.28720092773438, 218.0230712890625, 78.0108642578125, 226.99278259277344, 339.8035888671875, 313.44989013671875, 207.95343017578125, -297.9802551269531, 287.4532470703125, -82.91316223144531, 546.2169799804688, 392.2084655761719, -33.86980438232422, 174.15231323242188, -26.602680206298828, 630.9627075195312, 340.3149719238281, 953.6895751953125, -65.05986022949219, 615.9290161132812, 10.191749572753906, 523.718994140625, -110.11473083496094, 119.8692398071289, 23.017364501953125, 197.91787719726562, 355.394287109375, -49.462486267089844, 67.02523040771484, 755.2110595703125, 929.5794067382812, 461.268798828125, 134.44618225097656, 335.38763427734375, 41.977630615234375, 178.23919677734375, 195.1060333251953, 878.9278564453125, 227.0474853515625, 491.60223388671875, 496.8323669433594, -479.2042541503906, -52.47047424316406, -262.2992248535156, 314.6773681640625, -57.16935729980469, 819.3170776367188, 72.3493423461914, -50.06745910644531, 101.5030517578125, -217.98983764648438, 61.063621520996094, 1007.108642578125, 795.4218139648438, 240.9871368408203, 292.51654052734375, 527.7261352539062, 205.8102569580078, 504.0270690917969, 422.4369812011719, 69.11805725097656, 109.83417510986328, -8.714221954345703], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000395.npy"} +{"epoch": 0.580029368575624, "step": 396, "batch_size": 64, "mean": 228.0292205810547, "std": 249.60231018066406, "min": -463.923095703125, "p10": -11.609563827514622, "median": 176.08390045166016, "p90": 561.29313659668, "max": 1003.7996826171875, "pos_frac": 0.890625, "sample": [182.57518005371094, 601.6510009765625, 87.34444427490234, 198.48190307617188, 114.57864379882812, 611.8349609375, 142.12396240234375, 218.74737548828125, 160.70046997070312, 319.1009216308594, 380.1354675292969, 276.51995849609375, 227.72164916992188, 407.9367980957031, -109.97515869140625, 152.4830780029297, 152.13119506835938, 56.63047790527344, 71.22685241699219, 189.76971435546875, 91.70328521728516, 388.9071044921875, 808.187744140625, 326.3334045410156, 200.68142700195312, 467.1247863769531, 242.2561492919922, 150.51422119140625, 196.88525390625, 688.4310302734375, -50.7912483215332, 356.091064453125, 134.39968872070312, 283.4513854980469, 63.57793045043945, 229.1687469482422, 384.07275390625, -173.34402465820312, 449.11248779296875, 647.6829223632812, 436.4268493652344, 15.4163818359375, -69.67681884765625, 461.4049987792969, 25.22015380859375, -23.19211196899414, 404.968994140625, 28.34656524658203, 50.829498291015625, 163.58749389648438, 863.7006225585938, 104.31635284423828, 36.2982177734375, -139.26593017578125, 58.62034606933594, 330.731689453125, 272.65069580078125, 146.26528930664062, 133.33670043945312, -463.923095703125, 126.92320251464844, 1003.7996826171875, 131.326904296875, 169.59262084960938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000396.npy"} +{"epoch": 0.5814977973568282, "step": 397, "batch_size": 64, "mean": 281.4169921875, "std": 285.406494140625, "min": -304.01959228515625, "p10": -55.20058517456054, "median": 237.7665786743164, "p90": 651.5865234375, "max": 970.4465942382812, "pos_frac": 0.84375, "sample": [-11.81740951538086, 559.2249145507812, 38.395660400390625, 408.1330261230469, 168.30474853515625, 45.36134719848633, 297.9049987792969, 307.50714111328125, 498.03759765625, 179.5789031982422, 651.81591796875, 19.209457397460938, 625.0701904296875, -79.57659912109375, 118.05233764648438, 330.9394836425781, 195.58294677734375, -154.64297485351562, 426.16259765625, 757.5555419921875, 226.48666381835938, 612.7662353515625, 341.44769287109375, 433.6848449707031, 897.131591796875, -43.81398010253906, 651.05126953125, 488.3609924316406, -103.40496826171875, 144.78440856933594, 587.7081909179688, 244.0891571044922, 69.18295288085938, 855.443359375, 388.33349609375, 270.958251953125, 144.38710021972656, 214.08819580078125, 174.82623291015625, 163.9366912841797, -304.01959228515625, -154.52117919921875, 60.75121307373047, -278.6636962890625, 512.8753662109375, 283.02825927734375, -60.08055877685547, 152.02972412109375, -5.129306793212891, 531.5895385742188, 19.566753387451172, 694.109375, 171.20639038085938, 970.4465942382812, 116.00403594970703, 815.8690185546875, 103.67735290527344, 231.44400024414062, 548.4656982421875, 138.33026123046875, 264.31207275390625, 307.92401123046875, 442.5434265136719, 306.68017578125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000397.npy"} +{"epoch": 0.5829662261380323, "step": 398, "batch_size": 64, "mean": 265.7738037109375, "std": 334.4483642578125, "min": -419.98388671875, "p10": -105.69915771484372, "median": 171.81349182128906, "p90": 665.3736511230469, "max": 1278.6905517578125, "pos_frac": 0.796875, "sample": [78.30328369140625, 68.79896545410156, 459.4730529785156, 691.4706420898438, -119.07368469238281, 119.61988067626953, 323.88177490234375, -340.8445739746094, 60.13743591308594, -127.42687225341797, 517.2634887695312, -351.1575927734375, 627.040771484375, 407.66424560546875, 432.7030334472656, 932.587158203125, 145.11744689941406, 107.09294891357422, 643.30908203125, 313.70709228515625, 528.5096435546875, 167.94036865234375, 660.962890625, 429.5333251953125, 1042.056884765625, -16.287582397460938, 262.9154052734375, 167.7247772216797, 667.2639770507812, 397.73663330078125, 79.24411010742188, 196.66436767578125, 3.512939453125, -57.79542541503906, 234.67318725585938, 130.6961212158203, 105.74687957763672, 421.35394287109375, 113.81800079345703, 508.41607666015625, 146.1228790283203, 524.9852294921875, 163.18344116210938, 72.22452545166016, 142.25552368164062, -11.956916809082031, 392.23736572265625, 72.48373413085938, -74.49192810058594, 98.176513671875, 188.6820068359375, 1278.6905517578125, 576.3759765625, 864.4561157226562, -419.98388671875, 535.4085693359375, 401.678955078125, 488.5187072753906, 789.9426879882812, 175.68661499023438, -52.45841979980469, -23.9268798828125, -129.83306884765625, -223.28717041015625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000398.npy"} +{"epoch": 0.5844346549192364, "step": 399, "batch_size": 64, "mean": 228.27647399902344, "std": 326.84832763671875, "min": -497.96844482421875, "p10": -190.0790618896484, "median": 212.89148712158203, "p90": 607.871209716797, "max": 1091.922607421875, "pos_frac": 0.75, "sample": [111.2961654663086, 369.07672119140625, 96.1523666381836, -284.47186279296875, 568.65771484375, -73.78441619873047, 645.0257568359375, 12.279216766357422, 93.59107208251953, -50.137847900390625, 288.7996520996094, -323.14569091796875, 616.2744140625, -44.3343505859375, 558.6206665039062, 199.7516632080078, 353.91265869140625, -94.6780014038086, 29.11564064025879, 117.59786987304688, 762.834716796875, 163.23924255371094, 409.98919677734375, 213.80104064941406, 569.5181884765625, 743.9073486328125, 232.65292358398438, 386.40277099609375, -324.66412353515625, 260.3190612792969, -36.42314147949219, 416.2350158691406, 210.43942260742188, 153.20530700683594, 429.4040222167969, -214.46670532226562, 1091.922607421875, 557.994140625, 574.5303955078125, -278.3992919921875, 242.03793334960938, 509.8276672363281, 196.6909637451172, -497.96844482421875, -63.525081634521484, 396.8349609375, 88.96499633789062, 741.0430908203125, 98.58147430419922, -133.174560546875, 422.7566223144531, 896.4905395507812, 487.08270263671875, 163.27635192871094, 588.2637329101562, 236.11085510253906, 211.98193359375, 239.882080078125, -436.9114685058594, 299.312744140625, 460.28057861328125, -114.05216979980469, 167.52862548828125, -103.66399383544922], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000399.npy"} +{"epoch": 0.5859030837004405, "step": 400, "batch_size": 64, "mean": 344.045166015625, "std": 314.2878723144531, "min": -525.0548095703125, "p10": -44.914993286132805, "median": 349.2593688964844, "p90": 719.8056213378907, "max": 1146.01513671875, "pos_frac": 0.828125, "sample": [285.4039611816406, 503.413330078125, 483.0567626953125, 263.4009094238281, 613.9256591796875, -77.46835327148438, 387.67791748046875, 240.71536254882812, -107.76323699951172, 615.9104614257812, 207.13101196289062, 126.70243072509766, 492.8912658691406, 262.76129150390625, -525.0548095703125, 784.7427978515625, 640.180908203125, 207.2238311767578, 352.394287109375, 377.57464599609375, 834.6338500976562, 612.656982421875, 81.18588256835938, 346.12445068359375, 64.9665298461914, 380.50885009765625, 64.42637634277344, 587.7901611328125, 330.75653076171875, 381.4183654785156, 387.80902099609375, 223.48638916015625, -93.12696075439453, 413.010498046875, 320.76861572265625, -250.02674865722656, 23.19304656982422, 787.9176025390625, 302.312744140625, 540.563232421875, -19.883865356445312, 253.0579833984375, 332.77728271484375, 1029.0135498046875, -17.516019821166992, 645.4102783203125, 289.78125, 192.9394989013672, -15.588024139404297, 1146.01513671875, 714.1561889648438, -35.60498046875, 218.11126708984375, 416.47015380859375, 370.3719787597656, 695.6285400390625, -217.1581573486328, 679.2606201171875, 661.2173461914062, 403.323974609375, 357.73760986328125, 722.226806640625, 770.84912109375, -48.904998779296875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000400.npy"} +{"epoch": 0.5873715124816447, "step": 401, "batch_size": 64, "mean": 194.07742309570312, "std": 391.6370544433594, "min": -792.0859985351562, "p10": -247.56750030517577, "median": 145.54766082763672, "p90": 784.9466491699222, "max": 1147.45361328125, "pos_frac": 0.703125, "sample": [135.72764587402344, 457.9183349609375, -35.40327072143555, 262.93157958984375, 485.43817138671875, 78.60225677490234, -366.8739318847656, -388.7405700683594, -792.0859985351562, -115.24790954589844, 62.720848083496094, 314.7878723144531, -196.05584716796875, 89.79486846923828, -33.1893310546875, 2.0077152252197266, 348.273681640625, 271.9144592285156, 79.74124145507812, 48.18421936035156, -164.30459594726562, -138.8743133544922, -62.13996124267578, 965.675048828125, 26.512182235717773, 670.734619140625, 953.6725463867188, 466.62347412109375, 823.9515380859375, 209.62977600097656, 64.36652374267578, 302.8992919921875, -39.41976547241211, 18.684181213378906, -72.92150115966797, -485.9432678222656, 1015.8046875, -39.84809494018555, 227.1661834716797, 602.452880859375, 207.1689910888672, 557.9319458007812, 383.7966613769531, -47.685794830322266, 312.591796875, 204.2796630859375, 164.3371124267578, -253.86656188964844, 1147.45361328125, -495.62811279296875, 639.87451171875, 465.041748046875, 375.2810974121094, 155.36767578125, 865.8416748046875, 868.7982177734375, 26.380348205566406, 323.4189453125, 28.5511474609375, 48.25339889526367, 183.08319091796875, 693.9352416992188, -252.60501098632812, -235.8133087158203], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000401.npy"} +{"epoch": 0.5888399412628488, "step": 402, "batch_size": 64, "mean": 357.2535400390625, "std": 325.8533630371094, "min": -280.6949157714844, "p10": 11.554567718505869, "median": 323.57713317871094, "p90": 871.4367797851564, "max": 1497.8505859375, "pos_frac": 0.90625, "sample": [435.26123046875, 29.19685173034668, 21.280296325683594, 430.6014099121094, 269.04290771484375, 390.97515869140625, -10.415176391601562, 291.4540100097656, -56.74736785888672, 650.4483642578125, 424.2833251953125, 582.27197265625, 674.2379760742188, 208.5634002685547, -6.12493896484375, 609.051513671875, 7.3863983154296875, 316.1913146972656, 218.65032958984375, 395.1007080078125, 577.2897338867188, 227.20193481445312, 595.4015502929688, -232.62811279296875, 343.2000427246094, 492.53955078125, 25.419097900390625, 930.392333984375, 31.212142944335938, 955.414794921875, 215.04257202148438, 400.06884765625, 141.36683654785156, 437.1545715332031, 137.7393798828125, 278.1982116699219, 940.29345703125, 61.109283447265625, 148.60350036621094, 708.5074462890625, 26.246963500976562, -58.92728042602539, 887.27294921875, 961.31005859375, 197.01577758789062, 472.6783752441406, 834.4857177734375, 158.37429809570312, 330.96295166015625, 1497.8505859375, 430.45355224609375, 546.0496826171875, 62.15046691894531, 939.9923095703125, 307.2620849609375, 398.3729248046875, 341.30859375, 336.79119873046875, 157.40481567382812, -280.6949157714844, 385.292236328125, 158.68963623046875, 218.87881469726562, 260.7676086425781], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000402.npy"} +{"epoch": 0.5903083700440529, "step": 403, "batch_size": 64, "mean": 271.0655822753906, "std": 314.705078125, "min": -381.6837158203125, "p10": -83.76760711669921, "median": 255.482177734375, "p90": 713.5037292480471, "max": 1169.70703125, "pos_frac": 0.796875, "sample": [204.66580200195312, 55.597511291503906, 96.19873809814453, 211.21533203125, 355.8578186035156, -52.7627067565918, 230.30081176757812, 392.27362060546875, -164.732421875, -225.89451599121094, 170.18466186523438, -73.56820678710938, -381.6837158203125, 550.8372802734375, 175.58840942382812, 847.7513427734375, 277.0861511230469, 28.389379501342773, 457.67498779296875, 316.5311279296875, 413.073974609375, 66.14141845703125, 736.822265625, -62.20862579345703, -128.68101501464844, 585.9720458984375, 517.7265014648438, 740.8839721679688, 261.7630310058594, 900.82177734375, -88.13877868652344, 491.5257263183594, 659.0938110351562, 29.668025970458984, -0.8364944458007812, 569.143310546875, -173.0356903076172, 294.9350891113281, 626.4074096679688, 538.785888671875, -169.57395935058594, 298.2428894042969, 1169.70703125, 23.007619857788086, 522.3363647460938, 120.19602966308594, 344.4153747558594, 738.9259033203125, 419.5684814453125, 889.8125610351562, 317.6270446777344, 491.5970153808594, 411.6504211425781, 107.78837585449219, -68.19247436523438, 20.070816040039062, 25.69850730895996, 402.6734313964844, 59.3126220703125, 69.72207641601562, 332.0202331542969, 249.20132446289062, -71.5987548828125, 192.6143798828125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000403.npy"} +{"epoch": 0.591776798825257, "step": 404, "batch_size": 64, "mean": 263.43988037109375, "std": 363.78411865234375, "min": -555.8056030273438, "p10": -223.48321533203125, "median": 252.4734649658203, "p90": 694.6270080566406, "max": 1172.23388671875, "pos_frac": 0.75, "sample": [-398.58709716796875, 327.6866760253906, 50.795310974121094, 686.714111328125, 538.4253540039062, 190.0980224609375, 210.7754669189453, 977.9619140625, 138.228759765625, 811.83349609375, 1172.23388671875, 250.7930145263672, 287.39044189453125, 832.33251953125, -65.72052764892578, -378.5158386230469, 909.2470092773438, 75.37761688232422, -247.75709533691406, 180.4603729248047, -98.67617797851562, 7.768592834472656, -134.83509826660156, 435.64007568359375, 218.7491455078125, 313.3425598144531, -196.87301635742188, 451.6019592285156, 254.15391540527344, 361.0557861328125, 541.0692749023438, -225.09912109375, 439.25909423828125, 179.80453491210938, 637.8221435546875, 155.09848022460938, -238.02146911621094, 144.6197967529297, 318.04254150390625, 519.4163818359375, -555.8056030273438, 428.03826904296875, 91.33177185058594, 615.0941162109375, 244.46078491210938, -297.12469482421875, -219.7127685546875, 517.7722778320312, 612.0277099609375, -103.37120056152344, 436.5415344238281, -24.714515686035156, 384.36553955078125, 424.2945556640625, 567.899658203125, 698.0182495117188, 322.13909912109375, 78.54414367675781, -24.676721572875977, -67.16664123535156, 922.4141235351562, 114.42406463623047, 653.79443359375, 407.8511962890625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000404.npy"} +{"epoch": 0.593245227606461, "step": 405, "batch_size": 64, "mean": 217.84002685546875, "std": 404.91845703125, "min": -511.0329895019531, "p10": -299.5820343017578, "median": 163.58800506591797, "p90": 828.4332519531253, "max": 1269.725341796875, "pos_frac": 0.671875, "sample": [873.0358276367188, 854.9925537109375, 247.57833862304688, -213.7643280029297, -437.162841796875, -49.187835693359375, -500.2620849609375, 82.49185180664062, 395.4940185546875, -314.45648193359375, 53.89806365966797, 1187.623046875, 549.8060302734375, 151.53836059570312, 61.26665496826172, 1269.725341796875, -33.53619384765625, -60.217750549316406, 128.43023681640625, -143.99365234375, 129.73336791992188, 258.73248291015625, 160.19349670410156, 465.89642333984375, 166.98251342773438, 205.02200317382812, -161.27699279785156, 501.3101501464844, -407.3421630859375, -85.51351165771484, 418.0760192871094, -2.1524829864501953, 395.3206787109375, -55.10839080810547, 435.89013671875, 104.54290008544922, 277.7116394042969, -379.6424560546875, -7.625314712524414, 58.689395904541016, 496.3952331542969, 144.51211547851562, 766.4615478515625, 400.67730712890625, -43.047855377197266, 172.29656982421875, 600.59375, -292.3664245605469, 261.0651550292969, 308.075927734375, 388.19732666015625, -291.8199462890625, 237.75680541992188, 913.1361083984375, 78.21475982666016, -511.0329895019531, 577.060791015625, 306.5197448730469, -42.39398193359375, 979.8267822265625, 906.027099609375, -302.6744384765625, 558.0189208984375, 747.5227661132812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000405.npy"} +{"epoch": 0.5947136563876652, "step": 406, "batch_size": 64, "mean": 279.05023193359375, "std": 341.32745361328125, "min": -403.32452392578125, "p10": -92.38053131103516, "median": 208.1214828491211, "p90": 757.0589294433597, "max": 1224.117919921875, "pos_frac": 0.828125, "sample": [407.0919494628906, 195.66050720214844, 215.66445922851562, 81.21501159667969, 195.3914794921875, 219.55560302734375, 183.28195190429688, -369.42840576171875, 1154.3682861328125, 175.80929565429688, 412.5492248535156, 260.86993408203125, 1224.117919921875, 489.900390625, 304.7730712890625, 26.692089080810547, 489.907470703125, 336.3619079589844, 500.94427490234375, -99.36963653564453, 860.2376708984375, 364.6903076171875, 362.136962890625, -97.23384094238281, 94.1142349243164, 204.3569793701172, 100.06256103515625, -88.07266235351562, 526.6250610351562, 39.644927978515625, -94.22676086425781, 198.85520935058594, 540.3941650390625, -326.6962890625, -177.5645751953125, 518.7217407226562, 846.193115234375, 130.160888671875, 168.01856994628906, -1.0203380584716797, 178.27044677734375, 149.15255737304688, 423.8244934082031, -36.79438018798828, 19.778045654296875, 39.059547424316406, 791.1318359375, 354.39959716796875, 36.8890380859375, 211.885986328125, 671.9127807617188, 677.5554809570312, 323.03125, 22.451156616210938, -403.32452392578125, 25.677473068237305, 844.0537109375, 600.0745239257812, 1098.4593505859375, 385.06512451171875, 280.3732604980469, 532.9655151367188, 71.10813903808594, -12.541099548339844], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000406.npy"} +{"epoch": 0.5961820851688693, "step": 407, "batch_size": 64, "mean": 178.38414001464844, "std": 333.2248229980469, "min": -878.3494873046875, "p10": -271.9117034912109, "median": 156.9957733154297, "p90": 612.5469238281252, "max": 851.042236328125, "pos_frac": 0.71875, "sample": [-299.66497802734375, 530.8900756835938, 527.8768310546875, -289.2080078125, -303.4908142089844, -97.4142074584961, 309.6158447265625, -180.15859985351562, -304.3280944824219, 274.03277587890625, 446.87579345703125, 121.12017822265625, 353.82080078125, 646.1799926757812, 627.300537109375, 28.745927810668945, -72.52838134765625, 533.6325073242188, 74.74597930908203, -126.09666442871094, 365.3883361816406, 111.33990478515625, 76.52436828613281, 143.67550659179688, 132.7274627685547, 578.121826171875, 236.8576202392578, 250.82272338867188, 70.44831848144531, 135.89039611816406, -244.05801391601562, 212.680419921875, 10.678169250488281, -121.80408477783203, 44.795982360839844, 157.4920196533203, 516.7310180664062, 744.5861206054688, 111.96665954589844, -878.3494873046875, 778.7386474609375, 358.9229736328125, 392.52899169921875, -11.238792419433594, 465.9617919921875, 240.7833709716797, 228.565185546875, 851.042236328125, 393.93267822265625, 299.529541015625, 639.54541015625, -220.70867919921875, -108.05238342285156, 559.9603271484375, -25.46674346923828, 90.82329559326172, 156.49952697753906, 558.8072509765625, -283.8489990234375, -389.77752685546875, 635.9051513671875, 258.8665771484375, -188.50624084472656, 275.30865478515625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000407.npy"} +{"epoch": 0.5976505139500734, "step": 408, "batch_size": 64, "mean": 201.75115966796875, "std": 270.99176025390625, "min": -291.45611572265625, "p10": -150.18580169677733, "median": 194.3045883178711, "p90": 600.2791259765626, "max": 796.8251953125, "pos_frac": 0.75, "sample": [34.99134826660156, 672.9130249023438, 796.8251953125, -30.2135009765625, 418.82147216796875, 367.4573059082031, -151.653076171875, 178.82447814941406, -141.30889892578125, -146.7621612548828, -9.740461349487305, -291.45611572265625, 97.32599639892578, 33.07550048828125, -18.301292419433594, -195.97654724121094, 117.21568298339844, 247.63565063476562, 241.5313720703125, 30.69898796081543, 182.6925506591797, 326.23614501953125, -126.80828857421875, 403.8246765136719, -108.90782165527344, 280.5015563964844, 11.820699691772461, 253.75253295898438, 76.9809341430664, 747.2182006835938, 153.45201110839844, 217.22695922851562, 577.312255859375, -40.139892578125, -223.16676330566406, -212.56582641601562, 243.9075927734375, 264.7491455078125, 67.55583190917969, 193.37863159179688, 438.7261657714844, 413.5558776855469, 369.20574951171875, 541.0822143554688, 184.8409881591797, -222.9622802734375, 719.3916625976562, 13.018270492553711, -16.291210174560547, 34.318511962890625, 195.2305450439453, 610.1220703125, 259.5001220703125, -181.3436279296875, 351.0904541015625, 310.0914001464844, 78.91300201416016, 507.8533935546875, 254.1031951904297, 316.52569580078125, 664.6029663085938, 664.611328125, 524.7001953125, 340.2620849609375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000408.npy"} +{"epoch": 0.5991189427312775, "step": 409, "batch_size": 64, "mean": 198.47232055664062, "std": 290.9932861328125, "min": -362.5634765625, "p10": -134.6941192626953, "median": 160.7114715576172, "p90": 560.4001037597658, "max": 920.315185546875, "pos_frac": 0.734375, "sample": [-48.839019775390625, -335.1283264160156, 336.46026611328125, 204.33729553222656, 252.3661346435547, 433.4376220703125, -59.478981018066406, -112.0166015625, -107.76959228515625, 70.32383728027344, 526.303466796875, 478.0330505371094, -277.67901611328125, 198.24063110351562, 453.3197937011719, 182.94912719726562, 231.186767578125, 286.50286865234375, 197.04019165039062, 829.712158203125, 370.0084533691406, -144.41305541992188, 371.489501953125, 2.458110809326172, 662.471923828125, -165.69073486328125, -22.281532287597656, 44.533226013183594, -86.42520141601562, 154.71714782714844, 352.920654296875, 721.7529907226562, 574.5177001953125, -292.8672790527344, 42.147193908691406, 920.315185546875, -6.339073181152344, 99.56953430175781, -362.5634765625, 119.69303131103516, 503.6047668457031, -78.49950408935547, 527.4590454101562, 139.40431213378906, 209.87646484375, 296.23504638671875, 80.03291320800781, 114.02884674072266, 786.7655639648438, 491.8466796875, -52.522315979003906, 166.70579528808594, -161.4492950439453, 234.8139190673828, 147.1240997314453, 466.32330322265625, 364.5005798339844, 331.3297119140625, 136.91177368164062, 51.36128234863281, 767.1597900390625, -79.00273132324219, 15.924560546875, 146.97824096679688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000409.npy"} +{"epoch": 0.6005873715124816, "step": 410, "batch_size": 64, "mean": 232.30198669433594, "std": 286.5107421875, "min": -535.0966796875, "p10": -105.58480911254881, "median": 227.4613037109375, "p90": 582.5942749023438, "max": 858.8226318359375, "pos_frac": 0.796875, "sample": [221.06707763671875, 318.02655029296875, -128.93743896484375, 70.34583282470703, 591.2093505859375, 336.39434814453125, 506.8806457519531, 460.1827087402344, 483.1698303222656, 22.738187789916992, 126.80311584472656, 233.85552978515625, 11.031234741210938, 474.195556640625, -110.7648696899414, -114.485107421875, 610.795654296875, -453.70880126953125, -17.59213638305664, 398.013427734375, 416.19744873046875, 708.615966796875, -93.49800109863281, 148.8526153564453, 562.492431640625, 687.3867797851562, 460.101806640625, 23.802730560302734, 290.12066650390625, 858.8226318359375, 269.12322998046875, 18.73137664794922, 363.5834045410156, 45.8282470703125, 453.412841796875, -2.949005126953125, -47.10419464111328, -29.995458602905273, 124.354736328125, -181.53875732421875, 180.77354431152344, 388.9828186035156, 165.2056884765625, 384.9293212890625, 208.22506713867188, -535.0966796875, 263.3415222167969, 177.33290100097656, 731.2334594726562, 355.4788818359375, 113.19802856445312, -37.499229431152344, 431.57958984375, 479.5805358886719, 241.1193389892578, 735.6608276367188, 150.84535217285156, 505.070068359375, -368.54901123046875, 157.5377655029297, 301.7867736816406, 5.793874740600586, 519.0045166015625, 196.2299346923828], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000410.npy"} +{"epoch": 0.6020558002936858, "step": 411, "batch_size": 64, "mean": 239.39259338378906, "std": 336.6127014160156, "min": -585.7687377929688, "p10": -70.27261962890624, "median": 165.41920471191406, "p90": 686.3760620117188, "max": 1394.1492919921875, "pos_frac": 0.765625, "sample": [257.8658447265625, -20.074378967285156, -71.18950653076172, 101.8266830444336, 518.7935791015625, 161.2198486328125, 132.98660278320312, 223.2772674560547, 357.815185546875, 387.54901123046875, 212.931396484375, -40.70734405517578, 167.9700927734375, 21.724613189697266, 26.73048973083496, -36.58837890625, 488.07049560546875, 323.0062255859375, 71.0421142578125, 456.0674743652344, 833.4335327148438, 304.8779602050781, -196.09140014648438, -24.12837028503418, 113.72293853759766, 163.3664093017578, 115.11439514160156, 221.4120330810547, 111.95535278320312, 1080.0069580078125, 158.01220703125, 396.0979919433594, 893.9957885742188, -68.13321685791016, 545.8621215820312, 12.928714752197266, 303.80609130859375, 672.88525390625, 748.7614135742188, 143.58392333984375, 73.0859603881836, 348.9942626953125, 154.14654541015625, 321.79803466796875, 156.56539916992188, 339.68939208984375, -6.593822479248047, -585.7687377929688, 1394.1492919921875, 346.7071533203125, 167.4720001220703, 335.5239562988281, 629.035888671875, 692.1578369140625, 578.4039916992188, -179.70587158203125, -12.542121887207031, -408.69854736328125, 174.23880004882812, 14.983625411987305, 795.1365966796875, -150.81381225585938, -46.61183166503906, -82.01609802246094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000411.npy"} +{"epoch": 0.6035242290748899, "step": 412, "batch_size": 64, "mean": 245.37954711914062, "std": 324.2710266113281, "min": -564.33251953125, "p10": -115.28630752563475, "median": 228.60459899902344, "p90": 604.0424743652344, "max": 1249.72509765625, "pos_frac": 0.765625, "sample": [-85.9886703491211, 184.72528076171875, 433.41033935546875, 475.326904296875, 376.803955078125, -98.15300750732422, 683.8281860351562, -22.94153594970703, 221.441650390625, 577.8729248046875, 196.07601928710938, 354.1750183105469, 21.1356201171875, 370.2751770019531, -3.2599048614501953, 551.8024291992188, 358.68170166015625, 99.01239776611328, -564.33251953125, 242.9127197265625, 53.94702911376953, 13.738685607910156, 593.4558715820312, 196.3246612548828, 75.59698486328125, 392.906494140625, 17.667816162109375, 345.5808410644531, -173.77688598632812, 235.76754760742188, -43.29857635498047, 73.78595733642578, 159.5045166015625, 263.8442687988281, 173.974609375, 4.53901481628418, -324.10076904296875, -144.9800567626953, 163.95346069335938, 933.91455078125, 40.150177001953125, -477.437744140625, 364.5069580078125, 608.57958984375, 699.3292236328125, -146.5635528564453, 547.0054931640625, 720.6062622070312, -9.77899169921875, 513.601318359375, 353.2504577636719, -52.43536376953125, 155.09156799316406, 659.2053833007812, 572.9342041015625, 1249.72509765625, -50.48625564575195, 416.83270263671875, 384.81982421875, -122.629150390625, 498.7862548828125, 290.980712890625, 589.63623046875, 513.4307861328125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000412.npy"} +{"epoch": 0.604992657856094, "step": 413, "batch_size": 64, "mean": 245.4788055419922, "std": 295.04693603515625, "min": -759.2752075195312, "p10": -121.9058891296386, "median": 233.48898315429688, "p90": 591.4459655761718, "max": 972.2948608398438, "pos_frac": 0.84375, "sample": [82.46420288085938, 64.74421691894531, 173.859619140625, 371.3227233886719, 128.61090087890625, 116.23698425292969, 202.67547607421875, 218.56939697265625, 64.5156021118164, 233.59744262695312, 611.9127807617188, 932.9600219726562, -44.11756134033203, 237.27337646484375, -37.878562927246094, -12.707807540893555, 384.285888671875, 107.88131713867188, 93.16123962402344, 755.2806396484375, 88.8392333984375, 482.41473388671875, 441.0541076660156, -759.2752075195312, 85.75555419921875, 333.18609619140625, 281.622802734375, 229.922607421875, 626.78857421875, 972.2948608398438, 325.6551513671875, 339.4324951171875, 572.2025756835938, 354.63214111328125, -163.04055786132812, 529.8591918945312, 339.29827880859375, 172.32940673828125, 333.76800537109375, 325.52105712890625, 121.58152770996094, 212.5345001220703, 551.571044921875, -262.11114501953125, 590.4013061523438, 421.71759033203125, 233.38052368164062, -249.80996704101562, -241.7978973388672, -367.72119140625, 184.4762725830078, 443.35699462890625, 410.63482666015625, 651.2550048828125, 15.002330780029297, 414.2746276855469, 274.6565246582031, 591.8936767578125, 200.13572692871094, 312.314453125, 141.11932373046875, 212.4640655517578, 407.64813232421875, -155.24374389648438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000413.npy"} +{"epoch": 0.6064610866372981, "step": 414, "batch_size": 64, "mean": 263.87225341796875, "std": 308.20587158203125, "min": -685.6654052734375, "p10": -16.96663246154785, "median": 253.75397491455078, "p90": 621.1368530273438, "max": 987.689208984375, "pos_frac": 0.859375, "sample": [325.1654357910156, 247.98435974121094, 398.081787109375, 254.1962432861328, -15.98781967163086, 925.4614868164062, 178.46197509765625, -685.6654052734375, 29.064956665039062, 237.88604736328125, 324.931884765625, 238.73223876953125, 580.1802368164062, 854.9637451171875, 268.0178527832031, -148.36570739746094, 350.54217529296875, 78.10791015625, -62.400413513183594, 104.97412872314453, 279.3891296386719, 504.69476318359375, 4.607646942138672, 149.71731567382812, 524.0330200195312, 298.49041748046875, -161.27175903320312, -14.183244705200195, 467.76214599609375, 75.83412170410156, 592.324951171875, 472.15185546875, 9.433490753173828, 318.94769287109375, 744.1170654296875, 145.51051330566406, 397.91241455078125, -17.386123657226562, -379.38031005859375, 302.986572265625, 315.061279296875, 221.6298828125, 610.8318481445312, -244.05545043945312, 84.26673889160156, 500.6856689453125, 31.126113891601562, 625.5532836914062, 85.64420318603516, 23.964977264404297, 351.112548828125, 145.5148468017578, 149.81436157226562, 82.79845428466797, 253.31170654296875, 322.95404052734375, 797.698974609375, 25.162811279296875, 466.5517578125, 963.8615112304688, 987.689208984375, 396.649658203125, 124.67927551269531, 365.2909851074219], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000414.npy"} +{"epoch": 0.6079295154185022, "step": 415, "batch_size": 64, "mean": 204.3395233154297, "std": 295.46807861328125, "min": -753.8831176757812, "p10": -104.02069244384765, "median": 208.49178314208984, "p90": 610.0809204101563, "max": 1000.48388671875, "pos_frac": 0.75, "sample": [317.6643371582031, -70.9969482421875, 333.6731262207031, 715.1763305664062, 18.946670532226562, 661.75146484375, 611.2962646484375, 159.11631774902344, -35.37468719482422, 202.7638397216797, 478.5715637207031, 332.4687805175781, 467.39434814453125, 428.773193359375, -110.67086029052734, -85.68467712402344, 229.43881225585938, -231.39059448242188, 354.1739501953125, 237.56578063964844, 7.260631561279297, 49.5811767578125, 6.64508056640625, 663.407958984375, -224.99465942382812, 131.95816040039062, 722.235595703125, -92.96219635009766, 82.63202667236328, 438.45782470703125, -17.4678955078125, 391.6429138183594, 248.4730224609375, 64.66375732421875, 1000.48388671875, -315.1251220703125, -65.16740417480469, 122.63162231445312, 485.4842529296875, 729.4208984375, 81.49070739746094, 360.54132080078125, 263.4334716796875, 94.639892578125, 97.736083984375, 197.03770446777344, -1.1764888763427734, 293.57818603515625, 429.6053771972656, -108.76004791259766, 214.2197265625, 449.52484130859375, 287.7449035644531, 14.525054931640625, 322.749755859375, -17.277814865112305, 269.7605285644531, -1.000885009765625, 358.8459167480469, -212.16970825195312, 607.2451171875, 104.90763854980469, -753.8831176757812, 280.4930725097656], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000415.npy"} +{"epoch": 0.6093979441997063, "step": 416, "batch_size": 64, "mean": 314.61083984375, "std": 289.9212341308594, "min": -156.83425903320312, "p10": -48.115246582031226, "median": 255.2853546142578, "p90": 751.4817810058595, "max": 1005.3121337890625, "pos_frac": 0.84375, "sample": [813.5842895507812, 181.82806396484375, 129.85687255859375, 411.8358154296875, -71.78609466552734, 600.9575805664062, 222.33831787109375, 116.3216781616211, 508.99322509765625, 760.7378540039062, 76.39266204833984, -23.707916259765625, 76.72501373291016, 288.88623046875, 256.2278137207031, 595.0712890625, 331.4091491699219, 15.164024353027344, 610.5999145507812, 170.3655242919922, 401.5649108886719, 490.4134826660156, 33.28193283081055, 161.64620971679688, 596.4578247070312, 254.3428955078125, 289.84075927734375, 860.8101196289062, 136.50039672851562, 773.0875854492188, 231.76023864746094, 579.98828125, 1005.3121337890625, -156.83425903320312, -130.219970703125, 91.0326156616211, 562.60302734375, 498.35345458984375, 19.77422332763672, 837.7950439453125, -1.1453323364257812, 980.5387573242188, 281.5289611816406, 193.46446228027344, 479.81256103515625, 463.69329833984375, 344.48760986328125, 531.527587890625, 234.5666046142578, 82.5363540649414, -6.421283721923828, 187.2821502685547, -119.21957397460938, 729.88427734375, 354.0815734863281, -58.575531005859375, 367.168701171875, -64.46434020996094, 218.77114868164062, -74.27275085449219, 111.7750473022461, 239.14834594726562, 425.7979736328125, 623.81298828125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000416.npy"} +{"epoch": 0.6108663729809104, "step": 417, "batch_size": 64, "mean": 265.21044921875, "std": 266.8511657714844, "min": -291.6026611328125, "p10": -85.79049606323241, "median": 249.44036102294922, "p90": 646.404345703125, "max": 970.5189208984375, "pos_frac": 0.859375, "sample": [39.288429260253906, 22.959365844726562, -24.9707088470459, 275.40911865234375, 28.471675872802734, 500.3551025390625, 79.51985168457031, 266.4237060546875, -115.0362777709961, 688.6952514648438, 18.48892593383789, 633.6005859375, 395.07879638671875, 138.2158660888672, 270.4832763671875, -80.75584411621094, -104.94816589355469, 732.5029296875, 660.887451171875, 45.787261962890625, 619.4815673828125, 258.7251892089844, 752.675537109375, 206.4693603515625, 590.9976196289062, 970.5189208984375, 272.31170654296875, 249.87985229492188, 207.93429565429688, 334.41729736328125, 106.12332916259766, 171.5850067138672, -127.54096221923828, 131.57968139648438, 372.3753356933594, 611.8463745117188, 55.20242691040039, 313.7015380859375, 387.132568359375, 226.6913604736328, 83.39361572265625, 301.6904602050781, 249.00086975097656, 311.23699951171875, -102.70016479492188, 288.48077392578125, 134.26055908203125, 180.32864379882812, 520.1967163085938, 280.788330078125, 468.6494140625, -291.6026611328125, 211.07943725585938, 397.9714050292969, 22.992652893066406, -209.07211303710938, 231.74139404296875, 117.60582733154297, 698.02294921875, 503.6439208984375, 647.3719482421875, 189.62413024902344, 644.1466064453125, -87.94820404052734], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000417.npy"} +{"epoch": 0.6123348017621145, "step": 418, "batch_size": 64, "mean": 271.8375244140625, "std": 367.86236572265625, "min": -420.32135009765625, "p10": -129.68718566894532, "median": 225.90192413330078, "p90": 701.88798828125, "max": 1326.338623046875, "pos_frac": 0.75, "sample": [238.68508911132812, -130.15191650390625, 263.5924072265625, -69.08641052246094, 159.44801330566406, 935.8048706054688, 72.24825286865234, 80.37384033203125, 684.8629150390625, 531.6685180664062, -21.57220458984375, -56.82080078125, 412.58001708984375, 30.025489807128906, 1187.87841796875, 555.7446899414062, 332.54949951171875, -116.11476135253906, 459.8575134277344, 75.21759033203125, -294.9643249511719, 709.1844482421875, -181.758544921875, 540.7833251953125, 236.49368286132812, -4.1228179931640625, 292.89208984375, 641.0833129882812, 528.8959350585938, 243.19754028320312, 530.40966796875, 68.13584899902344, 134.25119018554688, 855.6302490234375, -420.32135009765625, -85.04570770263672, -258.7341003417969, 440.64984130859375, 215.31016540527344, 486.524658203125, 1326.338623046875, -58.75946807861328, 73.94305419921875, 146.81045532226562, 307.92340087890625, 117.06425476074219, 181.3717041015625, 150.4403076171875, -284.15301513671875, -239.98289489746094, 378.55340576171875, 74.99522399902344, 568.5369873046875, -64.51841735839844, 590.8327026367188, 281.326416015625, 584.9093017578125, 1039.1263427734375, 158.42929077148438, 46.72831344604492, 585.118408203125, 925.296630859375, 330.5880432128906, -128.60281372070312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000418.npy"} +{"epoch": 0.6138032305433186, "step": 419, "batch_size": 64, "mean": 300.7904052734375, "std": 325.4169921875, "min": -478.090576171875, "p10": -82.29907150268554, "median": 289.81219482421875, "p90": 752.9817382812502, "max": 972.4583740234375, "pos_frac": 0.875, "sample": [792.036865234375, -422.2019348144531, 360.4788513183594, 551.8671875, 103.36133575439453, 768.051025390625, 65.12753295898438, -71.55897521972656, 25.92193603515625, 676.590087890625, 828.4088134765625, 160.61785888671875, 290.67547607421875, 563.864501953125, 460.639892578125, 525.4240112304688, 244.64437866210938, 168.58526611328125, -478.090576171875, 57.40691375732422, -86.90196990966797, 771.81640625, -247.4384002685547, 300.92364501953125, 530.5851440429688, -292.45343017578125, 162.69277954101562, 164.22264099121094, 528.559814453125, 880.72216796875, 449.1646728515625, 61.25994873046875, 414.19805908203125, 288.94891357421875, 466.64300537109375, 96.46385955810547, 120.22200012207031, 90.85064697265625, 510.7027282714844, 394.0606689453125, 678.8692626953125, 185.99249267578125, 717.820068359375, 53.42283248901367, 88.2779541015625, 49.68577194213867, 23.479555130004883, -166.51296997070312, 675.8170166015625, 579.3427124023438, 625.481201171875, 427.2608642578125, 119.37969970703125, 972.4583740234375, 423.5362243652344, 449.47894287109375, 551.51171875, 286.37188720703125, 104.89511108398438, 769.9144897460938, -217.7268524169922, 6.68603515625, 181.41790771484375, 386.6317443847656], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000419.npy"} +{"epoch": 0.6152716593245228, "step": 420, "batch_size": 64, "mean": 187.75083923339844, "std": 403.23602294921875, "min": -826.8560791015625, "p10": -270.9494506835937, "median": 121.58061218261719, "p90": 629.1791259765625, "max": 1253.683349609375, "pos_frac": 0.6875, "sample": [-135.00323486328125, 233.40737915039062, 44.939476013183594, -11.98074722290039, 632.899169921875, -131.99734497070312, -307.9136657714844, -20.070953369140625, 211.7815399169922, 620.4990234375, -40.29292297363281, 10.655403137207031, -35.68507766723633, 180.98081970214844, -22.4449462890625, 124.58246612548828, 561.5599365234375, 317.92059326171875, 81.53419494628906, 212.96658325195312, -59.29296875, 267.5622253417969, -192.98178100585938, 448.3335876464844, -121.11691284179688, 116.25839233398438, 27.222946166992188, 537.9200439453125, 112.67546844482422, -92.5941162109375, 528.77392578125, 497.77581787109375, 484.21295166015625, -375.2394104003906, 118.5787582397461, 207.01817321777344, -332.7674560546875, 20.33448028564453, 700.4515380859375, 760.7384033203125, -73.62114715576172, -826.8560791015625, 642.4313354492188, 484.0473937988281, 614.0067749023438, 151.88258361816406, 581.615478515625, 473.06512451171875, 400.94744873046875, 360.4473571777344, 24.16385841369629, -494.4760437011719, 304.0509033203125, 44.56058120727539, 1.8300895690917969, 1218.6142578125, -765.7289428710938, 79.85859680175781, 167.4933319091797, 349.88812255859375, 1253.683349609375, -79.9851303100586, -304.3641662597656, 1226.296875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000420.npy"} +{"epoch": 0.6167400881057269, "step": 421, "batch_size": 64, "mean": 211.4017333984375, "std": 318.4971008300781, "min": -848.3985595703125, "p10": -169.430354309082, "median": 261.74468994140625, "p90": 622.2411254882813, "max": 830.1485595703125, "pos_frac": 0.765625, "sample": [333.8262634277344, 87.94387817382812, 121.04656219482422, -90.53839874267578, 256.9185791015625, 470.80023193359375, 826.7670288085938, -66.8096923828125, 105.12391662597656, 104.81352233886719, 310.138916015625, 13.44228744506836, 574.5909423828125, 701.0923461914062, 495.420654296875, 692.5531005859375, 320.5396728515625, -224.2348175048828, 113.29813385009766, 151.34451293945312, 740.5384521484375, 492.448486328125, 68.65716552734375, -226.03024291992188, 282.203369140625, 216.40309143066406, 442.8592529296875, 290.9576721191406, 20.946876525878906, 253.90065002441406, 636.5487060546875, 397.8533630371094, 633.2514038085938, 297.60931396484375, 314.4468688964844, -536.2372436523438, 513.4213256835938, 385.99163818359375, -414.9674377441406, 596.5504760742188, 310.7779541015625, -19.091711044311523, 362.3736572265625, 830.1485595703125, 267.7745666503906, 263.806396484375, 183.4293212890625, -42.82392883300781, -11.471364974975586, 494.27691650390625, 126.55654907226562, -25.600303649902344, 263.40325927734375, -848.3985595703125, -183.6782989501953, 111.04588317871094, 14.01861572265625, -136.18515014648438, -84.1063232421875, 276.0489501953125, 260.08612060546875, -282.9376220703125, 339.6112976074219, 355.2147216796875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000421.npy"} +{"epoch": 0.618208516886931, "step": 422, "batch_size": 64, "mean": 240.4937286376953, "std": 346.0240173339844, "min": -833.665283203125, "p10": -146.56182708740232, "median": 233.0600814819336, "p90": 656.9631958007814, "max": 1227.737060546875, "pos_frac": 0.78125, "sample": [268.97998046875, 680.4295654296875, 821.1386108398438, 608.4952392578125, 333.0374755859375, -243.5912628173828, -534.4053344726562, 718.04443359375, -68.71749114990234, 1227.737060546875, -127.81996154785156, 176.81544494628906, 201.53001403808594, 50.0435791015625, 450.2664794921875, 197.25588989257812, 43.43541717529297, -122.99337005615234, -28.981197357177734, 165.56202697753906, 22.831878662109375, 773.4901123046875, 343.2913818359375, 116.46710205078125, 452.0050354003906, 188.01864624023438, 142.35153198242188, 628.4808349609375, 325.7122497558594, 595.8539428710938, 731.0338745117188, 388.11370849609375, 344.2355651855469, 222.7183380126953, 330.7443542480469, 401.73516845703125, 318.4914855957031, -381.286376953125, 336.1896057128906, 4.1590576171875, -33.41399002075195, 386.7437438964844, 243.40182495117188, -833.665283203125, -18.87374496459961, 574.8304443359375, 507.4732666015625, 87.37129211425781, 669.169921875, -165.5400848388672, 431.5791931152344, 208.7519989013672, 48.94244384765625, 70.67558288574219, 563.73291015625, 444.9200744628906, 312.4207763671875, -83.44154357910156, 522.39501953125, 478.9706115722656, -154.59405517578125, 201.7233428955078, 211.86550903320312, -384.73974609375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000422.npy"} +{"epoch": 0.6196769456681351, "step": 423, "batch_size": 64, "mean": 258.9209899902344, "std": 313.658203125, "min": -489.8331298828125, "p10": -81.57417297363281, "median": 212.2914047241211, "p90": 758.5272888183596, "max": 1006.47314453125, "pos_frac": 0.78125, "sample": [260.154296875, 86.20381927490234, 192.50892639160156, 8.1004638671875, -79.43441772460938, -332.8974304199219, 792.9271240234375, 225.44189453125, 573.4105224609375, 503.1217346191406, 781.1236572265625, -134.91531372070312, 851.0057373046875, 280.193359375, 419.5838928222656, -48.33700942993164, 811.5465087890625, 804.4537963867188, 848.13134765625, 705.8024291992188, -57.1466064453125, -11.739990234375, 363.1622314453125, 158.32464599609375, 596.958251953125, 214.2406005859375, 100.63716888427734, 81.76287841796875, -489.8331298828125, -231.94235229492188, 358.099365234375, -117.41172790527344, 201.09701538085938, 335.71746826171875, 635.85888671875, 219.679931640625, -19.14250373840332, 422.08319091796875, -205.53627014160156, 83.92388916015625, 271.59722900390625, 575.3790283203125, 207.26296997070312, 383.3088684082031, 1006.47314453125, 194.3548126220703, 398.7322082519531, -27.155593872070312, 435.16510009765625, 56.04798889160156, 57.35859298706055, 210.3422088623047, 488.1495361328125, 486.8157958984375, 147.9818115234375, 217.38414001464844, -82.4912109375, 191.83761596679688, 145.45803833007812, -55.520904541015625, 590.4669189453125, 78.27860260009766, 104.23538208007812, 302.5630187988281], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000423.npy"} +{"epoch": 0.6211453744493393, "step": 424, "batch_size": 64, "mean": 251.95314025878906, "std": 342.9269714355469, "min": -290.09210205078125, "p10": -199.50229187011718, "median": 226.2011260986328, "p90": 682.5895568847658, "max": 1229.4580078125, "pos_frac": 0.78125, "sample": [-261.8551940917969, 333.129150390625, 9.213912963867188, -87.96827697753906, -107.1405029296875, 587.299072265625, 401.3652648925781, 313.6683044433594, 89.56436920166016, -290.09210205078125, 224.15530395507812, 269.17449951171875, -52.33555603027344, -265.0260925292969, 267.0697937011719, 701.772216796875, 458.8330078125, 922.3492431640625, 127.65742492675781, 416.4333190917969, 528.6829833984375, 218.24684143066406, 615.187744140625, 645.1026000976562, 336.1787414550781, -128.8975830078125, 229.48519897460938, 44.352149963378906, -203.50482177734375, -220.3966064453125, 131.50416564941406, 1157.1868896484375, -190.16305541992188, 304.5628356933594, 438.4219970703125, -76.18048095703125, 370.9271545410156, -269.99566650390625, 228.2469482421875, 48.873992919921875, -95.11785125732422, 129.0992431640625, -268.83984375, 61.09103775024414, 291.01922607421875, 374.447265625, 215.56027221679688, 94.7044906616211, 313.2696228027344, 1229.4580078125, 338.77630615234375, 698.6553955078125, 384.5774230957031, 218.07846069335938, 386.18310546875, 105.63140869140625, 827.0309448242188, 187.40463256835938, 176.9473114013672, 1124.2777099609375, 515.2684326171875, 251.4981231689453, 156.4762420654297, 144.4151611328125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000424.npy"} +{"epoch": 0.6226138032305433, "step": 425, "batch_size": 64, "mean": 190.730224609375, "std": 328.1693420410156, "min": -765.4088134765625, "p10": -139.96823577880858, "median": 215.86668395996094, "p90": 561.1486694335938, "max": 875.7606201171875, "pos_frac": 0.765625, "sample": [-88.91738891601562, -15.458162307739258, 426.6440734863281, -395.74444580078125, -765.4088134765625, 212.98641967773438, 181.7967987060547, 276.3433532714844, 91.73187255859375, 256.35882568359375, 23.408546447753906, 531.0386962890625, -759.6474609375, -6.403125762939453, 156.1710968017578, 128.61138916015625, 100.4103775024414, 125.15380859375, 571.2382202148438, 79.79051208496094, 307.7754821777344, 218.7469482421875, -17.128047943115234, 250.5461883544922, 641.3693237304688, -34.06523895263672, 486.594482421875, 13.176162719726562, -82.61534118652344, 823.4907836914062, 140.36285400390625, 573.03271484375, 676.8986206054688, -179.4924774169922, 187.5218048095703, 263.3554382324219, 537.6063842773438, 201.0830535888672, 81.173583984375, 716.4739990234375, -549.8165893554688, 410.15167236328125, 129.8457794189453, -407.3256530761719, 527.0264282226562, 357.5323486328125, 425.34759521484375, 233.5003662109375, 4.4923858642578125, 384.8254089355469, 253.61517333984375, 241.22891235351562, -142.3258514404297, -127.89508056640625, 255.5718994140625, 318.14471435546875, 493.1662292480469, 875.7606201171875, 469.4026794433594, 289.7141418457031, 515.8316650390625, -134.46713256835938, 244.8187255859375, 202.57699584960938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000425.npy"} +{"epoch": 0.6240822320117474, "step": 426, "batch_size": 64, "mean": 246.22132873535156, "std": 302.5586242675781, "min": -604.2542724609375, "p10": -43.584748077392575, "median": 182.7188949584961, "p90": 671.7873657226564, "max": 854.7387084960938, "pos_frac": 0.828125, "sample": [267.62298583984375, 102.88813018798828, 114.49405670166016, 184.55166625976562, 267.3224792480469, 108.25166320800781, 95.96820068359375, 212.06971740722656, 773.6715698242188, 103.12815856933594, 180.88612365722656, 237.160400390625, 4.202388763427734, 25.211816787719727, 604.2376098632812, -222.068359375, 58.25990295410156, 170.33541870117188, 525.6786499023438, 159.75967407226562, -33.127174377441406, 725.5465087890625, -143.9471435546875, 212.51553344726562, 148.0118865966797, 125.469970703125, -150.3723907470703, 350.5332336425781, 787.3607177734375, 514.8892211914062, 253.34938049316406, 635.7117309570312, 66.43164825439453, 301.0077819824219, 162.6699676513672, 810.9483642578125, 24.349853515625, -370.3348388671875, 135.03451538085938, 543.7081298828125, -45.165802001953125, 331.4976806640625, 854.7387084960938, 111.05266571044922, 463.674072265625, 84.96346282958984, 314.1400146484375, 561.3963012695312, -2.76763916015625, 126.13288879394531, -254.3775634765625, 304.9840393066406, -9.381465911865234, 471.0996398925781, 565.4000854492188, 318.15118408203125, -39.89562225341797, 481.0106201171875, 817.4549560546875, -604.2542724609375, 580.0234985351562, 687.2483520507812, 38.696380615234375, 528.953857421875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000426.npy"} +{"epoch": 0.6255506607929515, "step": 427, "batch_size": 64, "mean": 187.9479217529297, "std": 278.1615295410156, "min": -331.160888671875, "p10": -151.19509887695312, "median": 154.70450592041016, "p90": 482.7317352294924, "max": 1037.7791748046875, "pos_frac": 0.734375, "sample": [315.3998107910156, 9.161571502685547, 275.5317077636719, 410.4212646484375, 803.20751953125, 41.08556365966797, -23.556787490844727, 34.00580596923828, 383.48956298828125, 249.45770263671875, -22.378753662109375, 31.311050415039062, 121.15261840820312, 156.79006958007812, 152.6189422607422, 361.9769287109375, 425.0724182128906, 801.6845703125, 38.738929748535156, -193.94451904296875, 59.563072204589844, 146.61993408203125, 286.7740478515625, 399.0857238769531, 92.55538177490234, 348.4990234375, 296.89544677734375, 871.0361938476562, -5.0123748779296875, 202.62142944335938, -153.8861083984375, 535.9091796875, 370.41436767578125, -144.91607666015625, -112.44889831542969, 319.6362609863281, 365.94378662109375, -31.926326751708984, 512.7528076171875, 206.03341674804688, 273.1817626953125, 109.19349670410156, 122.92183685302734, 137.33262634277344, -331.160888671875, 218.56988525390625, 388.99371337890625, 367.9617919921875, -315.86431884765625, -261.6530456542969, 507.44287109375, 200.250732421875, 395.8950500488281, -16.32709503173828, 1037.7791748046875, -103.18914794921875, 65.12803649902344, 306.3562927246094, 396.8149108886719, 120.57781219482422, -55.93578338623047, -250.7394256591797, -45.029685974121094, -177.20933532714844], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000427.npy"} +{"epoch": 0.6270190895741556, "step": 428, "batch_size": 64, "mean": 254.6417694091797, "std": 380.8979187011719, "min": -687.2521362304688, "p10": -181.7139511108398, "median": 275.2345886230469, "p90": 731.6075744628906, "max": 1121.0247802734375, "pos_frac": 0.734375, "sample": [-17.585979461669922, -241.62313842773438, -1.3324337005615234, 460.0488586425781, 862.7178955078125, 415.96630859375, 35.21208572387695, 564.77880859375, 430.439697265625, 1121.0247802734375, -243.68807983398438, 71.63104248046875, 535.96875, 230.47164916992188, 438.555908203125, -70.40765380859375, -576.9261474609375, 731.2958984375, 420.99456787109375, 566.10546875, 464.77496337890625, 428.5443115234375, -78.84341430664062, 0.8673362731933594, 388.3473815917969, 751.4736938476562, 403.994384765625, 253.07208251953125, -125.575439453125, 297.3970947265625, -87.5772933959961, 320.583740234375, 940.0673828125, -61.511985778808594, 297.94317626953125, 37.49119567871094, 563.00732421875, 492.5584716796875, 37.5682258605957, 147.8221435546875, 85.2047119140625, 157.135986328125, -4.687141418457031, -132.78439331054688, -653.5535278320312, -78.77299499511719, 50.01103210449219, 124.96576690673828, 1093.84423828125, 182.01107788085938, 561.8995361328125, 473.9555969238281, 571.4193725585938, -687.2521362304688, 561.5368041992188, 312.45233154296875, 763.207275390625, 731.7411499023438, 398.150634765625, 207.5496063232422, 706.56298828125, -202.6837615966797, 81.06840515136719, -211.56204223632812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000428.npy"} +{"epoch": 0.6284875183553598, "step": 429, "batch_size": 64, "mean": 190.3538818359375, "std": 339.73333740234375, "min": -534.6334228515625, "p10": -268.9162643432617, "median": 193.17254638671875, "p90": 615.8724121093753, "max": 961.6597290039062, "pos_frac": 0.75, "sample": [73.9079818725586, 127.903076171875, -332.6730041503906, 346.0420837402344, -166.36856079101562, 674.453125, -106.89910125732422, 72.17945098876953, 216.98260498046875, 350.0167236328125, -428.6048583984375, 186.79779052734375, 103.64604949951172, 54.55821990966797, -278.567626953125, 346.70526123046875, 147.32369995117188, -282.10931396484375, 730.6160888671875, -190.12171936035156, 961.6597290039062, 907.7948608398438, 26.98040771484375, 377.6476745605469, 199.54730224609375, 30.53787612915039, 335.7252197265625, 14.517620086669922, 646.49072265625, 529.708740234375, 170.34109497070312, 423.8157958984375, 388.1571044921875, -529.9014282226562, -66.3914566040039, -247.5016326904297, -210.07608032226562, 213.68800354003906, 265.850341796875, -120.34441375732422, 522.839111328125, 372.074951171875, 295.4466552734375, 3.583038330078125, 771.9450073242188, 312.44927978515625, 537.6151123046875, -188.40524291992188, 186.34657287597656, -278.0939636230469, 327.2709045410156, 167.37965393066406, 526.7521362304688, 281.00030517578125, -56.48773956298828, -534.6334228515625, 4.9010772705078125, 262.90576171875, 65.20780181884766, 544.4296875, 342.01947021484375, 886.9033203125, 453.2144775390625, 411.94879150390625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000429.npy"} +{"epoch": 0.6299559471365639, "step": 430, "batch_size": 64, "mean": 202.4704132080078, "std": 261.8102722167969, "min": -315.821044921875, "p10": -88.21687088012695, "median": 178.42408752441406, "p90": 564.2329711914062, "max": 951.0037231445312, "pos_frac": 0.796875, "sample": [248.31631469726562, 177.93809509277344, -73.71688079833984, -56.52907943725586, 680.5343017578125, 435.8782653808594, 24.630218505859375, 257.3056640625, 219.25439453125, 517.5774536132812, 98.178466796875, -36.07343292236328, 42.69525909423828, -76.52335357666016, 154.1426544189453, 567.6423950195312, -222.29147338867188, 387.96490478515625, 73.15107727050781, 80.18569946289062, 185.3241729736328, 81.04874420166016, 182.73365783691406, 132.66741943359375, -2.5640201568603516, 78.08662414550781, 344.369140625, 332.0617370605469, -93.22837829589844, 188.70323181152344, 113.08384704589844, -119.00997161865234, 951.0037231445312, 48.56028747558594, 114.09708404541016, 556.2776489257812, 13.804740905761719, -315.821044921875, 23.608827590942383, 221.72837829589844, 428.9505615234375, 683.084228515625, 437.3307800292969, 107.60968780517578, 250.98251342773438, 681.2080078125, -109.83033752441406, -132.23550415039062, 445.5487365722656, 183.0979461669922, 135.4978790283203, -62.89927291870117, 456.3569641113281, 246.22926330566406, 5.169898986816406, 333.6026611328125, 376.21063232421875, -259.4644775390625, 350.2986755371094, 807.2930297851562, 644.6371459960938, 207.91998291015625, 25.800453186035156, 178.9100799560547], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000430.npy"} +{"epoch": 0.631424375917768, "step": 431, "batch_size": 64, "mean": 297.2268981933594, "std": 293.5876770019531, "min": -368.1769104003906, "p10": -64.55359573364257, "median": 296.5490264892578, "p90": 639.7203857421875, "max": 973.591552734375, "pos_frac": 0.84375, "sample": [-187.57383728027344, 213.7382354736328, -130.33253479003906, 505.3193664550781, 204.86007690429688, 116.4690933227539, 785.9112548828125, 171.35623168945312, 380.1824951171875, 547.5343627929688, 274.8971862792969, 447.5745849609375, 81.23416137695312, -51.70103454589844, -70.06183624267578, 151.18968200683594, 42.059356689453125, -368.1769104003906, 500.2777099609375, -222.3691864013672, 591.061767578125, 646.0496826171875, 973.591552734375, 894.2206420898438, 105.91932678222656, 520.7872314453125, 380.98944091796875, 449.7734375, 242.6582489013672, 297.71990966796875, 648.1482543945312, 301.7743225097656, 218.38934326171875, 73.53388977050781, 427.54473876953125, -314.7455749511719, 944.3905029296875, 10.334274291992188, 297.2385559082031, -41.87902069091797, 472.2452087402344, 295.8594970703125, 881.6727294921875, -5.988880157470703, 180.4738311767578, 244.5727996826172, 408.26263427734375, 100.0899429321289, 309.9762878417969, 326.7747802734375, 414.4701843261719, 624.9520263671875, 610.440185546875, 352.96783447265625, 526.167724609375, 419.4804382324219, 176.68588256835938, 100.0655288696289, -92.3641357421875, 239.10487365722656, 570.6445922851562, 145.83221435546875, 162.18711853027344, 498.05914306640625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000431.npy"} +{"epoch": 0.6328928046989721, "step": 432, "batch_size": 64, "mean": 282.22406005859375, "std": 288.94122314453125, "min": -236.3560791015625, "p10": -17.702283477783197, "median": 256.833251953125, "p90": 661.7317932128907, "max": 1277.878662109375, "pos_frac": 0.859375, "sample": [86.91551208496094, 172.55667114257812, 275.43035888671875, 280.37255859375, 207.2118682861328, 179.22410583496094, 141.74093627929688, -23.289104461669922, 223.43106079101562, 59.86195373535156, 10.740528106689453, 28.13174057006836, 624.9395751953125, 469.230712890625, -9.953449249267578, 1277.878662109375, 508.9457092285156, 307.0511169433594, 15.270370483398438, 305.64764404296875, 150.68374633789062, 0.843597412109375, 311.8207702636719, 37.558349609375, 496.44195556640625, 313.1424560546875, 724.5203247070312, 187.17808532714844, 458.23089599609375, 16.11859130859375, 263.3248291015625, -20.347267150878906, -197.7198944091797, 227.3555145263672, -172.2279052734375, 601.3301391601562, 158.77598571777344, -149.7132110595703, 340.2869567871094, 137.61302185058594, 196.83352661132812, -146.72714233398438, 262.31781005859375, 471.34967041015625, 251.34869384765625, 400.238037109375, 571.748046875, 463.5235595703125, 421.0924072265625, 375.1973876953125, 159.47068786621094, 529.9048461914062, -11.530654907226562, 823.27197265625, 269.8616638183594, 128.31729125976562, 765.470458984375, 385.1581115722656, 653.2466430664062, 830.0709228515625, -236.3560791015625, 761.1424560546875, 45.464393615722656, 665.3682861328125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000432.npy"} +{"epoch": 0.6343612334801763, "step": 433, "batch_size": 64, "mean": 188.8873291015625, "std": 316.580322265625, "min": -798.4986572265625, "p10": -181.60656127929687, "median": 206.67046356201172, "p90": 557.1519287109375, "max": 757.0064697265625, "pos_frac": 0.78125, "sample": [45.863372802734375, 554.3540649414062, 505.5331115722656, 496.3990478515625, 397.6609191894531, -102.45869445800781, 294.4295654296875, -579.924560546875, 481.1696472167969, 570.6981811523438, -798.4986572265625, 43.88752746582031, 531.137939453125, 404.8652038574219, 82.19049072265625, 1.8337516784667969, 217.4482879638672, 356.76251220703125, 566.5863647460938, -662.958251953125, -18.923919677734375, 558.3510131835938, 551.8756713867188, 307.247314453125, 139.88665771484375, 171.28346252441406, 70.09974670410156, 187.42837524414062, -3.5003509521484375, 338.88751220703125, 33.78961181640625, 757.0064697265625, -307.54351806640625, 422.897705078125, 203.5030975341797, 441.6037902832031, 507.308349609375, -259.3951721191406, 294.1612243652344, 43.94428253173828, 675.45556640625, 571.4085693359375, 277.50750732421875, -154.99578857421875, 209.83782958984375, 63.564476013183594, 289.8687438964844, 8.240882873535156, -183.99058532714844, -256.0653076171875, 644.0833129882812, 389.5260314941406, 162.182861328125, -57.90317916870117, 143.62158203125, -176.04383850097656, 63.61420440673828, 380.30340576171875, -42.397430419921875, 54.096946716308594, 310.42742919921875, 418.39898681640625, 161.59828186035156, 289.55859375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000433.npy"} +{"epoch": 0.6358296622613803, "step": 434, "batch_size": 64, "mean": 236.9807586669922, "std": 366.41131591796875, "min": -309.1239929199219, "p10": -85.27668914794921, "median": 168.28346252441406, "p90": 708.2346191406252, "max": 2097.28662109375, "pos_frac": 0.71875, "sample": [491.8670959472656, 199.86648559570312, 2097.28662109375, 128.99256896972656, -184.6819610595703, 155.99826049804688, 199.6561279296875, 451.4505310058594, -33.554405212402344, -42.44703674316406, 265.7181701660156, -90.85588073730469, 45.224510192871094, 733.9071044921875, -66.31993103027344, 111.83624267578125, -61.267295837402344, -309.1239929199219, 491.3121643066406, 583.218505859375, 847.701904296875, -144.74264526367188, 104.75619506835938, 245.70394897460938, 443.59503173828125, 206.717529296875, -33.671592712402344, -20.948335647583008, 262.12567138671875, 289.7903747558594, 33.4296875, 206.20407104492188, 392.65576171875, -130.0558319091797, 29.675552368164062, 498.86505126953125, 108.19528198242188, 113.3033447265625, 170.950439453125, 244.37054443359375, -72.25857543945312, 426.1583251953125, 648.3321533203125, 69.14408874511719, 175.64096069335938, 254.61175537109375, 451.03607177734375, 135.17869567871094, 165.61648559570312, 768.6514282226562, 311.43524169921875, -58.87255859375, 750.0010986328125, -65.36128234863281, 133.3694305419922, 69.69631958007812, 840.3868408203125, 208.969970703125, -70.2257080078125, -187.3350830078125, 870.0116577148438, 508.99334716796875, -0.0818939208984375, -203.0368194580078], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000434.npy"} +{"epoch": 0.6372980910425844, "step": 435, "batch_size": 64, "mean": 250.6724853515625, "std": 321.3119201660156, "min": -681.3155517578125, "p10": -76.2742736816406, "median": 227.65650177001953, "p90": 656.1837097167971, "max": 1026.7198486328125, "pos_frac": 0.828125, "sample": [1026.7198486328125, 444.3915100097656, -681.3155517578125, 384.19268798828125, 586.767822265625, 215.06597900390625, 341.1860656738281, 485.91961669921875, 362.47613525390625, 61.36296081542969, 544.1292724609375, 189.8365020751953, 96.93023681640625, 228.2030029296875, -41.51872253417969, 604.1926879882812, 980.2211303710938, 806.018798828125, 233.67398071289062, -168.06187438964844, 14.664398193359375, -0.02813720703125, -24.28753662109375, 87.81663513183594, -118.30682373046875, 110.87425231933594, 217.13841247558594, 201.97561645507812, -106.64020538330078, 253.9205322265625, 280.236572265625, 21.92650032043457, 105.38787841796875, 283.0524597167969, 529.782470703125, 361.7687683105469, 420.38671875, -490.369140625, 528.8153076171875, -342.8293762207031, 262.9076843261719, 248.12149047851562, 678.465576171875, -5.6194610595703125, 93.72185516357422, 0.9753913879394531, 584.7652587890625, 592.2839965820312, 19.916751861572266, 2.195096969604492, 905.458740234375, 46.87379455566406, 260.78082275390625, 32.574554443359375, 188.41494750976562, 708.2642211914062, 700.1078491210938, 277.84332275390625, 227.11000061035156, 500.5531921386719, 158.2687225341797, -91.16950988769531, 421.23699951171875, 193.30978393554688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000435.npy"} +{"epoch": 0.6387665198237885, "step": 436, "batch_size": 64, "mean": 220.05868530273438, "std": 287.1812744140625, "min": -622.4014282226562, "p10": -113.81833496093749, "median": 192.30191802978516, "p90": 527.6524841308594, "max": 971.668212890625, "pos_frac": 0.796875, "sample": [-91.32473754882812, 176.03013610839844, -117.24552917480469, -7.503028869628906, 532.1202392578125, 270.8840637207031, 379.4276428222656, 792.1357421875, 504.8852233886719, -125.96822357177734, 177.22666931152344, 815.3818359375, -58.47564697265625, -184.15110778808594, 249.4011993408203, 266.8591613769531, -105.82154846191406, 43.355560302734375, 846.4207763671875, -256.075439453125, 409.634033203125, 395.46368408203125, 303.1829833984375, 421.3945617675781, 553.56640625, 15.794921875, 428.536865234375, 517.2277221679688, 57.99476623535156, 106.94221496582031, 971.668212890625, 31.52259063720703, 472.5753479003906, 266.68353271484375, 306.3722839355469, 159.7825927734375, -2.7772979736328125, 90.67923736572266, 242.868896484375, 134.629638671875, 440.9653625488281, 74.13040161132812, 213.2996368408203, 148.10586547851562, 152.9746551513672, 321.4154968261719, 261.5892639160156, -174.20361328125, 178.86325073242188, 180.20513916015625, 270.88714599609375, 230.56817626953125, 284.3222961425781, 500.714111328125, 871.573974609375, 17.023544311523438, 68.41605377197266, 48.12361145019531, -622.4014282226562, 204.39869689941406, -139.1542205810547, 109.33447265625, 458.2916259765625, -6.993499755859375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000436.npy"} +{"epoch": 0.6402349486049926, "step": 437, "batch_size": 64, "mean": 226.0380401611328, "std": 290.1693420410156, "min": -542.5657958984375, "p10": -93.41583251953125, "median": 226.5155029296875, "p90": 562.6603271484375, "max": 963.0492553710938, "pos_frac": 0.75, "sample": [127.09645080566406, -273.452392578125, 286.12261962890625, 514.9500732421875, 447.5695495605469, 7.489522933959961, 659.015869140625, 158.57302856445312, 354.78973388671875, -4.858428955078125, -542.5657958984375, 226.76419067382812, -321.2164611816406, 400.8862609863281, -55.19805908203125, 211.09349060058594, 226.26681518554688, -323.4387512207031, 165.3134765625, 564.8836669921875, 63.142723083496094, 484.248779296875, 198.86985778808594, -21.4819278717041, -181.21286010742188, 643.4278564453125, 157.48074340820312, 116.49549102783203, 393.3816223144531, 393.450439453125, 791.835693359375, 356.42755126953125, 789.489501953125, -89.61430358886719, 963.0492553710938, -102.16192626953125, -73.90218353271484, 17.076139450073242, 263.5339050292969, 284.6030578613281, 30.267539978027344, -40.5321044921875, 299.839599609375, 333.7024230957031, 418.29669189453125, 106.7919921875, 445.97412109375, 480.1974792480469, 471.386474609375, 557.4725341796875, 199.8769073486328, 388.1067810058594, 389.53387451171875, -79.94329833984375, -44.190940856933594, 696.2463989257812, 269.2076110839844, 398.51025390625, -95.04505920410156, 288.76422119140625, 72.96630859375, 405.57745361328125, 202.61837768554688, -7.416221618652344], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000437.npy"} +{"epoch": 0.6417033773861968, "step": 438, "batch_size": 64, "mean": 254.41961669921875, "std": 295.1921081542969, "min": -387.9889221191406, "p10": -82.82229766845703, "median": 217.5363311767578, "p90": 621.1742614746094, "max": 968.427734375, "pos_frac": 0.78125, "sample": [116.99224853515625, -35.62737274169922, 414.1059875488281, -84.06486511230469, 204.34609985351562, 150.56353759765625, 858.9940185546875, -59.27555847167969, 166.37185668945312, -62.7236328125, 248.24075317382812, 478.3077087402344, 2.7157554626464844, 200.46185302734375, 401.268798828125, 471.75115966796875, 270.0594482421875, 968.427734375, 84.23286437988281, 230.7265625, 169.43252563476562, 591.254150390625, 412.272705078125, 617.7950439453125, 172.8182373046875, 152.15426635742188, 41.11023712158203, 320.515869140625, -387.9889221191406, -44.80792236328125, -119.92059326171875, -19.68446159362793, 355.3988952636719, 735.0671997070312, 613.284912109375, 355.1800537109375, 296.1016845703125, -342.3974914550781, 536.445556640625, 304.54254150390625, 770.4326171875, 178.68853759765625, -89.17916870117188, 788.7561645507812, -79.9229736328125, 275.31439208984375, 529.6011352539062, 381.3534240722656, 496.2347412109375, 116.63195037841797, 81.577880859375, 261.24188232421875, 767.8912353515625, 102.92646789550781, -72.29940795898438, 100.79595947265625, 174.82247924804688, -316.08148193359375, -99.02191925048828, 569.017578125, 172.59413146972656, 622.6224975585938, 324.846923828125, 439.5627746582031], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000438.npy"} +{"epoch": 0.6431718061674009, "step": 439, "batch_size": 64, "mean": 210.9752655029297, "std": 330.1168518066406, "min": -418.907470703125, "p10": -101.90445709228514, "median": 160.55846405029297, "p90": 719.6625732421876, "max": 1067.6524658203125, "pos_frac": 0.734375, "sample": [303.44635009765625, 731.66748046875, 98.73640441894531, 304.84893798828125, 69.26449584960938, -24.524871826171875, 649.5267944335938, 7.8215484619140625, 146.42701721191406, 102.57408905029297, -308.45379638671875, 760.5123901367188, 93.65037536621094, -43.30016326904297, -84.84331512451172, 240.58932495117188, 326.9039611816406, 591.595458984375, 691.651123046875, 198.09823608398438, 80.41438293457031, 907.6290893554688, 267.6795959472656, -86.58090209960938, 202.51397705078125, -222.43109130859375, 160.6326446533203, -108.47169494628906, -3.235383987426758, 47.31121063232422, 331.7823181152344, -300.52349853515625, 301.8176574707031, 436.0652770996094, 1067.6524658203125, 362.49542236328125, 578.0948486328125, 229.68148803710938, 2.0755767822265625, 227.05853271484375, 140.19024658203125, -50.097923278808594, -391.24755859375, -64.92449951171875, -7.096336364746094, 243.59091186523438, 160.48428344726562, 245.54444885253906, -379.2732849121094, 600.1939697265625, 770.3310546875, 8.044445037841797, 232.8202667236328, 786.8838500976562, 101.6887435913086, -45.87348937988281, 463.754638671875, 388.04644775390625, 109.704345703125, 93.50639343261719, 962.1694946289062, 280.51641845703125, -418.907470703125, -65.48622131347656], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000439.npy"} +{"epoch": 0.644640234948605, "step": 440, "batch_size": 64, "mean": 245.92010498046875, "std": 354.17364501953125, "min": -267.0303955078125, "p10": -109.75889358520507, "median": 148.78441619873047, "p90": 770.4332885742188, "max": 1414.7939453125, "pos_frac": 0.78125, "sample": [278.8335266113281, 339.9591979980469, 385.2405090332031, 433.3355712890625, -165.10498046875, 77.39305877685547, 611.8092041015625, 129.27972412109375, 79.76805114746094, 859.2696533203125, 36.06584930419922, 68.70425415039062, 184.4051513671875, 107.46240234375, 801.9490966796875, -220.17282104492188, 115.06204223632812, -267.0303955078125, 34.551025390625, 8.098609924316406, 204.27005004882812, 684.531005859375, 75.93646240234375, 47.95227813720703, 425.3905029296875, 197.51853942871094, -112.23491668701172, 204.52655029296875, 457.04302978515625, 134.30467224121094, 148.833251953125, 776.3867797851562, -80.09805297851562, 270.82611083984375, -20.806791305541992, -103.98150634765625, 1220.26318359375, -234.31723022460938, 184.77609252929688, 89.33478546142578, 850.5223388671875, 246.42352294921875, 1027.3360595703125, 520.9061279296875, 224.3857421875, 5.105018615722656, -182.8802490234375, 97.54454040527344, 487.7455139160156, 561.8341064453125, 173.67816162109375, 756.5418090820312, -61.553932189941406, 148.73558044433594, 8.429668426513672, 548.951171875, 58.7752571105957, -41.271827697753906, -75.03803253173828, 154.67562866210938, 1414.7939453125, 542.8599243164062, -173.55657958984375, -25.390411376953125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000440.npy"} +{"epoch": 0.6461086637298091, "step": 441, "batch_size": 64, "mean": 239.32122802734375, "std": 430.77850341796875, "min": -886.4420776367188, "p10": -230.66623687744138, "median": 211.94644927978516, "p90": 749.1942626953125, "max": 1935.6561279296875, "pos_frac": 0.734375, "sample": [280.72979736328125, 167.23196411132812, 240.25579833984375, 526.7186889648438, 428.4877014160156, 820.2139892578125, -59.860595703125, 26.985321044921875, 162.46969604492188, -399.587646484375, -214.5171356201172, -43.86463165283203, 729.0468139648438, 923.6323852539062, 222.941162109375, -453.4664001464844, 174.98951721191406, 793.4075317382812, 108.47900390625, 53.567840576171875, 43.051170349121094, 86.91288757324219, 23.633819580078125, 749.230224609375, 826.431884765625, 136.72067260742188, 247.8198699951172, 453.56707763671875, 423.0531005859375, 1935.6561279296875, 38.233924865722656, -510.3300476074219, 284.33612060546875, 534.1612548828125, -470.58074951171875, -109.49905395507812, 129.8338623046875, 258.5351257324219, 626.2760009765625, -347.8359680175781, -53.008575439453125, 492.7418212890625, -55.83100891113281, -886.4420776367188, 450.97918701171875, 418.53094482421875, 337.98297119140625, 428.80078125, 391.8775939941406, -32.815242767333984, 200.9517364501953, -237.5872802734375, -96.3634033203125, 433.6405029296875, 235.48097229003906, 121.36163330078125, 749.1103515625, 404.30029296875, 522.0953369140625, 1036.9312744140625, -81.5692138671875, -92.1336898803711, 121.70703887939453, 658.7482299804688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000441.npy"} +{"epoch": 0.6475770925110133, "step": 442, "batch_size": 64, "mean": 287.401123046875, "std": 390.72393798828125, "min": -541.2836303710938, "p10": -95.57558975219722, "median": 167.1875, "p90": 812.4189758300782, "max": 1258.87939453125, "pos_frac": 0.796875, "sample": [-114.39180755615234, 190.6855926513672, 543.2965087890625, 8.16064453125, 296.64227294921875, 160.40695190429688, -41.20305633544922, 225.5063018798828, 66.02688598632812, 1.7739830017089844, -26.868946075439453, 454.2442626953125, -32.92420196533203, 494.04559326171875, 399.9932861328125, 694.3413696289062, 989.4497680664062, 426.2165222167969, -51.67108154296875, 113.97154235839844, 155.2420654296875, 545.221435546875, 85.539306640625, 281.8447570800781, -168.40956115722656, 268.9700012207031, 793.9882202148438, 333.966064453125, 913.6355590820312, 173.96804809570312, 66.43658447265625, -541.2836303710938, 112.17391967773438, -173.35911560058594, -8.626502990722656, 547.35498046875, -50.482791900634766, 467.1025695800781, 90.77025604248047, 1258.87939453125, -286.9820556640625, 321.351806640625, 790.926513671875, 26.55135154724121, 34.6636962890625, 1199.5711669921875, 707.7440185546875, 313.58782958984375, 683.3778686523438, 119.05525970458984, 60.33831787109375, -316.7401428222656, 73.50152587890625, 50.44935607910156, 16.608413696289062, -244.5417022705078, 348.43157958984375, 481.8763427734375, 1135.4151611328125, 735.0783081054688, 1145.178955078125, 820.31787109375, 158.0240936279297, 69.25245666503906], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000442.npy"} +{"epoch": 0.6490455212922174, "step": 443, "batch_size": 64, "mean": 189.53274536132812, "std": 413.9888610839844, "min": -876.92578125, "p10": -264.7450897216797, "median": 177.75292205810547, "p90": 698.9913696289067, "max": 1433.7340087890625, "pos_frac": 0.765625, "sample": [1237.4324951171875, 80.03946685791016, 859.069580078125, 19.773086547851562, 300.8072204589844, 111.38685607910156, -100.01630401611328, -103.06352233886719, -349.5057373046875, 134.8708038330078, 76.95945739746094, -16.300338745117188, 164.5225372314453, 192.60430908203125, 59.885528564453125, -751.4031982421875, 105.81440734863281, -205.8848419189453, -61.625877380371094, 127.24566650390625, 1204.75341796875, 335.03900146484375, 552.6372680664062, 298.6145324707031, -630.5260620117188, -586.8240356445312, 273.3334045410156, 236.73744201660156, 744.1524658203125, -876.92578125, 390.6228942871094, 356.1473388671875, 237.2122039794922, 140.1194305419922, 144.34339904785156, -111.47711944580078, 164.42022705078125, 448.9395751953125, 448.5018005371094, 183.2329559326172, -264.44403076171875, 34.27027893066406, 402.8837585449219, -264.8741149902344, 422.28338623046875, 99.19990539550781, 348.7360534667969, 1433.7340087890625, 337.96466064453125, 137.711669921875, 593.615478515625, 96.2166748046875, 203.3625946044922, 206.51734924316406, -191.2611541748047, 251.35037231445312, -274.3782958984375, 255.08253479003906, 172.27288818359375, 249.8067626953125, 826.2003173828125, 816.68798828125, 197.13385009765625, 204.3565673828125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000443.npy"} +{"epoch": 0.6505139500734214, "step": 444, "batch_size": 64, "mean": 295.92822265625, "std": 382.186279296875, "min": -629.5835571289062, "p10": -132.24369812011716, "median": 278.8709259033203, "p90": 734.3616149902346, "max": 1404.37646484375, "pos_frac": 0.796875, "sample": [315.951416015625, -310.28656005859375, 74.12732696533203, 561.6571044921875, 53.22545623779297, 347.12530517578125, -278.16748046875, 1384.3677978515625, 888.6822509765625, -64.60038757324219, -147.41812133789062, 230.61502075195312, 186.88043212890625, 594.2008666992188, 607.5830078125, 139.05389404296875, 749.9629516601562, 462.43206787109375, -140.00448608398438, 136.6159210205078, 22.08807945251465, -14.991189956665039, 433.2382507324219, 819.7861938476562, 80.40885925292969, 336.42266845703125, 352.51434326171875, -37.02471923828125, 277.0492858886719, -284.59442138671875, -43.492393493652344, 1404.37646484375, 280.69256591796875, 341.1793212890625, 344.16241455078125, 481.09881591796875, 1136.239013671875, 393.99615478515625, 511.852294921875, 230.94644165039062, -114.13519287109375, 1030.578369140625, 620.4844970703125, 80.43465423583984, 529.7178344726562, 526.0938110351562, 540.537353515625, 11.583902359008789, 158.47731018066406, 166.38365173339844, 275.22613525390625, 293.15789794921875, 303.0809631347656, 248.98886108398438, 452.0570983886719, 291.1376953125, 47.695716857910156, -19.993667602539062, -278.32330322265625, 53.048309326171875, 697.95849609375, -629.5835571289062, 229.93646240234375, 566.9093017578125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000444.npy"} +{"epoch": 0.6519823788546255, "step": 445, "batch_size": 64, "mean": 283.6527099609375, "std": 351.0422058105469, "min": -384.80389404296875, "p10": -144.5286666870117, "median": 255.5395050048828, "p90": 716.3821411132812, "max": 1171.9107666015625, "pos_frac": 0.796875, "sample": [257.5281066894531, 105.79354858398438, 49.153587341308594, 956.6739501953125, 7.5174102783203125, 582.4691162109375, 433.58685302734375, 149.85760498046875, 42.070556640625, 122.98285675048828, 116.1690673828125, 595.5723876953125, 26.669204711914062, 467.9984436035156, 179.31283569335938, -384.80389404296875, 112.1591796875, 498.6842956542969, 449.00506591796875, 218.25621032714844, 436.2948303222656, 334.71954345703125, 533.88037109375, 673.5778198242188, 21.875843048095703, 158.42047119140625, 1171.9107666015625, 474.7900695800781, 483.229736328125, 650.3621215820312, 744.484130859375, 704.5311889648438, 201.77293395996094, 721.4611206054688, 31.481124877929688, -144.90948486328125, 263.6261291503906, 290.7014465332031, -147.652099609375, 586.88720703125, 2.9930038452148438, 1026.288330078125, 541.9251098632812, -265.5118408203125, 789.6522216796875, 367.50494384765625, 137.52072143554688, -194.876220703125, -46.497215270996094, -73.62532043457031, 283.6983947753906, -14.096229553222656, -143.6400909423828, 424.02972412109375, 224.72943115234375, 261.023193359375, -35.676971435546875, -220.80322265625, 1160.6092529296875, -76.95463562011719, 610.4322509765625, 253.5509033203125, 327.189697265625, -363.7946472167969], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000445.npy"} +{"epoch": 0.6534508076358296, "step": 446, "batch_size": 64, "mean": 296.90032958984375, "std": 318.046875, "min": -559.4014282226562, "p10": -30.7936408996582, "median": 274.96669006347656, "p90": 772.8624572753907, "max": 1124.29541015625, "pos_frac": 0.859375, "sample": [10.837776184082031, 321.44580078125, 51.03639221191406, 788.8421630859375, 826.8197631835938, 353.8367004394531, 31.306659698486328, -559.4014282226562, 235.2984161376953, 672.217529296875, 879.3936157226562, 433.65386962890625, -66.8834457397461, 729.6744995117188, 616.999267578125, 591.8963012695312, 576.0869140625, 364.05682373046875, 147.20352172851562, 414.7264709472656, -32.82861328125, 186.084228515625, 122.85414123535156, 185.47462463378906, 193.9641571044922, -364.32171630859375, 117.70939636230469, 121.50729370117188, 466.32049560546875, 273.8155517578125, 540.371337890625, -112.90007019042969, 61.082794189453125, 196.88934326171875, 32.69969177246094, 179.81915283203125, -15.678070068359375, 471.51434326171875, 344.96551513671875, 816.2484130859375, 378.354736328125, 139.61976623535156, 814.4082641601562, 1124.29541015625, 392.0219421386719, -26.045372009277344, 797.85205078125, 426.7598571777344, 307.16265869140625, 276.9723815917969, 439.20648193359375, 30.858566284179688, -110.11248779296875, 142.55966186523438, 85.52864837646484, 218.43896484375, 136.3629150390625, 393.65753173828125, 735.5764770507812, 202.2681884765625, -299.5899353027344, 614.4632568359375, 300.2431945800781, 276.1178283691406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000446.npy"} +{"epoch": 0.6549192364170338, "step": 447, "batch_size": 64, "mean": 309.52423095703125, "std": 384.147705078125, "min": -447.59747314453125, "p10": -126.84581909179687, "median": 264.6742706298828, "p90": 734.5395385742188, "max": 1917.8037109375, "pos_frac": 0.8125, "sample": [265.8822326660156, 94.3748550415039, 362.45111083984375, 243.65696716308594, 29.610870361328125, -447.59747314453125, 485.7669372558594, 266.6551818847656, 347.2777404785156, -123.40512084960938, 118.69537353515625, 686.9407958984375, 263.46630859375, 180.65928649902344, 740.2109375, 611.21728515625, 804.126708984375, 44.630462646484375, 1917.8037109375, 804.1213989257812, 215.08688354492188, 1091.6927490234375, 481.69940185546875, 450.4881591796875, -226.2127227783203, 249.5064697265625, 556.4642333984375, -149.81442260742188, 275.4013977050781, 159.20802307128906, -139.6552734375, -55.28233337402344, -0.37990570068359375, -128.32040405273438, 384.48223876953125, -24.366777420043945, 213.96810913085938, 620.0966186523438, 104.7857666015625, 32.82904815673828, 612.560546875, 50.510154724121094, 136.2672576904297, -436.47314453125, 741.878173828125, 580.13134765625, 446.8481140136719, -193.49778747558594, 225.0462646484375, 30.8443546295166, 496.45330810546875, 389.8177490234375, 455.94268798828125, 338.49053955078125, 159.73587036132812, 546.9071044921875, 289.8537292480469, 132.32176208496094, 1078.4146728515625, 648.076904296875, 106.42015838623047, 721.3062744140625, 543.2807006835938, -99.81051635742188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000447.npy"} +{"epoch": 0.6563876651982379, "step": 448, "batch_size": 64, "mean": 287.42877197265625, "std": 356.0559997558594, "min": -611.5673217773438, "p10": -109.19194412231444, "median": 257.8446960449219, "p90": 695.7321228027344, "max": 1343.649658203125, "pos_frac": 0.8125, "sample": [8.833915710449219, 72.99020385742188, -388.44378662109375, 172.58380126953125, 580.8624267578125, 6.045166015625, 92.81184387207031, -64.33435821533203, 290.75262451171875, -611.5673217773438, 487.92333984375, 92.78266143798828, 656.031494140625, 114.9957275390625, 246.76187133789062, 572.2622680664062, -42.76972198486328, -90.69441223144531, 690.7018432617188, -135.44638061523438, -133.96852111816406, 114.04925537109375, 770.769287109375, 507.4183349609375, 241.6090087890625, 283.5948791503906, 274.30596923828125, 142.11376953125, 475.6156005859375, 766.5738525390625, 343.43798828125, 554.20654296875, -56.779762268066406, 358.0594482421875, 1343.649658203125, 24.27424430847168, 126.58906555175781, 214.81065368652344, 83.5528335571289, 353.71014404296875, 691.5966796875, 578.5145874023438, 268.9275207519531, -242.0218505859375, 1026.01123046875, 327.393798828125, 229.932373046875, -92.32098388671875, 465.7750549316406, 44.748138427734375, 289.1516418457031, -181.54136657714844, 1066.1768798828125, -116.42235565185547, 697.5044555664062, 871.8547973632812, 67.7110595703125, 379.170166015625, 113.9119644165039, 320.7818908691406, 566.841064453125, 671.7861328125, 595.4412841796875, 213.84100341796875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000448.npy"} +{"epoch": 0.657856093979442, "step": 449, "batch_size": 64, "mean": 296.62786865234375, "std": 443.0687255859375, "min": -823.38623046875, "p10": -171.1223617553711, "median": 251.1419219970703, "p90": 744.4880371093751, "max": 2313.59619140625, "pos_frac": 0.765625, "sample": [-6.8546295166015625, 52.624183654785156, 336.8693542480469, 15.499008178710938, 723.0838623046875, 699.42041015625, 294.22418212890625, 23.400625228881836, 240.04058837890625, 249.94451904296875, 902.5267333984375, -171.71229553222656, -35.081214904785156, 252.33932495117188, 637.861083984375, 26.437129974365234, 420.74383544921875, 154.90234375, 121.81979370117188, -265.33245849609375, 845.2598876953125, 274.7724914550781, 544.1541137695312, 666.1676025390625, 362.96551513671875, 720.7626342773438, -239.16256713867188, 43.87318420410156, 363.08551025390625, 447.1004333496094, 172.7522735595703, 218.21591186523438, 64.66485595703125, 593.1860961914062, 570.875, 698.3934936523438, -53.26183319091797, 194.9360809326172, -169.745849609375, -124.71641540527344, 2313.59619140625, 621.37744140625, -291.66436767578125, 357.1505126953125, 417.4462890625, 184.75506591796875, -65.2447509765625, -823.38623046875, -52.41739273071289, 1085.88623046875, 287.6725769042969, 870.9515991210938, 753.6612548828125, -249.12046813964844, 502.9144287109375, 179.78085327148438, 406.74420166015625, 1028.5426025390625, 266.0286560058594, -54.41606140136719, -227.6687469482422, 10.54412841796875, 145.0873260498047, 448.9291687011719], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000449.npy"} +{"epoch": 0.6593245227606461, "step": 450, "batch_size": 64, "mean": 325.9302978515625, "std": 322.8531494140625, "min": -738.4168090820312, "p10": 6.44699859619141, "median": 297.90272521972656, "p90": 768.5185546875001, "max": 1173.3441162109375, "pos_frac": 0.921875, "sample": [375.4841003417969, 629.8573608398438, 800.676025390625, 415.7884216308594, 79.25445556640625, 729.140380859375, 1173.3441162109375, -74.83647155761719, 378.7436828613281, 9.582344055175781, 587.0901489257812, 496.2901611328125, 752.264404296875, 319.2219543457031, 430.4998779296875, 787.0145263671875, 121.84375, 61.4251708984375, 207.052490234375, 25.86081314086914, 413.533203125, 721.9722900390625, 479.12054443359375, 775.484619140625, 229.3747100830078, -27.62435531616211, 79.73234558105469, 888.0419921875, 370.90350341796875, 635.8287963867188, 69.93067932128906, 370.04046630859375, 318.1763916015625, 270.0208740234375, 290.4315490722656, 30.073627471923828, 237.17002868652344, 397.98095703125, 222.1597900390625, 131.3693389892578, 885.309326171875, -203.78187561035156, 235.44964599609375, 446.0769958496094, 175.32888793945312, 351.0433349609375, 62.054176330566406, 666.64697265625, 216.66651916503906, 52.84385299682617, 522.9044799804688, -86.02896118164062, 598.2454223632812, 298.70635986328125, 21.418554306030273, -738.4168090820312, 319.76690673828125, 297.0990905761719, 5.103279113769531, 179.7096710205078, 134.09085083007812, 1026.298828125, 1.3022689819335938, 182.38226318359375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000450.npy"} +{"epoch": 0.6607929515418502, "step": 451, "batch_size": 64, "mean": 242.31259155273438, "std": 430.9094543457031, "min": -1305.51513671875, "p10": -197.7308822631836, "median": 262.65667724609375, "p90": 712.849285888672, "max": 1220.9173583984375, "pos_frac": 0.796875, "sample": [62.685272216796875, -120.39312744140625, 46.388916015625, 324.1991882324219, 501.4208984375, 291.5739440917969, 399.98553466796875, 654.6387329101562, -61.184898376464844, 275.1153564453125, 469.8353576660156, 1220.9173583984375, 465.01318359375, 827.9453735351562, 121.9666748046875, 217.68771362304688, 151.68124389648438, -1305.51513671875, -680.7682495117188, -190.852294921875, 80.13933563232422, 97.97632598876953, 513.3416748046875, 594.8135375976562, 728.8849487304688, 268.4681091308594, 584.4439697265625, 210.6886749267578, 375.4460144042969, 591.38623046875, 199.25204467773438, 1048.9493408203125, 1012.60009765625, 354.4622497558594, 525.1124877929688, 696.0648803710938, 459.3531494140625, -368.665283203125, 256.8452453613281, 336.0596923828125, -48.225196838378906, -171.2412109375, 471.7778625488281, -974.0223388671875, 149.7714385986328, 220.75738525390625, 187.5221405029297, 179.46401977539062, 316.75714111328125, 63.897422790527344, 644.5280151367188, -180.7157440185547, -230.70889282226562, 720.0426025390625, 530.8255615234375, 89.87408447265625, 423.1324462890625, -385.1091613769531, 179.17169189453125, 7.660593032836914, 786.5328369140625, -200.67884826660156, 355.9674072265625, 133.0611572265625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000451.npy"} +{"epoch": 0.6622613803230544, "step": 452, "batch_size": 64, "mean": 286.115966796875, "std": 297.4308776855469, "min": -340.3677062988281, "p10": -84.28530807495116, "median": 292.6958770751953, "p90": 695.5730651855469, "max": 897.9811401367188, "pos_frac": 0.828125, "sample": [876.224853515625, -176.48715209960938, 555.3403930664062, 695.7122802734375, -126.0562973022461, 496.7509765625, 429.02288818359375, 615.478515625, 636.5543823242188, -75.94915008544922, 520.7686157226562, 290.55950927734375, 155.7042236328125, 146.2232208251953, 396.66607666015625, 294.8322448730469, 652.8662719726562, 302.9339294433594, 469.32696533203125, 61.75608825683594, 400.16412353515625, 1.0245132446289062, 425.1878662109375, 144.367919921875, 296.6510009765625, 695.2482299804688, 114.01435852050781, -312.55316162109375, 561.07958984375, 251.187255859375, -73.06369018554688, 458.07012939453125, 97.84097290039062, 213.29864501953125, 724.0582885742188, 108.38571166992188, 224.0369110107422, 703.8999633789062, 81.87181854248047, 205.03704833984375, 317.11053466796875, -294.44793701171875, 222.01925659179688, 667.431640625, 236.16204833984375, 399.18597412109375, 135.4029541015625, 733.8287353515625, 897.9811401367188, 113.99674987792969, 145.04393005371094, 324.21832275390625, 377.58807373046875, 14.69747543334961, -340.3677062988281, 301.86456298828125, -192.16156005859375, 483.4253234863281, 485.1885986328125, 748.4559936523438, -77.36644744873047, 183.26242065429688, -87.25053405761719, -21.8847713470459], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000452.npy"} +{"epoch": 0.6637298091042585, "step": 453, "batch_size": 64, "mean": 267.510986328125, "std": 361.33447265625, "min": -367.5057373046875, "p10": -131.34562072753906, "median": 192.88392639160156, "p90": 735.5581115722658, "max": 1660.686279296875, "pos_frac": 0.75, "sample": [573.5674438476562, 580.1112060546875, -114.31725311279297, 100.22887420654297, 230.94207763671875, 92.41358947753906, 281.3891296386719, 133.19554138183594, 218.85826110839844, 97.86822509765625, 225.97549438476562, 540.2245483398438, 570.8389282226562, 600.5205688476562, 14.85293960571289, 44.61964416503906, 334.75994873046875, 181.61773681640625, -135.5879364013672, 57.67378234863281, 630.73291015625, -174.30270385742188, 244.75827026367188, 411.16485595703125, 692.714599609375, -230.81307983398438, -121.44688415527344, 753.9196166992188, -367.5057373046875, 357.78118896484375, -90.58769226074219, 182.40518188476562, 487.7144470214844, 859.8551025390625, 835.966064453125, -37.7843132019043, -31.472763061523438, 127.03704833984375, 1660.686279296875, 768.1044311523438, 448.0268859863281, -101.67879486083984, 783.977294921875, 14.184318542480469, 275.7247314453125, 109.78314208984375, 574.3055419921875, 203.3626708984375, 473.12896728515625, 453.7811279296875, -227.82997131347656, 553.4028930664062, -173.0892333984375, 585.5094604492188, 57.207763671875, -114.58683013916016, -155.0087127685547, 134.61306762695312, 981.60009765625, 450.39215087890625, -37.8636360168457, -35.39698028564453, 138.4904022216797, 139.98684692382812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000453.npy"} +{"epoch": 0.6651982378854625, "step": 454, "batch_size": 64, "mean": 301.52032470703125, "std": 376.1201171875, "min": -738.2205810546875, "p10": -38.10385837554931, "median": 239.36181640625, "p90": 743.6215454101563, "max": 1486.0399169921875, "pos_frac": 0.828125, "sample": [220.98110961914062, 93.4893798828125, 963.4868774414062, 315.396728515625, 575.6328735351562, 122.78993225097656, 756.8079833984375, 359.67791748046875, -24.55282211303711, 225.75254821777344, -578.0531616210938, 596.9351196289062, 420.64434814453125, 356.7718505859375, 203.3065185546875, 271.4118957519531, 430.5605773925781, 424.3778076171875, -41.02050018310547, -738.2205810546875, 19.45976448059082, 541.887939453125, -30.379669189453125, 805.540283203125, -354.6179504394531, -55.7330322265625, 639.9971313476562, 3.7908077239990234, 41.54878234863281, 235.24539184570312, 549.5084228515625, 133.6743621826172, 369.0691223144531, 96.14461517333984, 451.5295104980469, 120.74071502685547, 229.63082885742188, 205.86538696289062, 282.30810546875, 173.20286560058594, 483.32177734375, 471.7787170410156, 108.85029602050781, 404.05810546875, -6.7021942138671875, 741.1019897460938, 1386.630615234375, 352.92071533203125, -31.29836082458496, 213.04840087890625, 1486.0399169921875, 1053.5728759765625, 243.47824096679688, 513.2076416015625, 84.9555892944336, 398.6230773925781, 602.455322265625, -100.63955688476562, 416.3749084472656, -50.53953552246094, 190.3234405517578, 102.71566772460938, 744.7013549804688, 73.73100280761719], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000454.npy"} +{"epoch": 0.6666666666666666, "step": 455, "batch_size": 64, "mean": 354.0106201171875, "std": 413.1695556640625, "min": -678.8196411132812, "p10": -85.4221977233886, "median": 240.90160369873047, "p90": 975.6032897949223, "max": 1331.726318359375, "pos_frac": 0.859375, "sample": [129.53248596191406, 129.43402099609375, 796.8289184570312, 1011.31103515625, -431.64453125, 325.0752868652344, 665.5467529296875, -128.36465454101562, 1219.983642578125, -211.51748657226562, 1194.489990234375, 246.1886749267578, 860.3939819335938, -269.8451843261719, 1168.39404296875, -113.91974639892578, 235.61453247070312, 678.0535888671875, 203.21853637695312, 218.6791534423828, 106.91851806640625, 496.6225891113281, 151.6534881591797, 13.967721939086914, 408.8761901855469, 1049.655029296875, 796.5545654296875, -117.37773895263672, 333.8797607421875, 429.0755615234375, 217.32205200195312, 574.4102783203125, 217.39959716796875, 143.07498168945312, 150.80743408203125, 215.06881713867188, 207.7957763671875, 278.3305358886719, 92.78399658203125, -678.8196411132812, 399.4702453613281, 71.727294921875, 287.18115234375, 575.3594970703125, 49.09672546386719, 82.35135650634766, -18.927915573120117, 540.7532958984375, 846.0217895507812, 229.95046997070312, 679.10302734375, 98.92462158203125, 437.890380859375, 395.8037414550781, 118.7015151977539, 113.14466857910156, -7.307903289794922, 892.2852172851562, 521.6753540039062, 406.9568176269531, 1221.42431640625, 300.4947204589844, 67.41975402832031, 1331.726318359375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000455.npy"} +{"epoch": 0.6681350954478708, "step": 456, "batch_size": 64, "mean": 339.5527648925781, "std": 346.0445251464844, "min": -184.55343627929688, "p10": -9.308895874023435, "median": 262.13682556152344, "p90": 887.6795532226564, "max": 1371.33935546875, "pos_frac": 0.875, "sample": [172.32012939453125, 174.50804138183594, 755.5711059570312, 94.36972045898438, -5.6295166015625, 306.9867248535156, 88.31613159179688, -16.209991455078125, 261.5680847167969, 825.4242553710938, 334.86090087890625, 25.452531814575195, 93.52967834472656, 1371.33935546875, 304.19915771484375, -69.98163604736328, 60.31736755371094, 259.0669860839844, 327.977783203125, 412.8825988769531, 229.06935119628906, 67.62739562988281, -42.317718505859375, 1061.4627685546875, -85.478759765625, 438.30194091796875, 1275.5130615234375, 374.6474914550781, 601.246337890625, 34.97113037109375, 342.1307373046875, 517.635009765625, 671.6810913085938, 175.11703491210938, 185.48394775390625, 274.55975341796875, 29.19908332824707, 487.73687744140625, 918.7754516601562, 108.92396545410156, 927.8908081054688, 459.6502685546875, 304.0794677734375, 45.84624481201172, -184.55343627929688, 262.70556640625, 81.7350845336914, -42.018218994140625, 853.0848388671875, 60.07415008544922, 902.505859375, 1023.7019653320312, 332.6581726074219, -10.885772705078125, 267.8957824707031, 121.73466491699219, 330.5171203613281, 102.72506713867188, 484.31640625, 237.46099853515625, 200.52413940429688, 536.451171875, 824.911865234375, 165.20913696289062], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000456.npy"} +{"epoch": 0.6696035242290749, "step": 457, "batch_size": 64, "mean": 347.5189208984375, "std": 344.4895935058594, "min": -387.10357666015625, "p10": -27.702281570434565, "median": 380.92994689941406, "p90": 736.2680725097657, "max": 1312.9747314453125, "pos_frac": 0.859375, "sample": [0.7475051879882812, 449.1213073730469, 448.52569580078125, 400.1751403808594, 531.1402587890625, 631.3518676757812, 776.51025390625, 740.6416015625, 87.87313079833984, 51.63580322265625, 1163.1015625, 365.6434326171875, 53.29025650024414, 247.2439727783203, 55.33644104003906, 249.07171630859375, 644.218017578125, 285.8870544433594, -52.503684997558594, 560.8864135742188, 726.0631713867188, -13.220821380615234, 282.4141845703125, 510.324462890625, 391.67132568359375, 956.7079467773438, -387.10357666015625, -217.3766326904297, 645.5723266601562, 647.3309326171875, 77.10984802246094, 521.7630615234375, 373.2232360839844, 512.302734375, 303.33233642578125, -30.137100219726562, 693.233154296875, 457.1678771972656, 120.60867309570312, 28.199951171875, 489.49505615234375, 893.7400512695312, 449.75225830078125, 109.93229675292969, 21.991214752197266, 174.2093048095703, 616.8933715820312, 34.27643585205078, 530.1318969726562, 388.63665771484375, -280.2850341796875, 703.8192138671875, 330.4201965332031, 840.3851318359375, -269.2081298828125, 423.0220947265625, 55.41993713378906, -148.94906616210938, 399.63153076171875, -22.021038055419922, 302.3079833984375, 548.6495971679688, 46.89952850341797, 1312.9747314453125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000457.npy"} +{"epoch": 0.671071953010279, "step": 458, "batch_size": 64, "mean": 312.3773193359375, "std": 379.29132080078125, "min": -442.6553649902344, "p10": -136.48988189697263, "median": 297.9230041503906, "p90": 640.9047668457032, "max": 1735.6046142578125, "pos_frac": 0.8125, "sample": [-442.6553649902344, 619.2008666992188, 466.27984619140625, 332.1466979980469, 556.0321655273438, -104.36698913574219, 1735.6046142578125, -193.43687438964844, 285.585693359375, 557.099609375, 216.37527465820312, 310.26031494140625, 498.30303955078125, 401.4683532714844, 746.8506469726562, -175.6795654296875, -99.97352600097656, 22.164663314819336, 217.73507690429688, 612.5157470703125, 457.8493957519531, 502.4436340332031, 109.8836898803711, 531.650390625, 401.3165283203125, 374.8759765625, 323.21142578125, 11.59783935546875, 386.5899658203125, 517.2236938476562, 266.19097900390625, 680.656494140625, 590.282470703125, 833.8211669921875, 106.05401611328125, -25.65570068359375, 369.5912170410156, 77.05375671386719, 498.10968017578125, 257.9677429199219, 795.36376953125, 357.0299987792969, -101.98189544677734, -339.4238586425781, 140.1973876953125, -281.67333984375, 200.61920166015625, -150.2568359375, 223.1768798828125, 1646.2911376953125, 619.6629028320312, 203.75900268554688, 277.3588562011719, 106.5511474609375, 225.93417358398438, 650.0084228515625, 319.4287414550781, 464.1361083984375, 265.06793212890625, 246.6263427734375, -102.99723052978516, 414.0859680175781, -289.2782897949219, 270.23846435546875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000458.npy"} +{"epoch": 0.6725403817914831, "step": 459, "batch_size": 64, "mean": 331.42340087890625, "std": 403.89837646484375, "min": -702.40771484375, "p10": -127.28865051269528, "median": 289.11767578125, "p90": 870.8668334960938, "max": 1314.1563720703125, "pos_frac": 0.8125, "sample": [74.89002990722656, 44.5535888671875, 398.722900390625, 112.43084716796875, 110.6172103881836, 419.7911071777344, -227.9793701171875, 103.75895690917969, 571.0811767578125, -275.5163269042969, 526.8062744140625, 389.1285400390625, 421.6296691894531, 546.9915771484375, -20.74431037902832, 443.0023193359375, -65.09902954101562, 642.3052978515625, 333.28021240234375, 926.402587890625, 15.778564453125, 909.1870727539062, 658.140869140625, -28.121356964111328, 352.0574645996094, -138.13206481933594, 786.7605590820312, -101.98735046386719, 130.7912139892578, 29.86664390563965, 244.41294860839844, 32.75830841064453, 863.474365234375, -702.40771484375, 1314.1563720703125, 287.0390625, 1206.9205322265625, 558.7112426757812, 274.2027587890625, -299.8184814453125, 197.6424560546875, 214.37326049804688, 449.031005859375, 187.0767822265625, 78.570556640625, 273.3811340332031, -437.2867736816406, 184.84661865234375, 613.3220825195312, 693.8057861328125, 736.3067626953125, 110.22749328613281, 366.80670166015625, 1189.2757568359375, 291.1962890625, 114.9902572631836, 934.2281494140625, -183.83863830566406, 848.8941040039062, 774.1434326171875, 874.0350341796875, 302.8834228515625, -7.481716156005859, 534.8248901367188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000459.npy"} +{"epoch": 0.6740088105726872, "step": 460, "batch_size": 64, "mean": 355.984619140625, "std": 387.36895751953125, "min": -383.0450439453125, "p10": -133.63792877197267, "median": 330.1430206298828, "p90": 891.3609985351562, "max": 1520.5244140625, "pos_frac": 0.828125, "sample": [907.5982055664062, 703.1665649414062, 521.0838623046875, -93.9432373046875, 109.01435852050781, 801.8900146484375, 933.10205078125, 287.6183776855469, 33.395164489746094, 229.0127716064453, -157.03829956054688, 706.8295288085938, 150.4656219482422, 558.17822265625, 212.04234313964844, 271.68438720703125, 238.22811889648438, -383.0450439453125, 185.9475860595703, 1299.50634765625, 1146.0235595703125, 202.4337158203125, 493.4690856933594, 67.68701171875, 992.3199462890625, 292.45635986328125, 1520.5244140625, 413.29052734375, -64.49068450927734, 29.098785400390625, 346.7771301269531, -134.11935424804688, 553.6104125976562, -159.45289611816406, 518.2286376953125, 516.3273315429688, -132.5146026611328, 381.27923583984375, 264.1472473144531, -44.559837341308594, 884.388427734375, 582.0535278320312, 432.2680969238281, 389.65802001953125, 495.9971923828125, 206.53805541992188, 71.28190612792969, 152.60279846191406, 392.8753356933594, 313.5089111328125, -240.95555114746094, 94.1161117553711, 131.79119873046875, 381.4947204589844, -281.9185791015625, 567.716064453125, 894.3492431640625, 503.3995361328125, -240.35867309570312, 811.9192504882812, 542.5421752929688, 445.526123046875, 65.10002899169922, 469.84942626953125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000460.npy"} +{"epoch": 0.6754772393538914, "step": 461, "batch_size": 64, "mean": 281.666259765625, "std": 389.58416748046875, "min": -530.289306640625, "p10": -190.25709457397454, "median": 219.9975357055664, "p90": 730.2568664550782, "max": 1393.4710693359375, "pos_frac": 0.78125, "sample": [119.18917846679688, 55.45622253417969, 742.3148193359375, 410.7837829589844, -220.8575439453125, 473.1100158691406, -530.289306640625, 1393.4710693359375, 89.93307495117188, 578.6121826171875, 166.08547973632812, -91.79413604736328, 236.5853271484375, 556.6106567382812, 1066.89306640625, 429.52838134765625, 615.2112426757812, -57.5068359375, 231.1012725830078, 350.9931640625, 318.84869384765625, 702.1216430664062, 655.512451171875, 958.7110595703125, -229.6938018798828, 1.002655029296875, 1071.53125, 1378.558349609375, 231.0172119140625, 314.419677734375, 451.7903137207031, 163.88294982910156, 178.80462646484375, 31.67388916015625, 232.083251953125, 208.9778594970703, -217.21728515625, -7.843292236328125, -98.67559814453125, 91.43008422851562, 794.7695922851562, -38.73419952392578, 611.7337036132812, 669.8027954101562, -313.93603515625, 468.6097717285156, 208.87474060058594, 568.2887573242188, 17.48077392578125, -31.6309814453125, 571.2899169921875, 136.22256469726562, 142.96578979492188, 164.2615966796875, 441.3125, -127.34998321533203, -258.3446350097656, 88.92556762695312, 188.2822265625, 301.727783203125, 141.5504150390625, -384.8055419921875, 362.658447265625, 250.31884765625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000461.npy"} +{"epoch": 0.6769456681350955, "step": 462, "batch_size": 64, "mean": 238.94667053222656, "std": 405.75531005859375, "min": -557.4100952148438, "p10": -220.85390014648434, "median": 195.1805191040039, "p90": 620.1423706054687, "max": 1381.041259765625, "pos_frac": 0.6875, "sample": [335.1590576171875, 8.538307189941406, -8.279319763183594, -183.8224334716797, 392.1199035644531, 532.6734619140625, 29.327531814575195, 513.6526489257812, 571.4254150390625, 837.4424438476562, 482.535400390625, -243.10272216796875, -348.207275390625, 39.367366790771484, 166.65371704101562, 490.72723388671875, 569.0921630859375, 206.53933715820312, -178.15496826171875, 623.4044799804688, -194.04660034179688, 136.81155395507812, 356.163330078125, -557.4100952148438, -109.29515075683594, 44.565338134765625, 622.8616333007812, -395.52972412109375, 34.10494613647461, -32.7768669128418, 1297.82666015625, 368.2724304199219, -368.0379943847656, 285.19158935546875, 613.7974243164062, 1047.4259033203125, -153.010009765625, -82.44141387939453, 1347.61767578125, 264.98504638671875, 149.13209533691406, -59.04429626464844, -153.0414581298828, 195.3233642578125, -233.74935913085938, -63.34016418457031, 375.6766052246094, 454.04388427734375, -232.34274291992188, 182.55679321289062, -59.084442138671875, 493.8299560546875, 1381.041259765625, 499.687255859375, 438.1331787109375, -96.35943603515625, 298.42218017578125, 579.4238891601562, 593.0799560546875, 538.9105834960938, 123.30158996582031, 126.88699340820312, 195.0376739501953, 200.8945770263672], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000462.npy"} +{"epoch": 0.6784140969162996, "step": 463, "batch_size": 64, "mean": 305.1488037109375, "std": 418.91033935546875, "min": -488.8428039550781, "p10": -219.3354019165039, "median": 281.8427276611328, "p90": 832.65654296875, "max": 1379.1689453125, "pos_frac": 0.71875, "sample": [62.98381805419922, 549.216064453125, -2.421701431274414, 298.88623046875, -79.67729949951172, 395.7424621582031, -197.7044677734375, -4.806892395019531, 452.91748046875, -62.13853454589844, 616.4639282226562, -222.27560424804688, 552.7976684570312, 685.987060546875, 180.24563598632812, 390.38671875, 378.9187927246094, 599.9937133789062, 822.9749755859375, -19.196277618408203, 55.93595886230469, 1379.1689453125, 427.3494567871094, 300.3585205078125, 1150.0303955078125, 611.4003295898438, 173.75857543945312, 264.7992248535156, -79.75967407226562, 156.15719604492188, -308.0769348144531, -54.203468322753906, -488.8428039550781, 195.0123291015625, 836.8057861328125, -393.8951110839844, 2.0727996826171875, -30.09234619140625, 357.2627258300781, -212.4749298095703, 714.2228393554688, 340.788818359375, 557.2026977539062, 218.2772216796875, 689.0008544921875, 989.9441528320312, -17.958160400390625, 260.9992370605469, 936.5105590820312, -347.84356689453125, 174.66917419433594, 1289.284912109375, 221.8723602294922, 710.4321899414062, -245.5205078125, 107.13690185546875, 379.385498046875, 349.697509765625, -389.0574951171875, 443.17254638671875, 494.9582214355469, 550.8771362304688, 1176.1190185546875, 183.2893829345703], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000463.npy"} +{"epoch": 0.6798825256975036, "step": 464, "batch_size": 64, "mean": 461.2894287109375, "std": 447.3922424316406, "min": -445.7931213378906, "p10": -51.994554901123024, "median": 477.75233459472656, "p90": 918.3858520507813, "max": 1989.075439453125, "pos_frac": 0.875, "sample": [-234.2360076904297, 1032.4107666015625, 323.8491516113281, 203.62203979492188, -25.7427978515625, 922.6107177734375, 693.5916137695312, 119.3047103881836, 174.39041137695312, 573.4315795898438, 661.470458984375, 632.1541748046875, -384.3012390136719, 759.3719482421875, 333.15179443359375, 535.5147094726562, 617.9893798828125, 149.73374938964844, 279.1217041015625, 779.9153442382812, 828.7759399414062, 562.2820434570312, 767.7347412109375, -100.39414978027344, 1013.0131225585938, 266.5164489746094, 598.185791015625, 506.97076416015625, -445.7931213378906, 236.44488525390625, 721.559326171875, 239.10101318359375, 512.0521850585938, 43.07144546508789, 386.1983337402344, 580.55517578125, 778.923095703125, 262.69989013671875, 377.10418701171875, 145.887451171875, 267.12457275390625, 13.539493560791016, 865.1611328125, 626.3763427734375, 128.97572326660156, 438.5160827636719, -151.99996948242188, 220.05130004882812, 14.491462707519531, 657.8328857421875, 576.5697631835938, 1353.9783935546875, 1989.075439453125, 1125.18310546875, 449.96710205078125, 908.52783203125, 831.0694580078125, -63.24530792236328, 1593.7335205078125, 707.17578125, 505.5375671386719, -442.1404113769531, 354.714599609375, 124.0656509399414], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000464.npy"} +{"epoch": 0.6813509544787077, "step": 465, "batch_size": 64, "mean": 385.66748046875, "std": 459.6060791015625, "min": -1240.8515625, "p10": -138.1509353637695, "median": 397.1847229003906, "p90": 855.1736022949219, "max": 1815.5589599609375, "pos_frac": 0.859375, "sample": [670.2718505859375, 1211.7725830078125, 498.6929931640625, 219.78221130371094, -236.24526977539062, 123.82406616210938, 575.7835083007812, 335.8554382324219, 121.24042510986328, 187.44015502929688, 821.8323364257812, 283.765625, 363.7207946777344, 192.12762451171875, 569.43212890625, 579.9772338867188, 445.3395690917969, 374.8158264160156, 240.57997131347656, 11.632865905761719, -514.2695922851562, 924.5075073242188, 576.8908081054688, 257.5996398925781, 1815.5589599609375, -103.20112609863281, 587.1603393554688, 155.23626708984375, 403.34381103515625, 513.8582153320312, 724.9022827148438, 445.82989501953125, -153.12942504882812, 168.35264587402344, 269.59991455078125, 486.47467041015625, 30.049598693847656, 289.2084655761719, -276.9295959472656, 655.0218505859375, 889.6814575195312, 353.7500305175781, 478.61907958984375, -701.4561157226562, 662.958740234375, 630.9298095703125, 186.60169982910156, 819.59326171875, 111.23844909667969, 863.010498046875, 790.382080078125, -197.87588500976562, 1148.7196044921875, -1240.8515625, 811.8358154296875, 845.2495727539062, -72.50679016113281, 285.50042724609375, 654.8694458007812, 518.1740112304688, 65.06538391113281, 391.025634765625, 859.4267578125, 681.0714721679688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000465.npy"} +{"epoch": 0.6828193832599119, "step": 466, "batch_size": 64, "mean": 329.5827941894531, "std": 446.45965576171875, "min": -983.389892578125, "p10": -234.09507904052734, "median": 276.3724822998047, "p90": 903.3204711914062, "max": 1264.9014892578125, "pos_frac": 0.796875, "sample": [629.6295166015625, -236.8101348876953, 192.20562744140625, -227.75994873046875, 785.47509765625, 370.23052978515625, 574.0123291015625, 246.0431365966797, 600.1091918945312, 31.982101440429688, 219.74432373046875, 898.5759887695312, -413.1568603515625, 868.6168212890625, -14.01205062866211, 198.23309326171875, 339.77880859375, 18.131793975830078, 387.6112976074219, 173.83392333984375, 536.4549560546875, 207.4625244140625, 495.68829345703125, 446.0543212890625, 873.8941040039062, 180.3922119140625, 51.014869689941406, 793.018310546875, 269.37786865234375, 445.39794921875, 438.14691162109375, 171.878662109375, 746.8355712890625, 1264.9014892578125, -983.389892578125, 971.7013549804688, 383.4032897949219, 783.205322265625, 1035.329833984375, 272.93109130859375, 1011.7869262695312, 4.721805572509766, 205.16375732421875, 218.75717163085938, 608.1786499023438, 1078.0123291015625, -264.963134765625, -490.0031433105469, -14.363014221191406, 113.95233154296875, 930.5225830078125, 265.26165771484375, 833.7568969726562, -71.8602066040039, 826.1756591796875, 415.3643798828125, -540.70703125, 17.722427368164062, 905.3538208007812, -276.5379638671875, 315.8723449707031, -154.91082763671875, -149.94577026367188, 279.8138732910156], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000466.npy"} +{"epoch": 0.684287812041116, "step": 467, "batch_size": 64, "mean": 357.72528076171875, "std": 639.1348876953125, "min": -1200.0914306640625, "p10": -392.5655181884766, "median": 373.4294128417969, "p90": 1159.6196899414062, "max": 1742.6864013671875, "pos_frac": 0.734375, "sample": [821.8738403320312, 1578.354736328125, 697.4347534179688, 1350.4368896484375, -620.80810546875, -1200.0914306640625, 570.1043090820312, 495.8560485839844, 462.3021545410156, 358.7271728515625, 255.89254760742188, 394.5902404785156, 1657.427978515625, 134.4745330810547, 614.5922241210938, 505.01947021484375, 124.66559600830078, 1061.0567626953125, 1391.8856201171875, 388.13165283203125, 429.38299560546875, -51.03731155395508, 205.95318603515625, 940.5467529296875, -265.56219482421875, -331.9941101074219, -550.0667114257812, 492.0489807128906, 1149.8173828125, 388.29974365234375, 1085.156982421875, 159.88172912597656, 510.19232177734375, -393.2855529785156, 1099.8018798828125, 23.441802978515625, 1163.8206787109375, 119.50872802734375, -90.28643798828125, 896.8204956054688, 295.185546875, -1089.880126953125, 273.19183349609375, -291.2547607421875, 100.81236267089844, 423.8863525390625, -25.45263671875, -388.593017578125, -390.88543701171875, 747.6174926757812, 837.0651245117188, 858.9764404296875, 1240.443359375, 207.30099487304688, 980.31640625, -69.65100860595703, 613.0864868164062, -106.48690795898438, -412.52197265625, 1742.6864013671875, -921.610107421875, 0.4311790466308594, 116.95836639404297, 128.42822265625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000467.npy"} +{"epoch": 0.6857562408223201, "step": 468, "batch_size": 64, "mean": 425.77752685546875, "std": 523.0230712890625, "min": -948.9991455078125, "p10": -98.90053939819335, "median": 360.07505798339844, "p90": 1318.4261352539065, "max": 2104.21533203125, "pos_frac": 0.84375, "sample": [547.3106689453125, 65.49398040771484, 1341.006591796875, 517.0126953125, 1348.5281982421875, 992.1279907226562, 576.5648193359375, 672.848388671875, 555.0065307617188, 592.2227783203125, 335.01611328125, 649.923828125, 146.49191284179688, 359.0225524902344, 628.516357421875, 115.1737060546875, 235.85874938964844, -179.4837188720703, 361.1275634765625, 233.3319091796875, 223.7152099609375, 17.171245574951172, 76.12328338623047, 383.97991943359375, 417.42486572265625, 1354.8809814453125, 423.92218017578125, 1369.3763427734375, 928.8516845703125, 268.7198181152344, -421.4634094238281, -14.897933959960938, 594.9725952148438, 290.73681640625, 477.7413024902344, 503.63360595703125, 74.27533721923828, 146.40151977539062, 599.1309204101562, -948.9991455078125, 1547.95263671875, 86.02766418457031, 505.22210693359375, 326.5586853027344, 319.850830078125, 116.37033081054688, -181.9932861328125, 369.4096984863281, 12.859085083007812, 1265.7384033203125, 192.14224243164062, 1616.3814697265625, 272.5038757324219, 766.6688232421875, -1.5051193237304688, 455.6241455078125, 505.0997009277344, 2104.21533203125, 587.46142578125, -106.75836181640625, 297.30560302734375, -467.86883544921875, -117.73692321777344, -80.56562042236328], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000468.npy"} +{"epoch": 0.6872246696035242, "step": 469, "batch_size": 64, "mean": 426.458740234375, "std": 486.77899169921875, "min": -621.8057861328125, "p10": -70.66497573852533, "median": 410.1352844238281, "p90": 1115.539990234375, "max": 1689.806884765625, "pos_frac": 0.875, "sample": [898.953125, 552.109619140625, 133.49124145507812, 503.3182678222656, 1689.806884765625, 307.0157775878906, 1065.16064453125, 45.54137420654297, 35.08271789550781, 212.72998046875, 537.999267578125, 107.11076354980469, -558.1090087890625, 52.944000244140625, 1179.1292724609375, 47.04094696044922, 495.3210144042969, 513.2701416015625, 421.85894775390625, 586.0989990234375, 937.3779296875, 258.0368347167969, 146.73194885253906, 196.11973571777344, 1203.935302734375, 7.3616943359375, 857.723876953125, 483.2330017089844, 1196.2880859375, -365.99359130859375, 731.2247314453125, -99.18214416503906, 816.7391357421875, 251.33802795410156, 515.073486328125, 268.1483154296875, 17.463829040527344, -201.46719360351562, 14.92108154296875, -444.671875, 555.7258911132812, 832.8380737304688, 1099.763671875, 78.38850402832031, -398.67401123046875, 198.3758544921875, 300.7576599121094, 1089.709716796875, 398.41162109375, 53.72705841064453, 1122.30126953125, 895.2726440429688, 503.0254821777344, 1217.158935546875, 1258.5452880859375, 718.95361328125, 522.4492797851562, 335.7406005859375, 75.02391052246094, -4.124916076660156, 170.53836059570312, 500.41119384765625, 774.5673217773438, -621.8057861328125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000469.npy"} +{"epoch": 0.6886930983847284, "step": 470, "batch_size": 64, "mean": 382.3490905761719, "std": 519.6559448242188, "min": -1197.0074462890625, "p10": -246.27555541992186, "median": 420.7952880859375, "p90": 938.3741943359375, "max": 1710.962646484375, "pos_frac": 0.765625, "sample": [422.60101318359375, 485.396240234375, -152.59210205078125, 144.3245849609375, 250.06826782226562, -49.69841766357422, 291.0252685546875, 111.12071990966797, -117.74258422851562, -692.1322021484375, 584.5222778320312, 709.7218627929688, 547.25341796875, 354.9432067871094, 580.0753173828125, 861.8692626953125, -256.93414306640625, 691.854248046875, -1197.0074462890625, 759.504638671875, -177.55404663085938, 780.5144653320312, 202.57546997070312, -359.461181640625, 277.791015625, 180.74136352539062, -199.11178588867188, 376.0645446777344, 418.98956298828125, 347.3941955566406, 646.6506958007812, -310.69049072265625, 1238.5111083984375, 111.75996398925781, 97.72972869873047, 769.7713623046875, 465.4241027832031, 590.6182861328125, 1710.962646484375, 645.3853759765625, 1384.830322265625, 131.71875, 741.4918212890625, 946.675537109375, -102.26707458496094, 1395.1285400390625, 556.2603759765625, 573.4183959960938, 689.6993408203125, 919.00439453125, 473.4637451171875, -329.8443603515625, -221.405517578125, 225.4476318359375, 561.6051635742188, 778.0030517578125, -366.9586181640625, 1353.83984375, 778.869384765625, -137.3120880126953, 1007.8732299804688, 138.02151489257812, 297.4779968261719, 533.0611572265625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000470.npy"} +{"epoch": 0.6901615271659325, "step": 471, "batch_size": 64, "mean": 337.25445556640625, "std": 534.5740966796875, "min": -808.9127197265625, "p10": -281.991748046875, "median": 215.23522186279297, "p90": 904.3895996093753, "max": 2161.000732421875, "pos_frac": 0.765625, "sample": [1380.9342041015625, -22.949783325195312, -51.96917724609375, 707.325927734375, 368.26318359375, 670.3727416992188, 735.1215209960938, 2161.000732421875, 940.7059326171875, 250.87579345703125, 23.120773315429688, 334.4459228515625, -37.87782287597656, 158.96633911132812, 398.4000244140625, 29.391414642333984, -291.26922607421875, -260.34429931640625, 109.9051513671875, 14.43239974975586, 1691.2515869140625, -331.0377197265625, 330.570068359375, 523.9513549804688, 585.821533203125, -70.1713638305664, -310.0867004394531, -19.425504684448242, -370.0036315917969, 552.2576904296875, -253.4915313720703, 1096.846435546875, 741.247314453125, 964.462158203125, 465.8759765625, 669.0374755859375, 775.554443359375, 150.97584533691406, 179.5946502685547, 557.2435913085938, 124.02933502197266, 168.86416625976562, 114.53960418701172, 51.3770866394043, 693.2354736328125, 1781.0272216796875, 129.23211669921875, -808.9127197265625, 681.7642822265625, 31.388351440429688, -336.25091552734375, 149.619140625, 47.609535217285156, 381.60760498046875, 80.43870544433594, 542.0101928710938, 10.067544937133789, -235.9789276123047, 439.74053955078125, 277.5638732910156, 672.7771606445312, 819.6514892578125, -351.1651916503906, 570.724853515625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000471.npy"} +{"epoch": 0.6916299559471366, "step": 472, "batch_size": 64, "mean": 391.2642517089844, "std": 633.5305786132812, "min": -1223.4215087890625, "p10": -179.12201690673828, "median": 256.49688720703125, "p90": 1307.8374877929691, "max": 2269.4189453125, "pos_frac": 0.796875, "sample": [-1223.4215087890625, -408.9326171875, 785.087890625, -432.4757995605469, 211.2342987060547, 179.70932006835938, 607.4807739257812, -176.72671508789062, -28.411161422729492, -741.6199340820312, 283.46795654296875, 46.545230865478516, 143.4084014892578, 605.297607421875, 52.312347412109375, 935.2261962890625, -43.98500061035156, 595.3892822265625, 318.5499267578125, 82.9395751953125, 1210.662841796875, 83.50155639648438, 49.64697265625, 16.7265625, 253.4691162109375, 529.2930908203125, 349.1549377441406, -146.22708129882812, 279.1221923828125, 796.271484375, 36.754150390625, 949.66015625, 185.85812377929688, 932.0855712890625, 2159.864501953125, 1735.32568359375, 236.1527862548828, 1460.594482421875, 259.524658203125, 469.9618225097656, 357.09539794921875, 940.399169921875, 100.73342895507812, 271.3155517578125, -288.05780029296875, 1376.48828125, 1663.35546875, 125.69624328613281, 1349.4837646484375, 112.44129943847656, 2269.4189453125, -82.41404724121094, -180.14857482910156, -90.9792709350586, 900.5864868164062, 771.728515625, 724.9265747070312, 213.46591186523438, 300.1445617675781, -501.47296142578125, 281.6714782714844, 435.0387268066406, 227.54965209960938, 123.9660873413086], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000472.npy"} +{"epoch": 0.6930983847283406, "step": 473, "batch_size": 64, "mean": 190.77340698242188, "std": 790.5775146484375, "min": -2009.25244140625, "p10": -474.7333984375, "median": 212.10186767578125, "p90": 966.0138671875002, "max": 3445.0283203125, "pos_frac": 0.6875, "sample": [-190.26144409179688, 780.9010620117188, 176.4133758544922, -284.988037109375, -463.6585388183594, 1098.2509765625, -98.857666015625, 192.81741333007812, 336.058349609375, 327.974365234375, 1343.15234375, 647.5454711914062, -59.14961242675781, 145.76971435546875, -501.5101318359375, -411.8907470703125, 55.358551025390625, 22.369033813476562, 198.88966369628906, 1420.44970703125, -278.1246643066406, -103.06269836425781, 624.5054931640625, 300.81109619140625, 150.98403930664062, 148.7679443359375, 322.787841796875, 245.3024139404297, 215.05947875976562, 75.96005249023438, -2009.25244140625, 64.0153579711914, -634.200927734375, 328.4781494140625, -1612.131103515625, -1769.5201416015625, 3445.0283203125, 355.58001708984375, 489.00347900390625, 1206.199951171875, 225.46238708496094, 283.4686279296875, 810.540771484375, 819.251708984375, 247.38673400878906, 644.1966552734375, -415.9095764160156, -479.4797668457031, 244.1255645751953, 982.569091796875, 506.0470275878906, 927.385009765625, -443.64019775390625, -237.76678466796875, 156.58419799804688, -209.13607788085938, 912.060546875, -1463.4027099609375, 536.733642578125, 221.35324096679688, -340.78106689453125, 379.53607177734375, 209.14425659179688, 1391.943603515625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000473.npy"} +{"epoch": 0.6945668135095447, "step": 474, "batch_size": 64, "mean": 333.6046142578125, "std": 605.7875366210938, "min": -1434.7742919921875, "p10": -378.3375366210937, "median": 322.23268127441406, "p90": 1002.8488647460939, "max": 2146.451171875, "pos_frac": 0.765625, "sample": [-1143.5460205078125, 272.4065856933594, 159.73074340820312, 1013.7911376953125, 713.122802734375, -480.0802917480469, 977.31689453125, 150.53868103027344, 279.1629333496094, 580.2728881835938, 1123.809814453125, 2146.451171875, 32.70347595214844, 336.2927551269531, 113.84102630615234, 911.093017578125, 326.3410339355469, -15.2945556640625, -439.30828857421875, 441.16217041015625, 13.875564575195312, 381.0246887207031, -179.29962158203125, 108.43283081054688, 292.602783203125, 697.8671264648438, 808.338623046875, 318.12432861328125, 555.3652954101562, 379.86065673828125, 713.975830078125, -71.59417724609375, 2.4315719604492188, 285.7515869140625, 604.2756958007812, 291.27142333984375, 589.841552734375, 113.26702880859375, -329.22705078125, -399.3848876953125, 754.602783203125, -87.280517578125, 710.9343872070312, 471.19561767578125, 253.876708984375, 276.4577941894531, 1380.510498046875, 1179.859375, -75.95100402832031, -791.2711791992188, -216.06927490234375, 831.2565307617188, 432.73638916015625, 662.1351928710938, 519.0987548828125, 214.7058868408203, 398.0863037109375, -835.3408813476562, 766.3970336914062, -115.37939453125, 481.884033203125, 1248.1351318359375, -1434.7742919921875, 1648.28125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000474.npy"} +{"epoch": 0.6960352422907489, "step": 475, "batch_size": 64, "mean": 425.5621643066406, "std": 455.403564453125, "min": -325.1300964355469, "p10": -150.3512252807617, "median": 403.61981201171875, "p90": 1032.6631042480476, "max": 1597.5732421875, "pos_frac": 0.828125, "sample": [127.891357421875, 20.503406524658203, 361.3897399902344, 155.7947235107422, -130.90814208984375, 636.197265625, 1597.5732421875, 356.5804443359375, 1181.8917236328125, 1113.096923828125, 268.0998840332031, 282.9210205078125, 444.16375732421875, 0.9193115234375, 546.3062744140625, -311.0044250488281, 701.8920288085938, 18.437395095825195, 795.6079711914062, -96.44927215576172, 759.1314697265625, 635.9500732421875, 734.7852783203125, 169.23272705078125, 95.81924438476562, 316.4253845214844, 385.7467346191406, 470.0815734863281, 20.135692596435547, 541.62451171875, 279.3070068359375, 562.0228271484375, -168.0708770751953, 293.8802185058594, 644.5640258789062, 808.5480346679688, 432.44195556640625, 771.1323852539062, -325.1300964355469, 1325.6865234375, 440.9500732421875, -21.778888702392578, 279.8105163574219, -243.6293182373047, 814.9396362304688, 74.02738952636719, 559.69482421875, -158.68397521972656, 256.345458984375, 661.8447265625, -114.98019409179688, 830.0009765625, 667.8836669921875, 34.002410888671875, 1563.265380859375, -240.14259338378906, 229.95252990722656, 1420.9788818359375, 684.102783203125, 1162.165283203125, 456.3647766113281, 844.9841918945312, -211.8318634033203, 421.4928894042969], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000475.npy"} +{"epoch": 0.697503671071953, "step": 476, "batch_size": 64, "mean": 334.732177734375, "std": 458.1792297363281, "min": -507.0128173828125, "p10": -167.0396286010742, "median": 248.71648406982422, "p90": 885.7581787109378, "max": 2070.68896484375, "pos_frac": 0.796875, "sample": [729.1324462890625, 180.29612731933594, -110.63899993896484, 723.5194702148438, 262.953369140625, 477.8536071777344, 1393.4205322265625, 130.80453491210938, 155.30245971679688, -153.5040283203125, 1039.9520263671875, 194.688720703125, 154.6448211669922, 409.33184814453125, 796.4791259765625, 462.8297119140625, 717.7493286132812, 66.58785247802734, 229.93093872070312, 124.3364028930664, 594.1071166992188, 284.0408630371094, -33.66902160644531, 1204.0284423828125, 573.87158203125, 598.3158569335938, -507.0128173828125, -258.44525146484375, 677.9175415039062, 84.39131927490234, 6.42822265625, 147.12350463867188, -338.1482849121094, -162.36904907226562, 494.4393310546875, -169.0413055419922, -144.044677734375, -320.9961242675781, 706.98046875, 99.14842987060547, 42.014102935791016, 623.7955932617188, 276.8083190917969, -190.7311553955078, 146.46080017089844, 234.47959899902344, 755.3050537109375, 1033.8377685546875, 2070.68896484375, 924.0206298828125, 352.61309814453125, -18.951461791992188, 298.17352294921875, 204.1329345703125, 101.00486755371094, 104.28099060058594, 535.0633544921875, 482.31658935546875, 145.58518981933594, 365.9080810546875, 357.9033203125, 1122.153076171875, 368.42254638671875, -435.1649475097656], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000476.npy"} +{"epoch": 0.6989720998531571, "step": 477, "batch_size": 64, "mean": 337.63336181640625, "std": 577.7583618164062, "min": -1661.1959228515625, "p10": -262.12548217773434, "median": 271.6797409057617, "p90": 1039.3967163085938, "max": 2064.71484375, "pos_frac": 0.71875, "sample": [-600.7691650390625, 418.06158447265625, 925.5632934570312, -500.83502197265625, 1484.9112548828125, 243.52870178222656, 232.41946411132812, 898.1209106445312, 830.3021240234375, 1099.4765625, -0.6794662475585938, 132.11441040039062, 216.37986755371094, 819.93212890625, 1053.911376953125, -139.68479919433594, 744.2079467773438, 464.8438720703125, 1019.7991943359375, -223.99700927734375, 599.5087890625, 96.8736572265625, -14.664077758789062, 186.4499969482422, -275.4808654785156, 775.9212646484375, 396.1228942871094, 121.75711059570312, -59.854461669921875, -85.04150390625, -524.4625244140625, 545.137939453125, -1661.1959228515625, -663.662109375, 1321.934814453125, 203.01211547851562, 299.8307800292969, 168.86968994140625, 534.1300048828125, 15.35647964477539, 616.5628662109375, -78.346435546875, 589.3990478515625, 1077.253173828125, 385.8705749511719, 315.1685485839844, 307.7203674316406, 860.6885986328125, 498.4253845214844, 231.76519775390625, 1047.795654296875, -230.96292114257812, -48.274314880371094, 229.57958984375, -7.126312255859375, 986.20361328125, 551.7381591796875, -94.38424682617188, 46.734161376953125, -275.8436279296875, 82.14705657958984, 661.6337280273438, 2064.71484375, 691.9232177734375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000477.npy"} +{"epoch": 0.7004405286343612, "step": 478, "batch_size": 64, "mean": 388.6256103515625, "std": 510.8751525878906, "min": -870.3444213867188, "p10": -247.72069702148434, "median": 368.1724853515625, "p90": 1159.2387084960942, "max": 1599.2955322265625, "pos_frac": 0.765625, "sample": [-262.3404235839844, 172.45985412597656, 829.5409545898438, 162.94805908203125, -20.975379943847656, 409.33087158203125, 298.3567810058594, -476.202392578125, 540.3209228515625, -40.34271240234375, 414.42633056640625, 592.90869140625, -526.7421875, -459.22705078125, 343.4963073730469, 895.9783325195312, 821.140869140625, 1599.2955322265625, 402.57977294921875, 153.95950317382812, -322.044921875, -213.60800170898438, -457.87078857421875, 166.50735473632812, 409.09771728515625, 1068.557861328125, 565.8927612304688, 450.5475769042969, -76.79861450195312, 364.8167724609375, 1455.3497314453125, 972.5932006835938, 323.58355712890625, 262.5126647949219, 394.8369140625, 262.8705749511719, 1308.734375, 1198.1019287109375, 793.3692626953125, -155.13096618652344, 638.6585693359375, -62.16968536376953, 132.12939453125, -81.88397979736328, 833.7070922851562, 388.2063293457031, 156.4044647216797, 1233.6043701171875, 210.1139373779297, 371.5281982421875, 616.8068237304688, -19.488075256347656, 343.77972412109375, 708.0178833007812, 344.96685791015625, 296.6738586425781, 1241.29931640625, 671.9154052734375, 1323.069580078125, -870.3444213867188, 706.3804321289062, 403.4163513183594, 653.2312622070312, 9.183822631835938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000478.npy"} +{"epoch": 0.7019089574155654, "step": 479, "batch_size": 64, "mean": 374.60540771484375, "std": 632.0258178710938, "min": -1363.209228515625, "p10": -287.6566528320312, "median": 269.03380584716797, "p90": 1102.8453002929687, "max": 2727.137939453125, "pos_frac": 0.703125, "sample": [5.996192932128906, -3.2132949829101562, 1064.7755126953125, 561.4476318359375, 1038.1873779296875, 1202.954833984375, -425.014892578125, -231.98043823242188, 225.9364776611328, 27.508207321166992, 879.656982421875, -202.476318359375, -268.48663330078125, 300.68072509765625, 775.0555419921875, 236.39984130859375, 237.3868865966797, -171.86767578125, 1040.1300048828125, 457.4316101074219, 194.74819946289062, 463.71990966796875, -109.74812316894531, 562.9030151367188, 2727.137939453125, 1447.61865234375, 662.9180297851562, 754.824951171875, 749.6915283203125, 504.035400390625, 1108.440673828125, 218.82647705078125, 229.3702392578125, 864.9176025390625, -748.6101684570312, 1674.379638671875, -56.755584716796875, 731.1456909179688, 532.9608154296875, 172.66136169433594, 396.4087829589844, 1097.83447265625, 729.4635009765625, 428.6699523925781, -105.84788513183594, -1363.209228515625, 460.14398193359375, -295.87237548828125, -321.8842468261719, -103.0093994140625, 143.61480712890625, -55.410072326660156, -476.08612060546875, -453.24774169921875, 214.87550354003906, -127.0279769897461, 545.7835083007812, -78.90380859375, 111.8714599609375, 1352.49951171875, 398.75799560546875, 1104.9927978515625, 150.8889617919922, 783.7439575195312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000479.npy"} +{"epoch": 0.7033773861967695, "step": 480, "batch_size": 64, "mean": 292.4875183105469, "std": 583.5133666992188, "min": -972.6875610351562, "p10": -468.8891601562499, "median": 275.0008239746094, "p90": 998.4779479980471, "max": 2235.254638671875, "pos_frac": 0.6875, "sample": [-512.4365844726562, 408.09490966796875, -176.88925170898438, 146.82537841796875, 624.5467529296875, -53.419532775878906, 269.1075744628906, 952.1695556640625, 677.9053955078125, 805.5462036132812, 645.9774169921875, 1330.4853515625, 538.1282958984375, -367.27850341796875, -548.4324340820312, -587.1636962890625, 571.95361328125, 863.1361083984375, -139.86318969726562, 432.8157958984375, 80.3291244506836, -79.72673034667969, 20.172897338867188, 460.99114990234375, 481.7662048339844, 6.4832611083984375, 2235.254638671875, 256.15850830078125, 650.6190795898438, -292.7894287109375, 788.1202392578125, 1018.3244018554688, 834.239013671875, -241.83311462402344, -582.0963134765625, 315.6290588378906, 464.65228271484375, -124.14756774902344, 477.15960693359375, -972.6875610351562, -711.4833984375, -747.78125, 678.4037475585938, 1034.6339111328125, 1466.798095703125, -162.10916137695312, 280.8940734863281, 452.40203857421875, -63.42104721069336, 418.6148986816406, -9.387229919433594, -357.74139404296875, 132.66259765625, 148.45553588867188, 161.45913696289062, 26.999011993408203, 191.04193115234375, 1.0400047302246094, 703.7994995117188, 1082.838134765625, 529.4244995117188, -195.84498596191406, 1175.1231689453125, 804.5513916015625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000480.npy"} +{"epoch": 0.7048458149779736, "step": 481, "batch_size": 64, "mean": 456.96002197265625, "std": 579.5945434570312, "min": -911.3818359375, "p10": -227.69408874511717, "median": 423.5525817871094, "p90": 1064.8568115234377, "max": 2201.529541015625, "pos_frac": 0.765625, "sample": [2201.529541015625, -911.3818359375, 594.7909545898438, 304.0985107421875, -238.08213806152344, -60.86076354980469, -117.94317626953125, 424.1324157714844, 475.1095275878906, 1594.46728515625, 109.37438201904297, -232.19497680664062, 140.86940002441406, -319.3868408203125, 468.91400146484375, 908.1383056640625, -113.84222412109375, 722.1388549804688, 1531.457763671875, -49.67362976074219, 26.242919921875, 851.6428833007812, 379.4543762207031, 814.4594116210938, 439.6826477050781, 204.035400390625, 169.69287109375, 1025.041748046875, 713.6705322265625, -427.0041809082031, 154.41583251953125, 825.05078125, 464.95355224609375, -262.79443359375, 691.4578247070312, 839.7081298828125, -206.34402465820312, -36.73530960083008, 1081.92041015625, 469.3951416015625, -217.1920166015625, 119.47712707519531, 1006.3399658203125, 894.4212036132812, 44.072364807128906, 61.95098114013672, 1641.8045654296875, 253.27285766601562, 1639.2403564453125, 1005.3841552734375, 264.989990234375, -6.9840240478515625, 422.9727478027344, 597.6582641601562, 554.6220092773438, 185.461669921875, 242.85848999023438, 898.4878540039062, 843.8375244140625, 817.990234375, 1477.4178466796875, -248.5970916748047, 352.05084228515625, 744.3030395507812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000481.npy"} +{"epoch": 0.7063142437591777, "step": 482, "batch_size": 64, "mean": 204.8200225830078, "std": 610.3878173828125, "min": -2033.191650390625, "p10": -393.6688323974609, "median": 255.36609649658203, "p90": 846.9638122558597, "max": 1735.81787109375, "pos_frac": 0.6875, "sample": [300.9103088378906, -48.23313903808594, 346.6501159667969, -956.850341796875, 441.79815673828125, 621.1365966796875, -992.8858642578125, 623.6550903320312, 768.7550048828125, 458.42706298828125, 367.6843566894531, 692.5654907226562, -17.401779174804688, 258.199462890625, 708.2902221679688, 1241.0794677734375, 246.02621459960938, -351.52606201171875, 345.4556579589844, 319.1812744140625, -134.07444763183594, 473.1952209472656, 406.09716796875, 880.4818725585938, -864.7908935546875, 331.5360107421875, 75.98430633544922, 917.4087524414062, 252.53273010253906, 470.046875, 553.1123046875, 660.7402954101562, 694.5510864257812, 237.62057495117188, -336.7836608886719, 199.96566772460938, 899.0927124023438, -409.5626525878906, 1415.3853759765625, -356.583251953125, 497.5389099121094, 71.1136245727539, -229.58819580078125, 33.38300323486328, 176.61033630371094, 84.98544311523438, -2033.191650390625, 350.07891845703125, 1735.81787109375, -26.26136016845703, 27.541656494140625, -66.87727355957031, -24.393798828125, 401.22918701171875, -1133.609619140625, 290.1982727050781, 167.86856079101562, 1284.9522705078125, -167.0189208984375, 105.00651550292969, -159.18731689453125, -124.18021392822266, 719.3013916015625, -611.7101440429688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000482.npy"} +{"epoch": 0.7077826725403817, "step": 483, "batch_size": 64, "mean": 471.5952453613281, "std": 590.0224609375, "min": -506.3753967285156, "p10": -257.17372436523436, "median": 389.31396484375, "p90": 1172.659411621094, "max": 2983.52734375, "pos_frac": 0.796875, "sample": [-143.62464904785156, 186.33755493164062, 1472.5941162109375, 810.2893676757812, -285.40771484375, 832.5061645507812, 418.663330078125, 350.4784240722656, 718.5986938476562, 701.2371215820312, 412.306396484375, 778.346435546875, 526.0657958984375, 114.72633361816406, 253.69979858398438, 234.37132263183594, 1023.7302856445312, 899.26611328125, -368.93896484375, 542.5787353515625, 373.6595764160156, -18.286794662475586, 799.344970703125, 950.956298828125, 40.78578186035156, -259.7964172363281, 1233.59375, 713.738525390625, 37.52745056152344, 1098.869140625, -366.2361145019531, 1374.391357421875, 54.602256774902344, 867.9727172851562, -3.5592193603515625, 1204.2838134765625, 344.6530456542969, 795.8031616210938, 216.54568481445312, 1031.837890625, 774.2996215820312, 587.9826049804688, 505.44598388671875, -401.43560791015625, 122.5911636352539, 318.712158203125, 404.9683532714844, 293.3885498046875, 1068.5921630859375, -229.46827697753906, 226.36297607421875, -506.3753967285156, 142.16305541992188, 2983.52734375, -240.76646423339844, 148.57383728027344, 1322.9681396484375, 249.0318603515625, 610.7496337890625, -251.05410766601562, 825.9345703125, 1304.0875244140625, 231.42462158203125, -278.12005615234375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000483.npy"} +{"epoch": 0.7092511013215859, "step": 484, "batch_size": 64, "mean": 425.05419921875, "std": 523.9536743164062, "min": -696.6483154296875, "p10": -170.56840515136716, "median": 357.87937927246094, "p90": 1070.781640625, "max": 1877.45263671875, "pos_frac": 0.796875, "sample": [401.58203125, -211.80503845214844, 1877.45263671875, 165.2385711669922, -153.2296905517578, -266.300537109375, 498.94830322265625, 89.88347625732422, 1794.389404296875, 159.64190673828125, 99.99806213378906, 473.3793029785156, 496.50958251953125, 582.4371337890625, 913.4927368164062, 710.1150512695312, 356.369384765625, 97.965576171875, -201.76495361328125, -696.6483154296875, 310.9489440917969, 18.750354766845703, 1090.988037109375, 820.8267211914062, 1772.5634765625, 98.44425964355469, 426.3265380859375, 763.9256591796875, 28.583324432373047, -71.78939819335938, -68.69705200195312, -20.87713623046875, 773.9132080078125, 1125.8504638671875, -12.538047790527344, 214.13348388671875, 270.969970703125, 149.12118530273438, 186.3330841064453, -197.7363739013672, 7.247108459472656, 359.3893737792969, 833.3582153320312, 436.7260437011719, 1252.4085693359375, 23.8287353515625, 683.2669067382812, 830.83349609375, 426.02490234375, -223.09011840820312, -43.94252014160156, 789.766357421875, 1070.609375, 964.0714111328125, 42.41032409667969, 1070.85546875, 988.5059814453125, 449.168212890625, 547.8555297851562, 151.06442260742188, 859.2567138671875, 51.24421691894531, -177.99928283691406, 942.9155883789062], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000484.npy"} +{"epoch": 0.71071953010279, "step": 485, "batch_size": 64, "mean": 407.4237060546875, "std": 572.7925415039062, "min": -896.6678466796875, "p10": -142.7167755126953, "median": 290.84515380859375, "p90": 1159.6385986328128, "max": 2123.557861328125, "pos_frac": 0.765625, "sample": [676.7897338867188, 683.9628295898438, 34.26008605957031, 260.8367919921875, 418.4757385253906, 139.9417724609375, 1114.8638916015625, 122.06289672851562, 460.6751708984375, 547.3851318359375, 246.28167724609375, 44.58519744873047, 1380.1636962890625, 1367.6053466796875, 188.41448974609375, 1114.53955078125, -496.84814453125, -19.50889015197754, 1094.7991943359375, 978.23876953125, 1050.69189453125, 1373.77880859375, 177.29937744140625, 515.99560546875, 2123.557861328125, 1178.8277587890625, -35.59052276611328, -20.814138412475586, 802.3333129882812, -139.28579711914062, -101.82669830322266, -37.180145263671875, -385.1357421875, 209.22598266601562, 237.41165161132812, 785.6971435546875, 439.9798583984375, 70.03925323486328, 205.83761596679688, 649.0140380859375, -134.54180908203125, 144.45761108398438, 1868.46728515625, 414.43841552734375, -144.18719482421875, 366.1753234863281, 352.111083984375, 457.49456787109375, 320.853515625, -590.797119140625, 1195.9786376953125, 967.9332275390625, 41.39828872680664, -274.3791809082031, 63.38874816894531, -277.3592529296875, 860.72314453125, 157.99314880371094, 358.4664001464844, 151.83953857421875, -1.1259689331054688, -896.6678466796875, 856.930419921875, 358.1445007324219], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000485.npy"} +{"epoch": 0.7121879588839941, "step": 486, "batch_size": 64, "mean": 463.13702392578125, "std": 695.4199829101562, "min": -1186.5194091796875, "p10": -465.84015502929685, "median": 419.19732666015625, "p90": 1324.3428833007813, "max": 2911.0380859375, "pos_frac": 0.796875, "sample": [984.936767578125, 586.706298828125, 104.83576965332031, 446.8116760253906, 420.0394287109375, -718.9918212890625, 171.15652465820312, 92.95755767822266, 901.4923706054688, -127.47894287109375, 521.9517211914062, 1477.40576171875, 644.9793090820312, -814.1643676757812, -485.772705078125, 1469.501953125, 1037.3304443359375, 975.8670654296875, -216.29869079589844, 1105.3197021484375, 1325.2147216796875, -419.33087158203125, 1637.413818359375, 1148.939453125, 232.7906036376953, -272.4779968261719, 651.0650634765625, 467.3978576660156, 81.20510864257812, 1543.1312255859375, 1265.06982421875, 408.5501708984375, -507.2752685546875, -770.9904174804688, 274.73236083984375, 335.412109375, 156.47900390625, 1434.0831298828125, 844.4138793945312, 814.8089599609375, 332.3921203613281, 705.3790283203125, 418.355224609375, -77.43450927734375, 592.82568359375, 962.5802001953125, 955.2091064453125, 580.8388671875, 763.8248901367188, 1322.30859375, 179.27761840820312, 316.5377502441406, 84.81942749023438, 411.3797607421875, 676.0267333984375, 137.9365997314453, 64.64554595947266, 492.8343811035156, -1186.5194091796875, 14.897476196289062, 351.5884094238281, -31.01751708984375, -568.1734619140625, 2911.0380859375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000486.npy"} +{"epoch": 0.7136563876651982, "step": 487, "batch_size": 64, "mean": 340.75640869140625, "std": 542.5021362304688, "min": -1719.83154296875, "p10": -214.32758331298822, "median": 313.9120330810547, "p90": 1025.3808471679688, "max": 1800.725830078125, "pos_frac": 0.8125, "sample": [291.383544921875, 1187.3359375, 213.0017852783203, -381.7068176269531, 1025.6912841796875, 1022.4168701171875, 740.553466796875, 523.648193359375, 1604.320556640625, -43.50279998779297, 152.48028564453125, -467.70281982421875, 1075.0406494140625, -236.21054077148438, 109.67638397216797, 1800.725830078125, 364.81658935546875, -400.0660400390625, 763.7339477539062, 749.2835083007812, 453.5335693359375, 612.03662109375, -1719.83154296875, 163.23251342773438, -117.20098114013672, 406.10845947265625, 44.425010681152344, 1024.656494140625, 1291.10791015625, 87.4507064819336, 217.88426208496094, 290.5885009765625, 422.39996337890625, 574.5152587890625, 213.800537109375, -600.8726806640625, 206.11746215820312, -556.3402099609375, 601.6635131835938, 315.0461730957031, 351.93212890625, 87.6390380859375, 556.3331298828125, -163.26734924316406, 485.24481201171875, 152.93359375, 199.55467224121094, 326.314208984375, 390.62322998046875, 198.765625, 141.9382781982422, 482.90631103515625, 312.77789306640625, 434.4873046875, 120.23854064941406, -111.31109619140625, 197.48593139648438, 1107.1182861328125, 663.1853637695312, 697.7657470703125, 42.940765380859375, 601.1774291992188, -130.64700317382812, 637.031982421875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000487.npy"} +{"epoch": 0.7151248164464024, "step": 488, "batch_size": 64, "mean": 355.7095642089844, "std": 450.18145751953125, "min": -560.2025146484375, "p10": -117.80824890136718, "median": 326.14768981933594, "p90": 1031.1778137207032, "max": 1611.607666015625, "pos_frac": 0.75, "sample": [-4.437416076660156, 11.72964859008789, 1415.878173828125, 267.93804931640625, 1010.3440551757812, 213.59727478027344, 680.1366577148438, -45.40379333496094, -122.06472778320312, 342.2532653808594, 420.9647521972656, 229.4607391357422, 589.0490112304688, 1142.701904296875, 347.32562255859375, 816.6832885742188, -40.345306396484375, 35.23828887939453, -560.2025146484375, -107.87646484375, 490.7405700683594, -240.68045043945312, 532.7724609375, 287.8017272949219, 44.93446350097656, -143.8692626953125, 87.3076171875, 604.277099609375, 1611.607666015625, 230.3101043701172, 310.0421142578125, 617.895751953125, -402.2997741699219, 1040.1065673828125, 745.0183715820312, 106.59654235839844, 1249.0523681640625, 450.0151062011719, 351.3777770996094, 139.35748291015625, 597.3113403320312, 837.853271484375, 216.67242431640625, 361.3077392578125, 204.64503479003906, 426.4718322753906, 586.7470092773438, 1243.807373046875, -164.76116943359375, -68.60221099853516, -32.50414276123047, 504.7195129394531, 435.3901062011719, 22.970006942749023, 436.8731689453125, 583.5741577148438, -72.01229858398438, 442.0944519042969, -0.7767372131347656, -88.21521759033203, -416.9764404296875, 532.6712646484375, 269.1953125, 1151.621826171875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000488.npy"} +{"epoch": 0.7165932452276065, "step": 489, "batch_size": 64, "mean": 364.5850830078125, "std": 513.8212890625, "min": -522.4893798828125, "p10": -121.11866607666013, "median": 222.79039001464844, "p90": 1086.1463195800784, "max": 1970.588134765625, "pos_frac": 0.734375, "sample": [793.2755737304688, 88.11720275878906, 228.8152313232422, 74.55415344238281, 1.464620590209961, 145.283447265625, 219.530029296875, -80.1435317993164, 111.43795776367188, 226.05075073242188, -182.72291564941406, 268.1989440917969, 539.1917724609375, 813.3683471679688, 461.5899963378906, 81.76029968261719, -203.16822814941406, 348.51580810546875, -67.03583526611328, 896.8272094726562, -133.12693786621094, -86.24026489257812, 270.06683349609375, 211.20062255859375, -24.29189682006836, 159.33767700195312, 107.22209167480469, -8.886409759521484, 209.0474853515625, 679.28857421875, -93.099365234375, 50.50471496582031, -29.533479690551758, -154.8638916015625, 1232.159423828125, 650.0856323242188, 18.822185516357422, 109.99861145019531, 362.53460693359375, -522.4893798828125, 758.9866333007812, -341.5581359863281, 1314.188720703125, -344.3057861328125, 1120.8050537109375, 1005.2759399414062, 641.21533203125, 1970.588134765625, 319.51611328125, 707.2462158203125, 1812.7294921875, 632.6190185546875, 537.772705078125, 1538.961669921875, -67.93111419677734, -33.83618927001953, 718.6368408203125, 470.3979797363281, 562.71630859375, 549.5904541015625, -5.186431884765625, 1299.6048583984375, 380.2680969238281, 12.4947509765625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000489.npy"} +{"epoch": 0.7180616740088106, "step": 490, "batch_size": 64, "mean": 369.5591735839844, "std": 634.1473388671875, "min": -1115.2021484375, "p10": -386.57457885742184, "median": 215.93301391601562, "p90": 1244.4917602539062, "max": 1904.1964111328125, "pos_frac": 0.78125, "sample": [72.7960205078125, -1114.7445068359375, 1014.1502075195312, -163.43479919433594, 161.00106811523438, -63.376731872558594, 101.66351318359375, 808.1078491210938, 763.1318969726562, -571.4889526367188, 158.70556640625, -72.47634887695312, 1353.35546875, 109.51089477539062, 521.6034545898438, -342.6574401855469, -202.12771606445312, 1220.7808837890625, 645.2865600585938, -598.828125, 32.60380172729492, 147.401611328125, -633.2100830078125, 86.33613586425781, -1115.2021484375, 1254.653564453125, 125.59048461914062, 481.3035888671875, 423.4412536621094, 757.4583129882812, 343.35308837890625, 1904.1964111328125, 167.00921630859375, 559.1476440429688, 52.464439392089844, -161.8052978515625, 1715.9556884765625, 782.5059814453125, 1684.065185546875, 87.26527404785156, 221.81292724609375, 292.2701721191406, 1272.804443359375, 695.1143798828125, 135.77293395996094, 844.7117309570312, -405.3962097167969, 712.26513671875, 46.07452392578125, 543.7313842773438, 301.14288330078125, -511.24334716796875, 1672.822998046875, -27.06451416015625, 973.437744140625, 1005.1787719726562, 66.09051513671875, 205.16439819335938, 566.809814453125, 895.4649658203125, 59.39236831665039, 794.427490234375, 585.456298828125, 210.0531005859375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000490.npy"} +{"epoch": 0.7195301027900147, "step": 491, "batch_size": 64, "mean": 246.5, "std": 469.7717590332031, "min": -850.2374267578125, "p10": -316.23651733398435, "median": 188.40196228027344, "p90": 818.9652465820313, "max": 1560.0478515625, "pos_frac": 0.71875, "sample": [332.3076171875, -250.8646697998047, 824.7731323242188, 335.7781982421875, 1371.771484375, 189.75035095214844, 368.3946228027344, -120.04121398925781, 444.5138854980469, 98.4691162109375, 209.49574279785156, 78.33644104003906, 58.531105041503906, -392.038818359375, 329.6945495605469, 100.00410461425781, 679.2962036132812, 396.63330078125, -73.68534851074219, 443.3080749511719, 141.11666870117188, 57.28156280517578, 972.12744140625, -397.7422790527344, 239.0082550048828, 242.2712860107422, 628.8316650390625, -264.3716735839844, 805.4135131835938, 89.99256134033203, 1159.4390869140625, 436.1997985839844, -850.2374267578125, 114.64842987060547, -299.1187744140625, 279.4729309082031, -39.489540100097656, 1087.821044921875, 201.70484924316406, 436.87750244140625, 669.416015625, -539.5288696289062, 1560.0478515625, 85.84078979492188, -202.5656280517578, 218.14212036132812, -323.57269287109375, 156.8202667236328, -55.47638702392578, 608.6478271484375, 68.17405700683594, -403.6824951171875, 722.7918090820312, -98.63127136230469, -51.79511260986328, 187.05357360839844, 352.45635986328125, 801.990234375, 120.69648742675781, -50.99516296386719, 1325.952880859375, 75.29058074951172, 456.6455078125, -373.3934020996094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000491.npy"} +{"epoch": 0.7209985315712188, "step": 492, "batch_size": 64, "mean": 251.22274780273438, "std": 555.3359375, "min": -864.7789306640625, "p10": -403.88747558593747, "median": 178.0927734375, "p90": 1076.2764282226562, "max": 1394.9505615234375, "pos_frac": 0.71875, "sample": [182.66268920898438, 91.04386901855469, 474.4763488769531, 41.30583953857422, 233.78268432617188, 45.56297302246094, 76.38412475585938, 667.6182861328125, -559.1774291992188, -189.9828338623047, 173.52285766601562, 586.7904052734375, 702.1680908203125, -351.4785461425781, -864.7789306640625, 1065.382080078125, -163.66287231445312, 1223.4588623046875, -143.1512451171875, 1394.9505615234375, -537.5582885742188, 1326.80615234375, 397.5498962402344, -326.6601867675781, 336.05364990234375, 368.450927734375, 282.2084045410156, -413.80926513671875, 78.80351257324219, 300.24462890625, 2.0770263671875, 20.3430233001709, 184.66629028320312, -789.8088989257812, -191.942626953125, 1140.5592041015625, 938.33642578125, 268.05426025390625, -700.071044921875, 34.67571258544922, 1080.9454345703125, -739.7998046875, 1140.9527587890625, -20.728797912597656, -100.28889465332031, 833.4739990234375, 221.06199645996094, 94.177978515625, 1034.345458984375, 36.70064926147461, 402.6562805175781, -18.957931518554688, 245.65545654296875, -380.73663330078125, 1030.41259765625, 918.81298828125, 599.7330932617188, 93.34687805175781, 58.23930740356445, 572.7452392578125, -247.09182739257812, 1381.053955078125, 61.299766540527344, 374.3887939453125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000492.npy"} +{"epoch": 0.7224669603524229, "step": 493, "batch_size": 64, "mean": 275.09185791015625, "std": 510.8553466796875, "min": -1727.6539306640625, "p10": -183.99522857666014, "median": 187.83468627929688, "p90": 1031.7489257812501, "max": 1375.0899658203125, "pos_frac": 0.703125, "sample": [990.2393798828125, 78.16179656982422, 489.4042663574219, 280.1079406738281, -103.54647827148438, 1251.671142578125, 890.916259765625, -29.902130126953125, 147.77975463867188, -510.15399169921875, 464.65533447265625, 96.87934875488281, 1038.641357421875, 462.12335205078125, 420.17669677734375, 148.42660522460938, -286.0887756347656, 401.5465393066406, 140.72222900390625, 42.963050842285156, 144.30882263183594, 325.5793151855469, 445.0342712402344, 350.470703125, -166.22235107421875, 432.554931640625, 636.3797607421875, -188.4466552734375, -29.9953556060791, -133.73043823242188, 36.93914794921875, -1727.6539306640625, 449.55120849609375, 8.671127319335938, 772.8099975585938, -192.84725952148438, 1268.9635009765625, 1217.2686767578125, 174.675048828125, -173.6085662841797, -83.04922485351562, 318.9615478515625, 1060.3231201171875, 804.7293701171875, 343.9483337402344, 1020.5938720703125, -496.0598449707031, 323.3182067871094, 1375.0899658203125, 1036.5296630859375, 105.6887435913086, 614.912109375, -193.4722442626953, 200.99432373046875, 245.86737060546875, -156.15371704101562, -103.54563903808594, 268.8494567871094, 678.249267578125, 147.7841033935547, 158.89404296875, -37.68218994140625, -94.24392700195312, -0.0732269287109375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000493.npy"} +{"epoch": 0.723935389133627, "step": 494, "batch_size": 64, "mean": 443.34161376953125, "std": 470.8882141113281, "min": -568.2430419921875, "p10": -25.201585006713852, "median": 413.70928955078125, "p90": 1039.5515686035158, "max": 1713.5010986328125, "pos_frac": 0.859375, "sample": [-2.2891273498535156, 338.870361328125, 594.191650390625, 414.9767761230469, 1713.5010986328125, 1343.978759765625, -568.2430419921875, 810.9594116210938, 667.5142211914062, 106.66000366210938, 421.93084716796875, -9.887592315673828, 753.7921142578125, 320.80511474609375, -238.81964111328125, 1062.3309326171875, 717.69873046875, -219.75958251953125, 415.52484130859375, 801.5276489257812, 153.64593505859375, -439.237548828125, 338.1961975097656, 1020.1333618164062, 859.2442626953125, 48.50164031982422, 753.9090576171875, 937.038330078125, 217.3430938720703, 300.6712951660156, -432.0177001953125, 50.09681701660156, 337.0305480957031, -201.37359619140625, 1706.6671142578125, 412.4418029785156, 461.20501708984375, 99.62645721435547, 367.34197998046875, 1318.224365234375, 1047.8736572265625, 502.15264892578125, -31.764724731445312, 27.59777069091797, 525.923583984375, 589.8535766601562, 690.5194702148438, 908.4224243164062, 375.9185485839844, 648.9925537109375, 23.589250564575195, 90.04866027832031, 537.5182495117188, 139.71383666992188, 166.06422424316406, 490.27862548828125, 823.50537109375, 643.1348876953125, 361.72894287109375, 416.25469970703125, 1174.366943359375, 80.38682556152344, 329.1739807128906, 58.65650939941406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000494.npy"} +{"epoch": 0.7254038179148311, "step": 495, "batch_size": 64, "mean": 383.71905517578125, "std": 579.5550537109375, "min": -981.5274047851562, "p10": -201.83419647216795, "median": 338.87611389160156, "p90": 1066.8155090332034, "max": 2546.05859375, "pos_frac": 0.78125, "sample": [368.2850341796875, 56.15672302246094, -21.53240966796875, -193.03765869140625, 256.899658203125, 380.98895263671875, -483.2177734375, 153.385498046875, 258.400634765625, 35.604522705078125, 158.07740783691406, 784.4522705078125, 835.6859741210938, -588.4349365234375, 350.274169921875, 440.0400085449219, -238.400634765625, 799.5262451171875, 88.4317626953125, 610.6583251953125, 163.88597106933594, -58.911224365234375, 245.8434295654297, 42.225154876708984, 453.1379089355469, 1089.0838623046875, 900.6019287109375, 741.8576049804688, 182.12576293945312, 569.3985595703125, 283.80462646484375, 394.74664306640625, 112.19963836669922, 372.7152099609375, 1102.184326171875, 1014.8560180664062, 825.22802734375, -205.60414123535156, 2546.05859375, 1266.3896484375, 349.7342834472656, 56.57398986816406, 146.99813842773438, 596.3592529296875, 366.2560119628906, -630.0629272460938, -24.608718872070312, 421.22662353515625, 125.7118148803711, -344.7051086425781, -67.36431121826172, -8.7440185546875, 420.6240234375, 511.960205078125, 1399.2799072265625, 599.0112915039062, 325.7022399902344, 756.9817504882812, -981.5274047851562, 1270.20068359375, 894.1280517578125, 328.0179443359375, 2050.51904296875, -98.32479095458984], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000495.npy"} +{"epoch": 0.7268722466960352, "step": 496, "batch_size": 64, "mean": 206.6892852783203, "std": 420.43902587890625, "min": -777.1385498046875, "p10": -364.28716735839845, "median": 167.61241149902344, "p90": 822.4201416015626, "max": 1112.853759765625, "pos_frac": 0.75, "sample": [121.93746948242188, -446.2652587890625, 937.6326904296875, 847.822998046875, -655.6426391601562, 789.7584838867188, 156.77279663085938, 299.7225341796875, 832.0765380859375, 63.228424072265625, 1112.853759765625, 320.8089294433594, 370.2806091308594, 250.99227905273438, -368.571533203125, 149.114501953125, -278.5166015625, 427.54632568359375, -739.2918701171875, 884.6624755859375, 680.928955078125, 360.4391174316406, 799.8885498046875, 347.9892272949219, 856.5126342773438, 339.38250732421875, -97.61705017089844, -16.798782348632812, 75.00445556640625, -354.2903137207031, 193.54376220703125, 202.59275817871094, 146.53262329101562, 380.5444030761719, 119.95848846435547, -70.01091003417969, 456.3385009765625, 117.9747085571289, -544.0753784179688, 289.3187255859375, -410.07611083984375, -338.51068115234375, 726.159423828125, 19.742835998535156, 167.30726623535156, 567.2152099609375, 167.9175567626953, 426.2195739746094, 594.5709228515625, 523.8422241210938, 111.36587524414062, -174.48121643066406, 432.59490966796875, 13.398611068725586, 123.80752563476562, -777.1385498046875, 92.79228210449219, 878.6915283203125, 35.608543395996094, -3.4251937866210938, 440.1152648925781, 216.97889709472656, 38.47765350341797, -6.14019775390625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000496.npy"} +{"epoch": 0.7283406754772394, "step": 497, "batch_size": 64, "mean": 210.45883178710938, "std": 457.6186218261719, "min": -888.119873046875, "p10": -228.42888641357422, "median": 179.23990631103516, "p90": 818.8306396484376, "max": 1325.493896484375, "pos_frac": 0.671875, "sample": [-210.87635803222656, 856.413818359375, 689.2627563476562, 593.296630859375, 10.874626159667969, -231.31268310546875, 558.6845703125, -221.7000274658203, -99.50544738769531, -182.49830627441406, 283.54180908203125, 1019.7044677734375, 1180.374267578125, 366.42510986328125, 49.15773010253906, 166.85821533203125, 398.0588073730469, 191.62159729003906, 627.0879516601562, 453.8515319824219, -103.80436706542969, -68.19670867919922, 560.8699340820312, 801.08642578125, -31.672653198242188, 39.1282958984375, 826.435302734375, -161.20057678222656, 11.482406616210938, -595.048828125, 512.9468994140625, 77.86167907714844, -65.99894714355469, 447.0867919921875, 26.693851470947266, 1235.5252685546875, 17.60302734375, 1325.493896484375, 28.004806518554688, 78.2764892578125, 145.21377563476562, 230.64915466308594, 308.10198974609375, 721.330078125, -888.119873046875, 195.33035278320312, 558.3927612304688, -502.6644592285156, 375.52960205078125, 197.70938110351562, 262.8651428222656, 273.37225341796875, 947.1351928710938, -160.21202087402344, -37.548362731933594, 252.76730346679688, -186.5791473388672, -283.5189208984375, -83.74165344238281, -742.417724609375, 659.5211181640625, -489.38702392578125, 378.4375915527344, -124.69505310058594], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000497.npy"} +{"epoch": 0.7298091042584435, "step": 498, "batch_size": 64, "mean": 341.8111572265625, "std": 552.0615844726562, "min": -1225.373046875, "p10": -266.1801513671875, "median": 358.25457763671875, "p90": 958.5956848144533, "max": 1808.993896484375, "pos_frac": 0.734375, "sample": [-701.9121704101562, 187.68142700195312, 638.4929809570312, 231.16006469726562, -183.93319702148438, 692.4388427734375, 668.6526489257812, 977.106201171875, 403.7172546386719, 625.4520874023438, 408.1546936035156, 718.799560546875, 249.15869140625, 864.0260620117188, -1225.373046875, 687.9954223632812, 907.7785034179688, 1229.724609375, -13.094779968261719, 559.3275756835938, 145.179931640625, 587.9620361328125, -138.40182495117188, 877.8814697265625, -247.39166259765625, 291.5000915527344, 364.46221923828125, 109.04969024658203, -316.08135986328125, -16.57140350341797, 357.0290222167969, 337.1540832519531, -1052.2620849609375, 1325.3909912109375, 543.0944213867188, 71.71533203125, -161.41006469726562, 509.74322509765625, 589.4266967773438, 515.8651123046875, 1318.1943359375, 53.35895538330078, 43.62359619140625, 100.29603576660156, 653.891845703125, 550.3590087890625, -426.2663879394531, 56.692291259765625, 1368.607421875, 851.6658325195312, 289.44183349609375, -137.727783203125, 359.4801330566406, -89.07593536376953, -274.23236083984375, -0.666656494140625, 508.61785888671875, 165.2587890625, 915.4044799804688, 551.0224609375, -323.3712463378906, 1070.3436279296875, 1808.993896484375, -156.6884002685547], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000498.npy"} +{"epoch": 0.7312775330396476, "step": 499, "batch_size": 64, "mean": 419.45245361328125, "std": 553.9248046875, "min": -1000.705322265625, "p10": -153.64368820190424, "median": 461.176025390625, "p90": 1037.924060058594, "max": 2149.9462890625, "pos_frac": 0.765625, "sample": [1054.595703125, 110.58949279785156, 209.4908447265625, -1000.705322265625, -100.65740203857422, 520.4774169921875, 464.3012390136719, 601.365966796875, -37.83844757080078, -72.69387817382812, 635.4468994140625, 515.52197265625, 98.58768463134766, 997.4625244140625, 580.4225463867188, 163.21530151367188, 703.5986328125, -571.5056762695312, 2149.9462890625, 285.862548828125, 215.74420166015625, 846.1857299804688, 668.0228271484375, 35.639930725097656, -515.516845703125, 458.0508117675781, 276.07208251953125, 931.0504150390625, -303.484375, 1087.9620361328125, 195.3590087890625, 14.186767578125, 620.0991821289062, -57.862525939941406, 301.8516540527344, -13.560171127319336, 589.035400390625, 999.0235595703125, 943.80908203125, -176.3520965576172, 6.64484977722168, 566.1521606445312, 695.6744384765625, 614.0235595703125, 717.6832275390625, 104.29454803466797, 650.7838134765625, 1811.987060546875, 1345.2362060546875, 120.48084259033203, 516.229736328125, 752.6585693359375, 391.0094909667969, 762.921142578125, -78.15371704101562, 799.701416015625, 1367.8988037109375, 369.7634582519531, -550.771484375, -92.32615661621094, -221.25328063964844, 660.0364379882812, -51.053382873535156, 1162.5341796875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000499.npy"} +{"epoch": 0.7327459618208517, "step": 500, "batch_size": 64, "mean": 343.47540283203125, "std": 545.9921875, "min": -971.7412109375, "p10": -409.6001434326171, "median": 251.77123260498047, "p90": 959.5072021484375, "max": 1750.04296875, "pos_frac": 0.765625, "sample": [81.04489135742188, -200.8260040283203, -577.6007080078125, 1750.04296875, -550.37109375, 494.8980407714844, 963.2872314453125, 583.0401611328125, 792.3264770507812, 62.72740936279297, 950.6871337890625, 703.58837890625, 791.6670532226562, -465.77154541015625, -4.077104568481445, 524.3553466796875, 39.022491455078125, 17.25390625, 238.23545837402344, 1216.0738525390625, 834.836669921875, -13.310104370117188, 734.8082275390625, 1297.46533203125, -36.777198791503906, 798.478759765625, -522.1990966796875, 821.5264282226562, 824.1560668945312, -746.6032104492188, 840.7296752929688, 839.85302734375, -30.115676879882812, 90.21849822998047, 137.59808349609375, 806.1464233398438, -624.0968017578125, 137.84872436523438, 1061.9503173828125, -971.7412109375, -132.5910186767578, 87.49943542480469, 226.925537109375, 628.4013061523438, 71.0185317993164, -142.8487548828125, 565.368408203125, 85.2725601196289, 51.34814453125, 265.3070068359375, 1198.128662109375, 678.5341186523438, 734.6494140625, 196.24728393554688, 565.4569091796875, 488.8572082519531, 641.4322509765625, 335.69097900390625, 72.1325912475586, 521.6985473632812, 106.07734680175781, 1241.9124755859375, -278.5335388183594, 84.06501770019531], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000500.npy"} +{"epoch": 0.7342143906020558, "step": 501, "batch_size": 64, "mean": 363.14068603515625, "std": 570.55517578125, "min": -1016.7554931640625, "p10": -296.8836944580078, "median": 352.307861328125, "p90": 1146.6254516601564, "max": 1821.232177734375, "pos_frac": 0.765625, "sample": [-74.3940200805664, 354.38214111328125, 351.645263671875, -416.34381103515625, -84.76113891601562, 1272.7520751953125, 225.59434509277344, 913.2935180664062, 286.635498046875, 27.744842529296875, 208.23654174804688, -184.4510040283203, 357.8734130859375, 196.7729949951172, 1821.232177734375, 708.9837036132812, 1042.767578125, 400.1168212890625, 381.168212890625, 846.183837890625, 1183.440185546875, 1.8832550048828125, 229.5084686279297, 197.07296752929688, 774.4451904296875, -585.0147705078125, -623.1751708984375, 459.24981689453125, 997.793212890625, 536.7877197265625, -664.9016723632812, 248.64199829101562, 480.90313720703125, 330.9891357421875, 411.422607421875, 1343.84912109375, -203.30357360839844, 531.5158081054688, 323.6090087890625, 842.0429077148438, 1153.5377197265625, 11.701469421386719, -60.60322570800781, -281.8434143066406, 677.6421508789062, 57.34649658203125, 43.1215705871582, 590.588623046875, -7.268161773681641, -1016.7554931640625, 352.970458984375, 568.1395263671875, -303.32952880859375, -82.54959106445312, 39.817752838134766, -517.3052978515625, 1130.496826171875, 803.3047485351562, 1358.3544921875, 407.7889404296875, 1796.39697265625, 575.4908447265625, 484.12615966796875, 7.641696929931641], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000501.npy"} +{"epoch": 0.73568281938326, "step": 502, "batch_size": 64, "mean": 381.8934326171875, "std": 483.505859375, "min": -533.168701171875, "p10": -74.17421264648435, "median": 307.42140197753906, "p90": 1015.1596008300783, "max": 2643.1884765625, "pos_frac": 0.84375, "sample": [689.8995361328125, -344.8819580078125, 926.6624145507812, 518.9968872070312, 28.1800537109375, 730.450927734375, -111.33192443847656, -48.86920166015625, 1254.82666015625, 379.6676940917969, 395.219970703125, 266.2747497558594, 399.9114990234375, 1052.355712890625, 852.7251586914062, 23.2655029296875, 35.2239990234375, 231.0886688232422, -533.168701171875, 75.51693725585938, 489.5430603027344, 1033.4986572265625, 142.066162109375, 420.9422302246094, 36.658843994140625, 218.72349548339844, 66.68714904785156, 302.0223693847656, 441.7703857421875, 332.11566162109375, -189.04380798339844, 1078.656982421875, 47.25804901123047, 514.0234375, 528.5756225585938, 972.3684692382812, -121.43778991699219, 2643.1884765625, 10.684555053710938, 6.286333084106445, 648.154296875, 62.75859069824219, 255.7646484375, 312.8204345703125, 338.9306335449219, 530.2051391601562, 1263.86279296875, 82.52945709228516, 735.7581787109375, 452.80810546875, 814.78466796875, -8.754898071289062, -53.84593963623047, 117.18756103515625, 1036.5333251953125, -145.99343872070312, 354.29425048828125, 49.509246826171875, 702.196533203125, 275.89837646484375, -82.8863296508789, 352.46875, 249.68226623535156, 299.91033935546875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000502.npy"} +{"epoch": 0.737151248164464, "step": 503, "batch_size": 64, "mean": 342.99078369140625, "std": 452.3673400878906, "min": -351.58099365234375, "p10": -127.98060150146482, "median": 234.8280792236328, "p90": 780.6978271484376, "max": 2041.6422119140625, "pos_frac": 0.75, "sample": [28.535625457763672, -56.21617126464844, 1078.38037109375, -65.05140686035156, -193.85902404785156, -165.24444580078125, 467.6201171875, 236.8520050048828, 118.71056365966797, -86.5272445678711, -351.58099365234375, 342.5223388671875, -159.45767211914062, 275.43475341796875, 978.2937622070312, 740.9673461914062, 361.122802734375, 627.737060546875, 154.46762084960938, 783.278076171875, -38.40220642089844, -35.44267654418945, 133.84776306152344, 68.65235137939453, 415.0146484375, -101.24192810058594, -70.86119842529297, 584.8518676757812, 197.22323608398438, 199.3892822265625, 452.80621337890625, -76.44668579101562, 117.34040832519531, 681.89990234375, 18.53289031982422, -200.08563232421875, 2041.6422119140625, 1838.2452392578125, 555.2525634765625, 397.04168701171875, 196.470703125, 700.5612182617188, 678.8220825195312, 142.21214294433594, -83.41082763671875, 578.3701782226562, 219.1315155029297, 774.67724609375, 199.17001342773438, 387.536865234375, 202.31674194335938, 568.129150390625, 493.41021728515625, 232.8041534423828, 1025.0899658203125, 467.213134765625, -139.44003295898438, 721.789794921875, -318.6455993652344, 1049.8963623046875, 256.9879455566406, 419.87255859375, 765.202392578125, 117.99697875976562], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000503.npy"} +{"epoch": 0.7386196769456681, "step": 504, "batch_size": 64, "mean": 407.1573791503906, "std": 533.6115112304688, "min": -656.662841796875, "p10": -268.42975463867185, "median": 348.2099914550781, "p90": 1049.0947265625, "max": 1695.4080810546875, "pos_frac": 0.75, "sample": [-34.271148681640625, 156.79791259765625, 901.2892456054688, 415.48883056640625, 125.76460266113281, 217.54891967773438, 705.2083129882812, -260.5550537109375, 1695.4080810546875, 1280.33251953125, 1606.3299560546875, -468.62701416015625, 546.7098388671875, 625.666748046875, -50.16368103027344, -82.93537902832031, 599.2463989257812, 221.4395751953125, -22.64008331298828, 7.790618896484375, -0.7005691528320312, -0.05033111572265625, 388.39324951171875, -271.80462646484375, 271.4341735839844, 268.59429931640625, -489.11669921875, 535.0516357421875, 117.97525024414062, 351.681396484375, -384.9371032714844, 1021.356201171875, -222.24583435058594, 1315.920654296875, 926.84912109375, 712.3750610351562, 859.203369140625, 564.2677612304688, 216.08993530273438, 309.5259094238281, 175.59100341796875, 1232.4105224609375, 933.0670776367188, 510.3740539550781, 888.3802490234375, -92.97103881835938, -416.0021057128906, 766.1849975585938, 62.76625061035156, 865.385009765625, 205.18392944335938, 671.5531005859375, 1060.982666015625, 704.1632080078125, 344.73858642578125, 341.72259521484375, -379.58538818359375, 995.5916137695312, 1534.779052734375, 357.4400634765625, -656.662841796875, 475.1766357421875, 762.5184936523438, 39.5919189453125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000504.npy"} +{"epoch": 0.7400881057268722, "step": 505, "batch_size": 64, "mean": 208.8978271484375, "std": 441.0288391113281, "min": -740.4921264648438, "p10": -293.3909454345703, "median": 154.85781860351562, "p90": 819.37099609375, "max": 1274.1009521484375, "pos_frac": 0.6875, "sample": [13.222465515136719, 527.3551635742188, -276.610107421875, 541.00439453125, -53.42436218261719, 863.316650390625, 46.61842346191406, -8.072921752929688, 85.59197998046875, 16.289901733398438, 743.9371948242188, 817.3521728515625, 1274.1009521484375, -19.5521240234375, 571.5341186523438, 491.91766357421875, -172.08811950683594, 393.74322509765625, 156.99639892578125, -230.63526916503906, 255.7657012939453, 726.1495971679688, 152.71923828125, 86.69479370117188, -452.94378662109375, -683.0689086914062, 152.5142364501953, 286.3825378417969, 879.5526123046875, 273.59991455078125, -71.41529846191406, 427.3994140625, 411.3616943359375, -300.5827331542969, 177.20803833007812, 669.442138671875, 51.135047912597656, 712.1102294921875, -86.05821990966797, 557.6962280273438, -704.0512084960938, -740.4921264648438, 198.5894012451172, 304.897705078125, 10.951416015625, 23.22148895263672, 987.6286010742188, 493.750732421875, -226.0847930908203, 802.2496337890625, 146.3569793701172, -176.826904296875, -355.9468688964844, 849.8927001953125, 160.5073699951172, -109.44206237792969, 872.7145385742188, -498.6292724609375, -242.566162109375, 45.74049377441406, 820.2362060546875, 214.09039306640625, 572.1934814453125, -87.78071594238281], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000505.npy"} +{"epoch": 0.7415565345080763, "step": 506, "batch_size": 64, "mean": 360.545654296875, "std": 497.9388732910156, "min": -718.6923828125, "p10": -182.5025451660156, "median": 334.0558166503906, "p90": 995.515856933594, "max": 1940.095947265625, "pos_frac": 0.765625, "sample": [-371.44659423828125, 37.267974853515625, 647.2745361328125, 44.63450622558594, 275.2242126464844, 274.2415771484375, -249.64210510253906, 608.3898315429688, 781.388427734375, 11.619319915771484, 663.7328491210938, 227.25120544433594, 305.80419921875, 168.86927795410156, -718.6923828125, 1092.5093994140625, 420.10369873046875, -52.17961120605469, 360.123046875, 643.1838989257812, 421.05548095703125, 1342.2818603515625, 438.5365295410156, 36.86681365966797, 1940.095947265625, -158.247314453125, -192.89764404296875, 506.80987548828125, 672.2471313476562, 285.997802734375, 307.98858642578125, 1281.884033203125, 450.49530029296875, 108.59239196777344, 568.2145385742188, -662.7710571289062, 373.6551208496094, 206.81671142578125, 916.73828125, 865.1844482421875, 523.4932250976562, 586.9481201171875, -109.68962097167969, 463.9312744140625, 178.5189666748047, -664.5232543945312, 213.17886352539062, -88.6008071899414, 1044.1212158203125, 933.0159912109375, 1022.301513671875, -3.8511199951171875, 377.6123046875, 733.3058471679688, 724.1797485351562, 1158.98193359375, -41.92781066894531, 898.2929077148438, 206.3849639892578, -74.64519500732422, -318.31982421875, 24.725067138671875, 470.85693359375, -62.571205139160156], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000506.npy"} +{"epoch": 0.7430249632892805, "step": 507, "batch_size": 64, "mean": 319.8925476074219, "std": 576.255615234375, "min": -998.720947265625, "p10": -308.729360961914, "median": 279.3175964355469, "p90": 902.5152648925783, "max": 2229.31640625, "pos_frac": 0.71875, "sample": [51.440635681152344, 395.16973876953125, 6.1559906005859375, -998.720947265625, 1145.493408203125, -4.41595458984375, 81.36955261230469, 370.5701904296875, 111.06644439697266, 209.302978515625, -36.40950012207031, 432.67242431640625, -58.56040954589844, 357.21405029296875, 652.5391845703125, 754.116943359375, -443.45391845703125, 242.85064697265625, 104.31660461425781, 2062.001708984375, 457.14898681640625, 682.8423461914062, 79.20613098144531, -126.94210052490234, 450.79296875, 306.41314697265625, 762.4432373046875, 968.5790405273438, -374.4407958984375, 435.87579345703125, 309.94573974609375, -40.4725341796875, -229.28399658203125, 20.60639762878418, -5.736474990844727, 605.8369140625, 524.0521240234375, 1327.8955078125, 655.0535888671875, 33.99462127685547, 283.36297607421875, 19.717926025390625, -94.90390014648438, 364.8183898925781, 2229.31640625, 848.3057250976562, 268.57080078125, 302.1685791015625, -402.39825439453125, 691.9619750976562, 739.4788208007812, -518.4219970703125, -42.60429382324219, 731.3168334960938, 1791.815673828125, 575.2020874023438, 925.7479248046875, 432.9013366699219, 275.272216796875, -34.75408172607422, 31.53864288330078, -99.74880981445312, -342.7773742675781, -781.2947998046875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000507.npy"} +{"epoch": 0.7444933920704846, "step": 508, "batch_size": 64, "mean": 274.52154541015625, "std": 606.1994018554688, "min": -1476.5338134765625, "p10": -351.56120300292963, "median": 274.5212707519531, "p90": 978.0293090820314, "max": 2111.327880859375, "pos_frac": 0.6875, "sample": [209.25241088867188, 188.09375, -734.2431030273438, 761.4019165039062, 585.1732788085938, -44.084228515625, 156.30795288085938, 1940.883544921875, 423.25946044921875, 744.7196655273438, 373.6211853027344, 1014.0841064453125, 318.8760681152344, 2111.327880859375, 570.4249267578125, 296.6788330078125, 182.28643798828125, -567.505615234375, 1270.7020263671875, 933.0733642578125, -458.82080078125, -279.2012939453125, 297.9055480957031, -883.8906860351562, 997.296142578125, -235.05661010742188, 188.8291015625, -224.51113891601562, -118.76847839355469, 296.65191650390625, -83.61451721191406, 314.0120849609375, 17.494461059570312, -30.83773422241211, 1613.9034423828125, -1476.5338134765625, 86.76241302490234, 133.19952392578125, 213.72653198242188, 789.4957275390625, 330.2493591308594, 271.27081298828125, 119.65057373046875, 346.3329162597656, 611.2040405273438, 277.771728515625, 601.9247436523438, 540.3251953125, 808.7539672851562, 570.4883422851562, 560.1199340820312, -148.14859008789062, -232.5623321533203, 1023.5696411132812, 569.407470703125, -284.211669921875, -129.2694091796875, -55.141876220703125, -514.1967163085938, 661.5565185546875, -293.4158935546875, 330.411865234375, 87.39456939697266, -376.4806213378906], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000508.npy"} +{"epoch": 0.7459618208516887, "step": 509, "batch_size": 64, "mean": 300.30303955078125, "std": 471.1249694824219, "min": -635.5365600585938, "p10": -298.3164962768554, "median": 249.43817138671875, "p90": 926.1787414550781, "max": 1963.7626953125, "pos_frac": 0.796875, "sample": [18.340478897094727, -83.75228881835938, -545.711181640625, 918.4244384765625, -118.18377685546875, 477.3337707519531, 35.20703887939453, 98.76673126220703, 499.58819580078125, 288.1738586425781, 667.2256469726562, 72.30353546142578, 305.7744140625, 147.72958374023438, 161.0617218017578, 206.12388610839844, 929.5020141601562, -450.0364990234375, -407.70001220703125, 386.3706970214844, 1182.439208984375, 507.9242248535156, 157.1499481201172, 285.3066711425781, 472.0045166015625, 599.802001953125, 72.32528686523438, 165.21633911132812, 266.4804992675781, 430.1763610839844, 555.2217407226562, -377.0722351074219, 135.59083557128906, -148.37254333496094, 459.8414001464844, 1001.4118041992188, 685.9968872070312, 125.17823791503906, 279.1081237792969, -236.87608337402344, -84.38418579101562, 281.75494384765625, 282.4498291015625, 208.9913330078125, 50.17059326171875, -635.5365600585938, 1281.556396484375, 1251.321044921875, -54.12519836425781, 1963.7626953125, -347.41253662109375, 161.21914672851562, -324.6481018066406, 42.859161376953125, 252.71298217773438, 349.499267578125, 178.27691650390625, 229.64663696289062, 246.16336059570312, 507.287841796875, 745.5939331054688, 1095.4576416015625, 404.819091796875, 906.56298828125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000509.npy"} +{"epoch": 0.7474302496328928, "step": 510, "batch_size": 64, "mean": 379.4727478027344, "std": 421.556640625, "min": -489.0425109863281, "p10": -144.63215866088862, "median": 360.8226318359375, "p90": 928.7424194335939, "max": 1357.4423828125, "pos_frac": 0.78125, "sample": [349.7225646972656, 947.445068359375, 1239.0989990234375, 427.9679260253906, -236.66921997070312, 741.8492431640625, 710.8623046875, 299.70709228515625, 145.21897888183594, 709.5891723632812, 371.9226989746094, 534.8489379882812, -297.8638610839844, 490.54498291015625, 1036.9129638671875, 775.7565307617188, 673.8487548828125, -489.0425109863281, -35.50798797607422, 784.8909912109375, 97.30106353759766, 461.6383056640625, 438.072998046875, 158.18405151367188, 293.7792053222656, 677.0159912109375, 678.5894165039062, 967.9841918945312, 996.6099243164062, -304.74713134765625, 885.1029052734375, 1357.4423828125, -81.65267944335938, 218.52752685546875, 667.4027099609375, 137.45474243164062, 293.0062561035156, 1235.8043212890625, -362.75885009765625, 293.32904052734375, 235.3734893798828, 122.09745025634766, 704.25537109375, 311.42913818359375, 407.7318115234375, -100.1061782836914, 508.5763854980469, 432.5841064453125, 55.70829772949219, 340.71014404296875, 718.2879028320312, -62.518043518066406, 121.5052490234375, 724.199462890625, -59.76802062988281, 512.461669921875, -350.4064025878906, 771.63330078125, -76.65612030029297, -163.7147216796875, -12.767934799194336, 659.2142944335938, 150.91580200195312, 46.31826400756836], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000510.npy"} +{"epoch": 0.748898678414097, "step": 511, "batch_size": 64, "mean": 308.17388916015625, "std": 568.7922973632812, "min": -1489.0843505859375, "p10": -189.25202331542962, "median": 213.87254333496094, "p90": 1029.9190612792975, "max": 1854.421875, "pos_frac": 0.78125, "sample": [-132.15765380859375, 1267.461669921875, 172.05184936523438, 179.51795959472656, 382.29949951171875, 112.77146911621094, 1085.989013671875, 1335.077392578125, 534.8025512695312, 155.0304718017578, 61.84278106689453, 609.0901489257812, 746.23974609375, 612.223876953125, 114.19743347167969, 1141.80419921875, -1489.0843505859375, 422.58428955078125, -471.2794189453125, 691.568359375, 1555.3353271484375, 774.6148071289062, 86.59486389160156, 396.0819091796875, 251.72705078125, 129.69595336914062, -744.8075561523438, 424.57000732421875, -12.723846435546875, 567.4061889648438, 477.8914794921875, 15.930740356445312, -25.23216438293457, 272.049072265625, 340.9251403808594, 169.11537170410156, 899.0891723632812, -213.72103881835938, -13.178466796875, 207.9365234375, 828.1890869140625, 211.79635620117188, 69.64057922363281, -321.7864074707031, -79.00029754638672, 753.409912109375, 690.9508056640625, 75.12013244628906, -10.099004745483398, 6.7236480712890625, -299.7794494628906, 540.6698608398438, 521.305419921875, 17.57967185974121, 215.94873046875, 445.82965087890625, 217.12789916992188, -15.547142028808594, 1854.421875, 94.87254333496094, 479.6916809082031, -1260.989501953125, 128.22412109375, 1467.498291015625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000511.npy"} +{"epoch": 0.750367107195301, "step": 512, "batch_size": 64, "mean": 420.5235595703125, "std": 435.84423828125, "min": -513.0210571289062, "p10": -112.41095504760736, "median": 392.991943359375, "p90": 1012.5194641113283, "max": 1828.55712890625, "pos_frac": 0.84375, "sample": [1093.5352783203125, 990.388671875, 784.35791015625, -315.1925048828125, -221.54429626464844, 72.35128784179688, 1828.55712890625, 225.334716796875, 72.80799865722656, -54.382972717285156, 96.78096008300781, 637.5157470703125, 4.113384246826172, 431.205322265625, -189.85330200195312, 49.93337631225586, 562.6636962890625, 493.9225158691406, 399.1180419921875, -2.6654605865478516, -215.19412231445312, 703.1834716796875, 90.16665649414062, 512.3587036132812, 1238.0667724609375, 761.5219116210938, 227.71104431152344, 362.54534912109375, 148.54034423828125, 810.391845703125, 391.51605224609375, 350.671630859375, 445.7055969238281, 499.82757568359375, 214.39381408691406, 211.88551330566406, 1079.9346923828125, 275.6937255859375, -137.28009033203125, 890.8931884765625, 216.42160034179688, 1022.0040893554688, 429.41455078125, 1035.337158203125, 37.160858154296875, -513.0210571289062, 394.46783447265625, 11.450000762939453, 247.52134704589844, 532.7185668945312, 1052.1317138671875, -18.39745330810547, 348.0191955566406, 707.8302612304688, 309.3343811035156, 634.232666015625, 676.921630859375, 241.88803100585938, 446.28692626953125, 863.6845703125, -186.78660583496094, 963.7952880859375, 819.1298217773438, 820.480224609375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000512.npy"} +{"epoch": 0.7518355359765051, "step": 513, "batch_size": 64, "mean": 303.75506591796875, "std": 484.9083251953125, "min": -714.673828125, "p10": -251.78580932617186, "median": 205.40718841552734, "p90": 1068.0536254882813, "max": 1338.9581298828125, "pos_frac": 0.734375, "sample": [687.91943359375, 439.05963134765625, 301.90704345703125, 142.17486572265625, 196.66500854492188, 202.7649383544922, 345.9949645996094, 198.30343627929688, -159.02574157714844, 1072.7601318359375, 766.4313354492188, -119.22076416015625, 797.3428955078125, 57.95946502685547, 45.027793884277344, 773.4844970703125, 14.013883590698242, -20.2181396484375, 203.5712127685547, 268.58258056640625, 796.8861083984375, 1057.07177734375, 1325.0418701171875, 207.2431640625, 586.8960571289062, -0.6228904724121094, 375.0321044921875, 273.09100341796875, 333.64715576171875, -239.43408203125, 28.162200927734375, 1158.929931640625, 568.640625, 470.05938720703125, 259.35552978515625, -66.33309936523438, -40.01164245605469, -567.608154296875, 304.8677978515625, 1338.9581298828125, -191.8900909423828, 135.0355224609375, 145.55853271484375, -714.673828125, 608.4573364257812, -19.4122314453125, -450.3150634765625, 1.184408187866211, 1241.5557861328125, 761.19580078125, -556.9284057617188, 10.3585205078125, 1090.620849609375, 1186.116943359375, 319.80340576171875, 186.78106689453125, -314.63262939453125, 689.2130126953125, -136.72653198242188, -279.0082092285156, -257.07940673828125, 1051.864990234375, 133.85269165039062, 414.0194396972656], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000513.npy"} +{"epoch": 0.7533039647577092, "step": 514, "batch_size": 64, "mean": 294.3906555175781, "std": 495.5754089355469, "min": -725.8864135742188, "p10": -242.8824691772461, "median": 170.85684967041016, "p90": 937.119470214844, "max": 1808.728515625, "pos_frac": 0.703125, "sample": [-412.6551208496094, -188.61474609375, 99.83869934082031, -89.5483169555664, 1084.780029296875, 171.69070434570312, 593.17822265625, 602.0062866210938, 1808.728515625, 959.2260131835938, 154.44761657714844, 686.9537353515625, -285.3123779296875, 72.93292236328125, 333.080078125, -79.65501403808594, -298.6333312988281, 141.59730529785156, 1251.2838134765625, 1063.08642578125, -242.4896697998047, -63.51856231689453, 318.6604919433594, -140.10682678222656, 37.904991149902344, 280.5747375488281, 433.4871826171875, 151.34378051757812, 460.88543701171875, -73.42377471923828, 811.2291870117188, 487.631103515625, -117.14128875732422, -51.13238525390625, 205.3883056640625, 238.6284942626953, 1203.9500732421875, -172.11981201171875, 31.28026580810547, -243.05081176757812, 350.9508056640625, -431.94549560546875, -12.105781555175781, 542.4414672851562, 159.01187133789062, 105.67845153808594, -725.8864135742188, -300.1939697265625, -237.42919921875, 835.48779296875, 257.6125183105469, 472.8150634765625, 512.8536376953125, 677.430908203125, 885.5375366210938, 170.0229949951172, 303.98681640625, 673.8616943359375, 650.3394165039062, 129.4764862060547, 739.1400756835938, 68.97193908691406, 115.31796264648438, 1671.2333984375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000514.npy"} +{"epoch": 0.7547723935389133, "step": 515, "batch_size": 64, "mean": 238.64556884765625, "std": 439.05950927734375, "min": -1469.4586181640625, "p10": -173.13902282714844, "median": 241.01809692382812, "p90": 726.7900695800785, "max": 1385.3785400390625, "pos_frac": 0.765625, "sample": [17.347797393798828, -100.64595031738281, 550.9791259765625, 827.8873291015625, 550.0283203125, -1469.4586181640625, 451.918701171875, 403.37994384765625, 138.57684326171875, -464.58648681640625, -339.54730224609375, 30.836807250976562, -33.424285888671875, 175.34521484375, 273.3448486328125, 312.90966796875, 130.16966247558594, 595.3753662109375, -350.6129150390625, 35.596099853515625, 354.1393127441406, -108.68782043457031, 269.8043212890625, 370.43426513671875, 206.2478485107422, 156.3391571044922, 646.7841796875, -162.75897216796875, 550.6163940429688, 75.99185943603516, 761.0783081054688, 31.96354866027832, -177.58761596679688, 592.3338623046875, 259.6455078125, 371.49468994140625, 638.90625, 15.369377136230469, 1282.737060546875, -148.65249633789062, 165.1571502685547, 998.0952758789062, -395.3924560546875, 2.4023818969726562, -364.5915222167969, 222.39068603515625, 116.23983001708984, 821.6326904296875, -37.38309860229492, 26.76409912109375, -34.409889221191406, 1073.319580078125, 260.4815979003906, 136.552490234375, 343.5225524902344, 614.9671630859375, 472.9402160644531, -125.64468383789062, 279.75311279296875, 443.28570556640625, 358.75146484375, 271.8286437988281, 1385.3785400390625, 515.6571655273438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000515.npy"} +{"epoch": 0.7562408223201175, "step": 516, "batch_size": 64, "mean": 338.2943420410156, "std": 429.0459899902344, "min": -821.2274780273438, "p10": -204.34078521728514, "median": 359.16627502441406, "p90": 897.6194641113283, "max": 1249.985107421875, "pos_frac": 0.78125, "sample": [209.5787353515625, 238.10263061523438, 815.7845458984375, -178.71583557128906, 554.6782836914062, 714.9329223632812, 464.9523010253906, -136.0570831298828, 272.7423095703125, 517.1795043945312, 1214.7099609375, 452.5909118652344, 513.9487915039062, 527.185546875, 298.400634765625, -156.22769165039062, 583.8262939453125, -42.38435363769531, 99.3052978515625, 1249.985107421875, -821.2274780273438, 96.49658203125, -338.21746826171875, -454.18548583984375, 934.83642578125, 459.27484130859375, -516.0247192382812, 916.7781372070312, 1155.6358642578125, 245.64772033691406, 225.40939331054688, 852.9158935546875, 976.4794311523438, 363.6083984375, 458.6983337402344, 589.6539306640625, 272.85162353515625, 22.72249984741211, 383.57373046875, 232.9486083984375, 146.64158630371094, -48.17242431640625, 803.0489501953125, 354.7241516113281, 73.42510986328125, 1032.8004150390625, 169.29908752441406, 133.17620849609375, -22.256935119628906, 405.86541748046875, 443.7840881347656, 518.1693725585938, 97.17779541015625, 547.9247436523438, 749.6657104492188, -215.32290649414062, -248.5136260986328, -338.3516845703125, 503.4427490234375, 629.9535522460938, 610.7401733398438, 779.28466796875, 313.9474792480469, -62.01076126098633], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000516.npy"} +{"epoch": 0.7577092511013216, "step": 517, "batch_size": 64, "mean": 398.14605712890625, "std": 525.4922485351562, "min": -607.3589477539062, "p10": -177.49279174804684, "median": 368.93800354003906, "p90": 904.1949462890627, "max": 2343.173583984375, "pos_frac": 0.765625, "sample": [513.0465698242188, 775.6928100585938, 66.8555908203125, 420.5118103027344, 801.4203491210938, 174.84262084960938, -451.46392822265625, 858.1634521484375, 567.9362182617188, -195.99339294433594, 566.9906616210938, 1922.3626708984375, 1123.2945556640625, 456.11602783203125, 670.2523193359375, -118.2811508178711, 274.18487548828125, 519.2025756835938, 76.99089813232422, 711.1826171875, 1395.6788330078125, 662.9375, -3.929220199584961, 194.42922973632812, -607.3589477539062, -289.5268249511719, 123.66783905029297, 487.2416076660156, -389.3475036621094, 2343.173583984375, 307.170654296875, -26.611221313476562, -244.80506896972656, 923.9227294921875, 156.30615234375, 528.7049560546875, -353.30108642578125, 468.1509704589844, -51.644500732421875, 377.8755798339844, 766.2053833007812, -63.894676208496094, 245.00450134277344, 396.7624206542969, 618.060546875, 557.47119140625, 687.34619140625, 352.5257263183594, 99.02222442626953, 1335.8018798828125, 132.83773803710938, 1099.8629150390625, 360.00042724609375, 811.774169921875, 59.9415283203125, 743.905517578125, -134.32472229003906, 194.42141723632812, 141.30165100097656, 414.7901611328125, -107.441650390625, 787.2041015625, 268.6315002441406, -21.906532287597656], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000517.npy"} +{"epoch": 0.7591776798825257, "step": 518, "batch_size": 64, "mean": 340.473388671875, "std": 440.62109375, "min": -812.2875366210938, "p10": -230.80630187988277, "median": 262.63470458984375, "p90": 966.0789916992188, "max": 1304.695068359375, "pos_frac": 0.796875, "sample": [64.862548828125, 1108.38427734375, 549.4815063476562, 330.45794677734375, 231.58360290527344, 234.7417755126953, 568.0731811523438, 551.304931640625, 29.531208038330078, 242.42987060546875, 942.7938232421875, 644.099609375, 71.8974609375, -246.6929168701172, 434.9369812011719, 176.27891540527344, 418.311767578125, 182.296142578125, 189.69720458984375, 465.6583251953125, 1049.4080810546875, -14.315132141113281, -277.1267395019531, 97.02615356445312, -0.3487586975097656, 339.0293273925781, 833.107177734375, 212.06597900390625, 681.3988037109375, 905.3699340820312, 976.058349609375, 176.4382781982422, 1203.037353515625, 1304.695068359375, 874.1075439453125, 716.9444580078125, 210.10693359375, -118.05325317382812, 473.9496154785156, 582.9884643554688, 267.25732421875, -141.2977294921875, 499.0798034667969, -254.49142456054688, 471.93731689453125, -407.18951416015625, -369.92425537109375, -812.2875366210938, 390.9841003417969, 423.2264709472656, 91.54759216308594, 205.34103393554688, -193.73753356933594, 212.78207397460938, -463.1861267089844, 1016.685546875, 1119.949462890625, 244.39877319335938, -132.23226928710938, 885.9174194335938, 278.3385925292969, 142.7735137939453, 640.3973388671875, 258.0120849609375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000518.npy"} +{"epoch": 0.7606461086637298, "step": 519, "batch_size": 64, "mean": 247.462158203125, "std": 462.2147521972656, "min": -1049.8070068359375, "p10": -271.16530151367186, "median": 256.01446533203125, "p90": 835.2641235351566, "max": 1326.3271484375, "pos_frac": 0.71875, "sample": [-62.166778564453125, 534.0584716796875, 398.8736572265625, -461.0784606933594, 1073.80029296875, -235.92379760742188, 110.26499938964844, 490.4100341796875, 1050.739501953125, 617.0046997070312, 77.80955505371094, 602.1752319335938, -274.68731689453125, 910.3236694335938, 292.8630676269531, -914.5095825195312, 1326.3271484375, 533.274658203125, 31.20061492919922, -55.85469055175781, 295.8494873046875, -58.02534484863281, 52.03608703613281, 426.54620361328125, 647.1898803710938, 147.94461059570312, 269.501953125, 562.1699829101562, 505.81744384765625, 52.133323669433594, -40.94403076171875, 712.3197631835938, -129.76052856445312, -85.3282470703125, 422.1676025390625, 92.2140884399414, 447.3863525390625, -39.749794006347656, -246.88824462890625, 15.416305541992188, 335.01922607421875, -281.5397033691406, 963.591064453125, 526.7081909179688, -1049.8070068359375, 678.1952514648438, 1263.3885498046875, 34.23564910888672, 26.3802490234375, 41.32273864746094, 163.98513793945312, 874.2258911132812, -320.6537170410156, 649.8607177734375, -262.947265625, 242.5269775390625, 744.3533325195312, 20.30398178100586, 395.52569580078125, 517.703125, 306.6011657714844, -59.54996871948242, 364.1642761230469, -428.91864013671875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000519.npy"} +{"epoch": 0.762114537444934, "step": 520, "batch_size": 64, "mean": 315.1514892578125, "std": 492.06689453125, "min": -1013.7603759765625, "p10": -175.10222625732422, "median": 233.11238861083984, "p90": 954.0400878906252, "max": 2218.21240234375, "pos_frac": 0.75, "sample": [-159.92977905273438, -241.4840087890625, 975.74658203125, 117.67649841308594, 366.5174255371094, 134.71243286132812, 442.3024597167969, -77.13787841796875, -138.21466064453125, 168.61651611328125, 44.58204650878906, -56.678558349609375, 625.343994140625, 23.902511596679688, -82.24698638916016, 710.1697387695312, 1192.1971435546875, 2218.21240234375, -162.86434936523438, 385.3367004394531, 1112.1357421875, 1300.75927734375, -358.990966796875, 200.934326171875, -83.70816040039062, 857.9075317382812, 128.31710815429688, 629.6008911132812, 409.5489501953125, 306.38397216796875, 272.46771240234375, 251.2334747314453, 1348.255126953125, 118.51457214355469, 63.271392822265625, 383.19921875, 507.658203125, 378.9736022949219, 736.2155151367188, -1013.7603759765625, 576.3588256835938, 130.39950561523438, 228.48875427246094, 385.77935791015625, -6.516845703125, 909.7186889648438, 216.725341796875, 830.2651977539062, 492.23876953125, 237.73602294921875, 180.0740203857422, -230.8370361328125, -272.2619323730469, 205.26409912109375, 237.95443725585938, 582.919677734375, -212.54107666015625, 973.0349731445312, -180.34703063964844, 465.9974060058594, 281.0443115234375, -88.28288269042969, 17.930938720703125, 172.87567138671875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000520.npy"} +{"epoch": 0.7635829662261381, "step": 521, "batch_size": 64, "mean": 359.2563781738281, "std": 464.9327697753906, "min": -659.1854248046875, "p10": -105.01707534790037, "median": 265.20970153808594, "p90": 949.901953125, "max": 1675.023681640625, "pos_frac": 0.765625, "sample": [464.76165771484375, 790.7972412109375, 339.3795471191406, 27.570152282714844, -28.30411720275879, -659.1854248046875, -178.32894897460938, -21.908966064453125, -114.704345703125, 477.1181640625, 38.25645446777344, -249.34259033203125, 31.651073455810547, 543.1790771484375, 485.6293029785156, 1251.673828125, 114.8924560546875, 31.15195083618164, -196.4071807861328, 941.9522705078125, 265.27825927734375, -10.756332397460938, -80.43444061279297, 224.3739776611328, 571.1822509765625, 642.6820068359375, 683.9690551757812, 209.71792602539062, 228.78005981445312, -2.5279541015625, 344.433837890625, 616.8539428710938, 8.302940368652344, 1164.7646484375, 161.94180297851562, 651.467529296875, 541.7965087890625, 265.1411437988281, 497.6886291503906, -82.41344451904297, 242.88323974609375, 455.636474609375, 379.2073974609375, 13.514636993408203, 200.44512939453125, 1150.4853515625, -126.10932922363281, 1179.68408203125, -15.380096435546875, -552.6417236328125, 539.6826171875, 418.968994140625, 696.9840087890625, -17.588821411132812, 834.1989135742188, 953.3089599609375, 95.76005554199219, 1539.6591796875, 71.53064727783203, 1675.023681640625, 506.9967956542969, 39.64957809448242, 848.59619140625, 869.8381958007812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000521.npy"} +{"epoch": 0.7650513950073421, "step": 522, "batch_size": 64, "mean": 321.73687744140625, "std": 517.5418701171875, "min": -938.2258911132812, "p10": -146.41609649658199, "median": 216.2401123046875, "p90": 1122.395104980469, "max": 1505.04150390625, "pos_frac": 0.75, "sample": [293.666259765625, 782.1485595703125, 885.6911010742188, -66.12337493896484, 1129.7138671875, 94.990478515625, 1505.04150390625, 197.94754028320312, 142.2915802001953, -46.24342727661133, -109.85067749023438, 411.7366638183594, 790.9218139648438, 45.25425720214844, 580.5402221679688, -17.757476806640625, 229.38992309570312, -63.43352127075195, 477.9476318359375, 219.53887939453125, 671.4783935546875, 622.4143676757812, 128.03302001953125, 372.07861328125, -680.161376953125, 1223.6279296875, 201.90704345703125, 161.96896362304688, 578.4738159179688, 634.1688842773438, 236.4423828125, 147.7451629638672, 99.17707824707031, -9.95440673828125, 352.67559814453125, -938.2258911132812, 509.8763732910156, 185.47396850585938, 237.5029296875, -412.8240966796875, -162.0869903564453, 453.2642822265625, 1105.3179931640625, 55.04022979736328, -385.491943359375, 575.3980712890625, 915.9192504882812, 283.3736572265625, -822.1234741210938, 1366.7086181640625, 1226.035400390625, -39.430633544921875, 1205.0333251953125, 106.91803741455078, 117.38650512695312, -530.5186767578125, -8.509300231933594, -63.333885192871094, 634.5087280273438, 212.94134521484375, 1074.814697265625, 120.22686767578125, 1233.5745849609375, 110.90425109863281], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000522.npy"} +{"epoch": 0.7665198237885462, "step": 523, "batch_size": 64, "mean": 339.26068115234375, "std": 570.0612182617188, "min": -974.3993530273438, "p10": -293.1639602661132, "median": 318.935546875, "p90": 1186.4226562500005, "max": 1682.5748291015625, "pos_frac": 0.75, "sample": [-156.99681091308594, 1232.688720703125, 133.47254943847656, 242.4708709716797, 406.12725830078125, -339.71563720703125, 372.16485595703125, 485.9715270996094, 1010.8788452148438, 392.52862548828125, -718.1670532226562, 649.0088500976562, 2.3510875701904297, -174.16543579101562, -465.0624084472656, 53.13304138183594, 234.5069580078125, 818.3717041015625, -616.2469482421875, 245.1236572265625, 696.934814453125, 1230.213134765625, -111.6741943359375, 3.8530139923095703, 530.9306640625, 122.79005432128906, 319.5481872558594, 51.845916748046875, 0.32508087158203125, 1599.740234375, 498.20880126953125, 107.53605651855469, -120.12100219726562, -974.3993530273438, 1084.244873046875, 902.0084228515625, -518.74609375, -135.1842803955078, 532.7575073242188, -184.5433807373047, 575.7974853515625, -78.60198974609375, 1682.5748291015625, 422.9969787597656, 710.4293823242188, 21.152984619140625, 440.5923767089844, 318.3229064941406, 1586.6795654296875, 432.8688659667969, 614.534423828125, 114.3455810546875, 604.1171875, 59.48870086669922, 763.5215454101562, 1270.0723876953125, 939.2973022460938, -89.94268798828125, 650.019287109375, 1282.7059326171875, 595.654296875, -650.0577392578125, 116.39708709716797, -114.99606323242188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000523.npy"} +{"epoch": 0.7679882525697503, "step": 524, "batch_size": 64, "mean": 365.68072509765625, "std": 543.0574340820312, "min": -1518.1031494140625, "p10": -91.8570098876953, "median": 454.67152404785156, "p90": 997.9398437500001, "max": 1817.220458984375, "pos_frac": 0.71875, "sample": [22.146835327148438, 839.667724609375, 516.5564575195312, 463.45458984375, 511.1498107910156, 211.7225341796875, 537.8803100585938, -16.80901336669922, 580.1612548828125, 427.25970458984375, 463.536865234375, 79.32418823242188, 528.01708984375, 504.689697265625, 768.41162109375, 381.9623107910156, -14.669174194335938, 967.496337890625, 352.9339599609375, 649.8154296875, -1518.1031494140625, 514.1279296875, 535.2802124023438, 1817.220458984375, -52.36903381347656, 37.35382080078125, 1010.987060546875, 387.0686340332031, 542.2302856445312, -82.826416015625, -17.99112319946289, 490.348876953125, 523.8388671875, -196.39646911621094, 873.8228759765625, 373.8695983886719, 1314.439697265625, 1471.6910400390625, 690.51220703125, 411.51690673828125, -70.1761703491211, -923.0332641601562, -4.1624755859375, -1.4349899291992188, 700.7879028320312, -95.72726440429688, 370.3013916015625, 5.443855285644531, 434.89447021484375, 485.1707458496094, 1146.582275390625, 1129.1522216796875, 710.4386596679688, -972.6787109375, -72.15291595458984, -50.80149841308594, 1069.6943359375, 713.135009765625, -231.8589630126953, 448.3313293457031, 594.9886474609375, 461.01171875, -282.1510009765625, -63.518768310546875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000524.npy"} +{"epoch": 0.7694566813509545, "step": 525, "batch_size": 64, "mean": 448.82427978515625, "std": 561.54345703125, "min": -699.8955688476562, "p10": -100.29976196289061, "median": 331.2705535888672, "p90": 1286.1549560546878, "max": 1750.759521484375, "pos_frac": 0.828125, "sample": [406.54718017578125, 1110.467529296875, 464.0411376953125, 111.35237884521484, 130.48846435546875, 465.0885925292969, 1701.090576171875, 732.5447387695312, 233.47726440429688, -233.37533569335938, 1238.0631103515625, -449.2587890625, 1000.5901489257812, 228.5142059326172, 258.786376953125, -221.1621551513672, 197.6804656982422, 193.37667846679688, 993.7108154296875, 738.7842407226562, 656.1487426757812, 67.11447143554688, -135.2744140625, 128.66043090820312, -108.17071533203125, 338.31781005859375, 570.9461669921875, 537.1865234375, 338.0036926269531, 1306.7657470703125, 1090.194091796875, 7.617698669433594, 1690.708984375, 24.356346130371094, 1750.759521484375, 324.53741455078125, 159.2554931640625, 170.50668334960938, 440.68939208984375, -50.443756103515625, 1678.92236328125, -699.8955688476562, 23.66668701171875, -8.291105270385742, 348.7460021972656, 308.0149841308594, 597.1754150390625, 81.34494018554688, 582.128173828125, 197.46505737304688, 742.2039794921875, 1583.278564453125, -81.9342041015625, -27.84796142578125, 666.689208984375, 275.7848815917969, 339.1538391113281, 37.755638122558594, -616.0169677734375, 998.1661376953125, 241.7152099609375, 1488.2730712890625, 438.0138854980469, 921.5553588867188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000525.npy"} +{"epoch": 0.7709251101321586, "step": 526, "batch_size": 64, "mean": 437.71282958984375, "std": 589.3935546875, "min": -1123.2374267578125, "p10": -210.35189819335938, "median": 485.7522277832031, "p90": 1217.6861938476563, "max": 1640.574462890625, "pos_frac": 0.75, "sample": [1255.930419921875, 1013.4779052734375, 102.47227478027344, 242.36968994140625, 309.9229736328125, 280.620361328125, -195.82472229003906, -29.93817710876465, 509.21826171875, -788.8764038085938, 477.80926513671875, 749.4258422851562, 748.64501953125, 270.9098205566406, 1368.3233642578125, 1012.9136962890625, -216.43063354492188, 760.8685913085938, 264.0852966308594, 646.7089233398438, 396.1948547363281, 664.8193359375, 137.02088928222656, 1126.072265625, 580.1320190429688, 649.1007080078125, 1640.574462890625, 89.21931457519531, 347.0714111328125, -143.5823211669922, -220.7421112060547, 621.1444702148438, -479.6929626464844, 1317.1492919921875, -196.16818237304688, 376.0580749511719, 1189.2205810546875, 1144.3704833984375, 246.618896484375, -1123.2374267578125, 942.5285034179688, 729.8460693359375, 493.6951904296875, 1314.4466552734375, -543.1893920898438, 747.0506591796875, 746.4627685546875, 1132.6358642578125, -136.83407592773438, 528.172607421875, -80.11221313476562, 157.7871856689453, 356.3046569824219, 1229.8857421875, 920.9202880859375, -79.93246459960938, 1245.423828125, 578.8831176757812, 521.5133666992188, -51.11466979980469, -194.16632080078125, 20.397531509399414, -862.92626953125, 1151.9647216796875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000526.npy"} +{"epoch": 0.7723935389133627, "step": 527, "batch_size": 64, "mean": 293.4132385253906, "std": 455.9350280761719, "min": -715.2634887695312, "p10": -279.81849975585936, "median": 262.86080169677734, "p90": 808.6153564453125, "max": 1256.6671142578125, "pos_frac": 0.765625, "sample": [-636.756103515625, 574.1002197265625, 436.0881042480469, 398.5281677246094, 801.1573486328125, 165.11282348632812, 1247.62646484375, -84.75267028808594, -121.59841918945312, 707.520263671875, -190.66148376464844, -246.77426147460938, 675.7432250976562, 306.53277587890625, -627.8977661132812, -530.1482543945312, 527.7451782226562, -158.6927490234375, 468.50628662109375, 340.08465576171875, -365.8431396484375, 1051.0943603515625, 134.808837890625, 55.234249114990234, 111.87252807617188, 783.894287109375, -98.16758728027344, 90.77149200439453, 940.5504150390625, 778.24560546875, 616.7178955078125, 1256.6671142578125, 424.4041748046875, 636.5167236328125, 1168.5626220703125, 1160.1412353515625, 811.8116455078125, -50.33954620361328, 244.5865936279297, 374.8041687011719, 645.2189331054688, 159.67181396484375, 323.16534423828125, 281.135009765625, -132.40655517578125, 35.685760498046875, 419.7742919921875, 230.71212768554688, 146.85546875, -715.2634887695312, 240.45423889160156, 121.56756591796875, 603.2487182617188, 191.51513671875, 75.55984497070312, 74.79667663574219, 685.93994140625, 607.210693359375, 620.3402099609375, 369.36114501953125, 209.94747924804688, -293.9803161621094, 49.24956512451172, -349.10955810546875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000527.npy"} +{"epoch": 0.7738619676945668, "step": 528, "batch_size": 64, "mean": 362.7888488769531, "std": 623.7684326171875, "min": -803.993408203125, "p10": -442.04655761718743, "median": 313.88795471191406, "p90": 1150.6494018554688, "max": 2082.940185546875, "pos_frac": 0.71875, "sample": [-90.90946197509766, 1132.076416015625, 359.896484375, 317.3053283691406, 184.12225341796875, -594.8671875, -46.18341827392578, 732.3602294921875, 646.9601440429688, 1388.18212890625, 470.89697265625, -124.33573913574219, 438.59912109375, -480.11517333984375, 97.9148941040039, 540.7417602539062, 348.85491943359375, -120.09475708007812, 1224.86083984375, 2082.940185546875, -63.612037658691406, 1035.3836669921875, 492.60015869140625, 869.0796508789062, 458.4942626953125, 986.3242797851562, -484.7347412109375, 1326.4654541015625, -353.21978759765625, 1034.75439453125, -728.6597900390625, 310.4705810546875, 367.87152099609375, 244.85433959960938, 317.9825439453125, 179.8361358642578, 653.184326171875, 113.74861145019531, 504.8482360839844, 118.56424713134766, -285.7608947753906, 959.9190063476562, -196.88180541992188, -803.993408203125, 1525.4031982421875, 270.9828186035156, 654.0372314453125, 1808.94482421875, 164.9816436767578, 35.23468017578125, -682.1958618164062, -788.1870727539062, 793.863525390625, 1158.6092529296875, 140.92172241210938, 940.6491088867188, 611.9473266601562, 176.60321044921875, -345.08868408203125, -15.728897094726562, 119.17694091796875, -96.24983215332031, 1069.134521484375, 108.72173309326172], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000528.npy"} +{"epoch": 0.775330396475771, "step": 529, "batch_size": 64, "mean": 253.69570922851562, "std": 584.20166015625, "min": -1506.49755859375, "p10": -468.6559661865234, "median": 267.02618408203125, "p90": 941.9683471679688, "max": 1530.1656494140625, "pos_frac": 0.6875, "sample": [272.33837890625, -225.6696319580078, 1530.1656494140625, 68.35614013671875, 175.9075469970703, 524.9376831054688, 504.6481018066406, 819.055908203125, 701.4122314453125, 1439.7392578125, -316.1903991699219, 483.9676513671875, 123.3075180053711, -478.08251953125, 544.9409790039062, -102.09351348876953, 138.89834594726562, 71.47078704833984, 800.267578125, -111.4731216430664, -359.9206848144531, 261.7139892578125, 1222.502197265625, 940.5536499023438, 1084.62890625, -668.4747314453125, -67.75489044189453, 351.7605285644531, -572.6834106445312, 624.0123901367188, 283.3503723144531, 486.1748352050781, -342.0491638183594, 418.7643737792969, -9.968780517578125, 62.25126647949219, 755.444091796875, 111.87681579589844, 627.0143432617188, 755.4822998046875, -1506.49755859375, 180.6741943359375, 1027.3770751953125, -547.8565673828125, 610.7770385742188, -992.0657348632812, 173.60116577148438, 683.0184936523438, 942.5746459960938, 83.58243560791016, -693.4972534179688, 450.922607421875, -48.89698028564453, 569.5922241210938, -431.91424560546875, 572.9779052734375, 257.8829650878906, 871.59423828125, 554.2189331054688, 1200.1453857421875, -446.6606750488281, -276.86236572265625, -228.39804077148438, 299.6512756347656], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000529.npy"} +{"epoch": 0.7767988252569751, "step": 530, "batch_size": 64, "mean": 393.9274597167969, "std": 696.6029052734375, "min": -2145.302734375, "p10": -250.3104064941406, "median": 226.36418914794922, "p90": 1437.4841308593755, "max": 2313.85107421875, "pos_frac": 0.71875, "sample": [192.66177368164062, -411.7802429199219, -18.246749877929688, 181.25042724609375, 296.98492431640625, -253.91732788085938, 1626.5765380859375, 684.0098876953125, 364.0683288574219, -656.2481689453125, 220.71571350097656, -72.69243621826172, -32.38751220703125, -133.71121215820312, 740.8129272460938, 787.4418334960938, 6.86033821105957, -45.6527099609375, -374.679443359375, 1320.0748291015625, 1678.5150146484375, 490.3977355957031, 926.1538696289062, 718.5551147460938, -118.42689514160156, 810.6680908203125, -132.5430908203125, 232.01266479492188, -295.86669921875, 691.0138549804688, -4.178037643432617, 1657.255859375, 1215.84765625, 527.7317504882812, 375.3786926269531, -5.061359405517578, 882.9908447265625, 79.83304595947266, -241.89425659179688, -383.1625061035156, 181.9708709716797, 1801.4241943359375, 739.4874877929688, 2313.85107421875, 11.630617141723633, 364.9742126464844, 294.9721374511719, 323.11065673828125, 1533.265869140625, 189.49044799804688, 377.67913818359375, 47.06793975830078, 972.3646850585938, -2145.302734375, 1327.4990234375, -7.8094329833984375, 20.227691650390625, 1484.62060546875, 525.7068481445312, 204.00238037109375, 81.41552734375, 168.36114501953125, 693.2744140625, 180.71047973632812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000530.npy"} +{"epoch": 0.7782672540381792, "step": 531, "batch_size": 64, "mean": 323.4900207519531, "std": 560.1647338867188, "min": -1191.107666015625, "p10": -250.90604705810546, "median": 283.8900604248047, "p90": 1001.2312438964846, "max": 1776.083251953125, "pos_frac": 0.75, "sample": [619.689453125, 102.84261322021484, 673.8316650390625, 1023.406982421875, 683.0551147460938, -468.13189697265625, 294.557373046875, 154.13711547851562, 1729.6795654296875, -228.84072875976562, -235.5539093017578, 444.6747131347656, 7.112699508666992, 341.3760070800781, 289.5416259765625, 201.08815002441406, 302.7554626464844, 237.38294982910156, -1191.107666015625, 369.2908630371094, 949.4878540039062, 639.0438842773438, 454.36248779296875, 84.47640991210938, 862.5765380859375, 176.28094482421875, 416.1846008300781, 311.37701416015625, 468.4296875, -103.9835433959961, 1280.8599853515625, -257.48553466796875, 683.4688720703125, 623.463623046875, 78.20118713378906, 244.78286743164062, -39.534690856933594, -595.518310546875, -17.816944122314453, 177.03778076171875, 1776.083251953125, 336.3602294921875, 86.0628890991211, -19.141769409179688, 1756.9139404296875, -519.2552490234375, 516.3367919921875, 1343.965576171875, 251.954833984375, 884.0137939453125, 1188.1458740234375, -13.713722229003906, 626.1669921875, -234.95932006835938, 278.2384948730469, 169.1381072998047, 471.0654296875, 135.44276428222656, 513.3194580078125, -130.5934295654297, 165.789794921875, 449.0802001953125, -826.5802001953125, -286.9564208984375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000531.npy"} +{"epoch": 0.7797356828193832, "step": 532, "batch_size": 64, "mean": 363.9248046875, "std": 673.6116943359375, "min": -1262.754150390625, "p10": -476.9909790039062, "median": 292.8876953125, "p90": 1275.1741577148443, "max": 1862.5616455078125, "pos_frac": 0.765625, "sample": [1153.79345703125, 503.14776611328125, 1603.720458984375, -234.10140991210938, 1327.1944580078125, 248.56002807617188, 985.4202880859375, 928.75439453125, -1262.754150390625, 299.16943359375, -407.2021179199219, 1356.005615234375, 542.3699951171875, 790.9890747070312, -443.6715087890625, 577.5010986328125, 618.2474365234375, -1237.9375, 157.83958435058594, 328.1030578613281, 461.1517028808594, 1661.3603515625, 1862.5616455078125, 874.2373046875, 658.783203125, 746.5, -115.73103332519531, 1099.9112548828125, 1361.7098388671875, 240.51739501953125, 22.54796600341797, 384.0563049316406, -168.22222900390625, 1124.90283203125, 830.36572265625, 204.26052856445312, 239.86056518554688, 1002.0859375, -41.372474670410156, 318.7061462402344, 15.939567565917969, 799.4644775390625, 152.85064697265625, 539.61376953125, 191.67848205566406, -104.9823989868164, 23.60443878173828, 794.6622924804688, 164.03341674804688, 84.32347869873047, -508.8437805175781, -746.9631958007812, 275.85760498046875, 1663.771484375, 286.60595703125, -21.7572021484375, 186.7236785888672, -1066.78564453125, 35.55908203125, 222.82559204101562, -491.270751953125, 563.6712646484375, -700.0833129882812, 327.345703125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000532.npy"} +{"epoch": 0.7812041116005873, "step": 533, "batch_size": 64, "mean": 260.69586181640625, "std": 596.0636596679688, "min": -1661.6639404296875, "p10": -331.25523376464844, "median": 259.32904052734375, "p90": 1017.8581726074219, "max": 2309.0390625, "pos_frac": 0.65625, "sample": [559.418212890625, -410.0374755859375, 877.9488525390625, -687.9935913085938, 429.68035888671875, 180.98947143554688, 1266.7880859375, 596.2633056640625, 313.6385498046875, 382.0889892578125, 743.479736328125, 658.0709838867188, 640.812744140625, 807.1399536132812, -184.0946044921875, 359.8665466308594, 1010.1532592773438, 117.5877456665039, 269.4068603515625, -847.6912841796875, 1236.0758056640625, 291.0260009765625, -126.50457763671875, 547.1121826171875, 80.15866088867188, 637.2783813476562, -93.1865005493164, 113.33238220214844, 249.251220703125, 2309.0390625, 425.60760498046875, 338.1364440917969, -95.97213745117188, -40.245811462402344, -27.00656509399414, -1661.6639404296875, -284.1888732910156, 310.3949890136719, -315.4570617675781, 403.1930847167969, 141.7619171142578, 190.52845764160156, 1150.5023193359375, 1275.239013671875, -183.69430541992188, -15.292800903320312, -314.8984069824219, -4.9614105224609375, 297.12835693359375, 328.9814147949219, 358.8785705566406, -110.71739196777344, 1021.1602783203125, 121.67827606201172, -351.7369384765625, -338.02587890625, -752.1419067382812, 141.4630126953125, 155.41937255859375, 1150.789306640625, -190.3385009765625, -73.91543579101562, 792.7139892578125, 514.1178588867188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000533.npy"} +{"epoch": 0.7826725403817915, "step": 534, "batch_size": 64, "mean": 356.9794616699219, "std": 554.6023559570312, "min": -1020.5357055664062, "p10": -231.8296356201172, "median": 305.0548553466797, "p90": 1022.4210083007815, "max": 1835.0435791015625, "pos_frac": 0.78125, "sample": [359.130126953125, 1063.6763916015625, -44.681121826171875, 1822.20068359375, 74.32986450195312, 752.74462890625, -590.2037963867188, 304.2735290527344, 315.82257080078125, -488.4410095214844, 79.0685806274414, 1214.769287109375, -236.88961791992188, 194.58084106445312, 1255.7462158203125, 941.1663208007812, 865.58544921875, -283.8882751464844, 424.3244934082031, 873.27734375, 305.836181640625, -174.59481811523438, 479.3750915527344, 448.42083740234375, 281.030517578125, 381.93304443359375, 966.9510498046875, -110.39718627929688, 335.0595703125, 107.9543228149414, 129.310546875, 833.6326904296875, 259.02862548828125, 314.5415954589844, 707.169189453125, -566.730224609375, -156.18991088867188, 1835.0435791015625, 815.5045776367188, 1046.19384765625, 164.6441650390625, 110.02543640136719, -94.3895263671875, 150.29251098632812, 727.2942504882812, 194.2379608154297, 183.49147033691406, 811.9495849609375, 1479.0255126953125, 942.2183227539062, 627.8816528320312, 381.8204040527344, 681.2493896484375, 145.3610076904297, 373.5263977050781, 82.2569580078125, 325.1832580566406, 239.234619140625, 229.09341430664062, 63.4486083984375, -175.29061889648438, -1020.5357055664062, -220.02301025390625, -690.9752197265625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000534.npy"} +{"epoch": 0.7841409691629956, "step": 535, "batch_size": 64, "mean": 413.74462890625, "std": 542.5011596679688, "min": -803.3751220703125, "p10": -189.39199066162107, "median": 373.5218200683594, "p90": 1112.6363525390627, "max": 2241.031494140625, "pos_frac": 0.78125, "sample": [-322.53790283203125, 2241.031494140625, -294.57373046875, 394.4276123046875, -139.5451202392578, 253.88275146484375, 94.32568359375, 976.327880859375, 159.6536865234375, 761.5213623046875, 269.580322265625, -19.690025329589844, 244.73097229003906, 969.6007080078125, 430.3063659667969, 411.90264892578125, 112.39466094970703, 1272.65576171875, 299.0521545410156, 414.43603515625, 982.673095703125, 536.5252075195312, 567.836669921875, 477.1440734863281, -38.55424499511719, 1466.7130126953125, 1219.260986328125, 491.2437438964844, -41.944854736328125, 318.9715881347656, 1501.6558837890625, 709.8604125976562, 1033.5958251953125, -373.5334167480469, -282.3210754394531, -803.3751220703125, 467.8674621582031, 873.2576293945312, 101.58456420898438, 456.6007080078125, 352.61602783203125, 1146.5108642578125, 752.9290771484375, 210.00192260742188, 496.544189453125, -90.73759460449219, 71.55193328857422, 714.232177734375, -200.11366271972656, -472.1875, 626.45703125, 108.04717254638672, 455.8106689453125, 42.127296447753906, -133.1424102783203, -164.374755859375, 1002.7474365234375, 175.56625366210938, 1501.4801025390625, 273.2283630371094, 32.068214416503906, 490.2099609375, 339.66497802734375, 553.87451171875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000535.npy"} +{"epoch": 0.7856093979441997, "step": 536, "batch_size": 64, "mean": 491.8769226074219, "std": 671.7105102539062, "min": -1078.899658203125, "p10": -72.24081497192383, "median": 297.1905212402344, "p90": 1200.509851074219, "max": 3765.138427734375, "pos_frac": 0.828125, "sample": [112.59493255615234, 820.4472045898438, 299.718017578125, 1012.7040405273438, 1214.409423828125, -7.8091888427734375, 194.87155151367188, 132.43328857421875, 77.89810180664062, -33.304931640625, 987.99462890625, 380.3710021972656, 1264.18017578125, 867.2518310546875, -170.9856719970703, 538.1239013671875, 1252.2041015625, 129.20310974121094, 948.0947265625, 798.3674926757812, 2090.75244140625, 617.8594970703125, -132.26480102539062, 916.476806640625, -26.724586486816406, 1060.3111572265625, 26.10308074951172, 535.8853149414062, 724.284423828125, 2.716064453125, 3765.138427734375, 782.2265625, 300.3248291015625, 190.78073120117188, 523.1641235351562, 701.2810668945312, 55.65019226074219, 1168.0775146484375, -254.30538940429688, 56.839412689208984, 151.7089385986328, -1078.899658203125, 1531.742919921875, 1005.10986328125, 652.8680419921875, -134.03929138183594, 156.87960815429688, 257.03497314453125, 294.66302490234375, 84.4898681640625, 216.85504150390625, -72.58533477783203, 260.2073669433594, 247.42279052734375, 253.1568603515625, 740.593505859375, 150.9833221435547, -71.43693542480469, -525.7957153320312, 77.91452026367188, 957.2911376953125, 726.582275390625, 433.53167724609375, 1240.4991455078125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000536.npy"} +{"epoch": 0.7870778267254038, "step": 537, "batch_size": 64, "mean": 329.5657653808594, "std": 553.6480102539062, "min": -1252.09033203125, "p10": -271.19244384765625, "median": 266.77239990234375, "p90": 1123.363232421875, "max": 1408.593017578125, "pos_frac": 0.796875, "sample": [-1252.09033203125, -735.5205078125, 24.682754516601562, 184.59375, -523.0596313476562, 331.81268310546875, 50.70115661621094, 904.6925659179688, 9.089019775390625, -36.896549224853516, 525.6693115234375, 3.7984771728515625, 500.2103576660156, 303.2777099609375, -277.4222412109375, 892.3646240234375, 1128.188232421875, -41.5504150390625, 197.39947509765625, -32.09795379638672, 38.78350830078125, 884.5162963867188, 55.20874786376953, -722.615478515625, 951.3233642578125, -256.65625, 1408.593017578125, 1124.29150390625, 384.1730041503906, 543.6720581054688, 1046.85107421875, 352.0965576171875, 1033.2216796875, 254.87103271484375, 734.2874755859375, -11.290672302246094, 261.0496826171875, 583.0162353515625, 272.4951171875, -469.26971435546875, 196.19784545898438, 384.6819152832031, -84.86405944824219, -870.1827392578125, 19.198223114013672, 1127.5557861328125, 13.038143157958984, 166.7667999267578, 330.0701599121094, 776.5626220703125, 154.593017578125, 1380.6871337890625, 154.07278442382812, 1121.803955078125, 579.853759765625, 78.3696517944336, 1159.9132080078125, 123.092041015625, 1124.031494140625, 352.7125244140625, 476.82769775390625, 595.8604736328125, 111.4864501953125, 993.4202880859375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000537.npy"} +{"epoch": 0.788546255506608, "step": 538, "batch_size": 64, "mean": 372.10687255859375, "std": 496.5312805175781, "min": -618.9568481445312, "p10": -213.29781494140624, "median": 385.4260711669922, "p90": 999.3600524902346, "max": 1867.496337890625, "pos_frac": 0.75, "sample": [136.7161407470703, 440.75372314453125, 730.216552734375, 1118.65966796875, 542.2535400390625, -175.80133056640625, 818.046630859375, 651.8162841796875, 571.4469604492188, 414.0234680175781, -37.55229949951172, 1643.00634765625, 556.0873413085938, 423.931884765625, 249.54673767089844, 957.7423095703125, -515.7347412109375, -40.22895050048828, 334.76605224609375, 1073.0323486328125, 245.98829650878906, 481.1670227050781, 82.35116577148438, 643.1000366210938, 623.0123901367188, 99.61018371582031, -98.77008056640625, 1376.8348388671875, -32.68788146972656, -379.3576965332031, -618.9568481445312, 816.1138305664062, 228.43763732910156, -166.9676971435547, 1867.496337890625, 639.76416015625, 627.3261108398438, 528.1278686523438, 253.80445861816406, 706.769287109375, 184.39944458007812, -195.60333251953125, 11.965862274169922, 673.9461059570312, 302.9891052246094, 585.999267578125, -486.0964660644531, 549.078369140625, -187.80252075195312, 108.21556091308594, 576.8654174804688, 447.1009521484375, 356.82867431640625, 596.3379516601562, -229.9676971435547, 16.454017639160156, 318.4732666015625, 158.32801818847656, 1171.6158447265625, 1017.1962280273438, -257.17919921875, -220.88116455078125, 576.419677734375, -75.73682403564453], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000538.npy"} +{"epoch": 0.7900146842878121, "step": 539, "batch_size": 64, "mean": 354.651123046875, "std": 440.1429748535156, "min": -546.3563842773438, "p10": -107.62189331054687, "median": 270.34557342529297, "p90": 853.713397216797, "max": 1506.333740234375, "pos_frac": 0.765625, "sample": [285.7622375488281, 316.19976806640625, 41.523033142089844, -26.202730178833008, 365.8580627441406, 446.4676513671875, -216.13829040527344, 134.413818359375, -26.910247802734375, 1176.6309814453125, -210.0926513671875, -26.84501838684082, 1502.363525390625, 136.0043487548828, 908.5855712890625, 652.4327392578125, 637.2538452148438, 695.4102172851562, 43.96875, 771.4637451171875, 1506.333740234375, 143.42657470703125, 182.67286682128906, 694.7742919921875, 201.2449188232422, 134.34085083007812, 30.123641967773438, 481.041748046875, -48.73664855957031, 816.70751953125, -337.8783874511719, 610.0737915039062, -546.3563842773438, 482.33135986328125, 643.416748046875, 90.98736572265625, 41.453834533691406, 506.83526611328125, 206.2869873046875, -113.21994018554688, -69.91230773925781, 1281.9892578125, -94.55978393554688, 736.7526245117188, -68.24284362792969, -159.116943359375, 869.5730590820312, -227.25816345214844, 170.78652954101562, 57.752662658691406, 741.269775390625, 236.26907348632812, 757.117919921875, 419.2058410644531, 368.23504638671875, 254.9289093017578, 806.1041259765625, 534.1341552734375, 404.07781982421875, 508.102783203125, 11.504987716674805, 1257.15673828125, 583.3289794921875, -15.538286209106445], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000539.npy"} +{"epoch": 0.7914831130690162, "step": 540, "batch_size": 64, "mean": 451.6276550292969, "std": 601.7568359375, "min": -699.009765625, "p10": -295.2442504882812, "median": 343.6016082763672, "p90": 1311.98125, "max": 2362.700927734375, "pos_frac": 0.765625, "sample": [474.4832763671875, 540.29541015625, 720.4594116210938, -335.46954345703125, 1669.6290283203125, -227.6806640625, 267.6344909667969, 89.39324951171875, -699.009765625, -456.55712890625, -34.81855010986328, 509.9738464355469, -62.37001037597656, 313.0946044921875, 2362.700927734375, -90.27832794189453, 258.1220397949219, 58.843292236328125, 604.8601684570312, 738.305419921875, -6.747100830078125, 186.77703857421875, 415.6049499511719, 825.9033203125, 818.57666015625, 1159.5579833984375, 499.1020812988281, 1325.657470703125, 1326.7384033203125, 365.5219421386719, 575.690673828125, 376.965087890625, 674.3781127929688, 90.32093048095703, 294.84844970703125, 509.9101867675781, -138.28262329101562, -19.499603271484375, 1535.427490234375, 294.2994079589844, 1022.9556884765625, 1699.59619140625, 1593.228759765625, 1235.718017578125, -355.8449401855469, 656.9881591796875, 247.49453735351562, 210.9221954345703, 691.1842651367188, 321.6812744140625, -324.2000732421875, 297.7328186035156, 734.2852172851562, 806.8779296875, 178.10043334960938, 288.0926513671875, 780.810302734375, 207.49810791015625, 1280.070068359375, -95.73969268798828, 9.704444885253906, 632.31494140625, -563.2908325195312, -464.3723449707031], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000540.npy"} +{"epoch": 0.7929515418502202, "step": 541, "batch_size": 64, "mean": 249.92453002929688, "std": 551.2958984375, "min": -1070.833740234375, "p10": -402.1001708984374, "median": 213.39109802246094, "p90": 914.5479248046878, "max": 1835.90966796875, "pos_frac": 0.71875, "sample": [-770.731689453125, 190.5108642578125, 549.043701171875, 157.48220825195312, 203.3350830078125, 75.03042602539062, 339.3083190917969, 107.50532531738281, 262.77032470703125, -63.2076416015625, -212.85006713867188, 1097.389892578125, -878.2020874023438, -0.7944793701171875, 945.7955322265625, 31.19379425048828, 261.28472900390625, 1202.2039794921875, 86.30355072021484, 841.6368408203125, 1471.7283935546875, 814.2679443359375, -69.27523803710938, -583.2213745117188, 217.2808837890625, -308.6795654296875, 394.65423583984375, 312.1976623535156, -442.1375732421875, 684.1357421875, 610.6583251953125, 66.0616683959961, 745.74462890625, 178.7123565673828, 209.50131225585938, 155.58657836914062, 170.03955078125, 176.36338806152344, 636.1990356445312, 507.93701171875, -149.74412536621094, 180.20143127441406, 674.7325439453125, 993.4116821289062, 667.5120849609375, 268.067626953125, -1070.833740234375, -509.93798828125, 487.7272644042969, 736.5972290039062, -125.5376968383789, 441.7075500488281, 309.0205383300781, 1835.90966796875, 1059.12548828125, -293.18109130859375, -989.6929931640625, 679.8497314453125, -278.0705871582031, -235.22775268554688, -125.78093719482422, 221.01324462890625, 540.7200927734375, 304.81634521484375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000541.npy"} +{"epoch": 0.7944199706314243, "step": 542, "batch_size": 64, "mean": 450.8837585449219, "std": 612.4697875976562, "min": -1197.924072265625, "p10": -241.57606964111324, "median": 383.9360656738281, "p90": 1194.0457763671875, "max": 2070.994140625, "pos_frac": 0.796875, "sample": [980.627197265625, 330.5841064453125, 273.0151062011719, 281.2573547363281, 167.3616943359375, 931.4406127929688, -340.18878173828125, 2070.994140625, 1204.864013671875, 452.599609375, 7.4717864990234375, 540.2352905273438, 795.6221923828125, -21.481781005859375, -60.22783660888672, 483.399169921875, 1940.838623046875, 313.3399658203125, -16.88811492919922, 1865.16748046875, 306.0212097167969, 55.51161193847656, 414.16180419921875, 454.123291015625, -427.3460388183594, 254.1273651123047, -74.1201171875, -375.95947265625, -182.64463806152344, 977.94873046875, -266.8323974609375, 835.732666015625, 939.2937622070312, 197.36026000976562, 1566.3895263671875, -52.25894546508789, 711.2559814453125, 470.25750732421875, -504.24810791015625, 629.653564453125, 295.18707275390625, 390.561279296875, 567.3746337890625, 179.66387939453125, 1434.5333251953125, 146.53671264648438, 293.6517333984375, -1197.924072265625, 721.4672241210938, 166.0618438720703, 637.3247680664062, 376.00347900390625, 125.97642517089844, 454.2403869628906, 1535.6243896484375, 1168.80322265625, 377.31085205078125, -835.5216674804688, 1070.9429931640625, 338.1365051269531, 473.0572814941406, 548.8441162109375, 636.283447265625, 823.961669921875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000542.npy"} +{"epoch": 0.7958883994126285, "step": 543, "batch_size": 64, "mean": 315.904296875, "std": 708.8843383789062, "min": -890.1223754882812, "p10": -363.7146423339844, "median": 138.0014877319336, "p90": 1278.9625366210942, "max": 3231.552490234375, "pos_frac": 0.6875, "sample": [23.673728942871094, -90.71136474609375, 693.3203735351562, -14.36773681640625, 34.53826141357422, 2094.367919921875, 4.775625228881836, 20.861312866210938, -890.1223754882812, 719.0572509765625, -66.24381256103516, -763.609619140625, 9.997432708740234, 221.6661376953125, 1905.3524169921875, 119.51747131347656, 36.48469543457031, 3231.552490234375, -499.3959655761719, 0.13162994384765625, 580.646240234375, -478.44525146484375, -239.31158447265625, 193.5968475341797, -433.69140625, -3.9683609008789062, -124.85205078125, 591.1734619140625, -427.87091064453125, 222.2421417236328, 622.6268310546875, 239.2570037841797, -354.95947265625, 1331.8896484375, -135.76437377929688, -159.7890167236328, 1922.862548828125, 636.248291015625, 1155.4659423828125, 107.90753936767578, 1388.40576171875, 106.13533020019531, -224.01380920410156, 356.51434326171875, 772.269775390625, 28.47600555419922, 371.253173828125, 1659.6763916015625, -367.46685791015625, 590.4346313476562, 240.364013671875, -114.48571014404297, 262.96746826171875, -215.11416625976562, 472.02923583984375, 628.2314453125, 309.8334045410156, -286.8667297363281, 73.22660827636719, 353.9222106933594, 156.48550415039062, 560.712158203125, 520.7391967773438, 538.0374145507812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000543.npy"} +{"epoch": 0.7973568281938326, "step": 544, "batch_size": 64, "mean": 386.49005126953125, "std": 549.8319702148438, "min": -959.864013671875, "p10": -149.9546691894531, "median": 388.33082580566406, "p90": 1158.6942871093754, "max": 1675.88818359375, "pos_frac": 0.75, "sample": [1195.0567626953125, 610.6456298828125, 405.25457763671875, 451.5405578613281, 1572.7183837890625, -907.27587890625, 430.7387390136719, -46.090415954589844, 715.833251953125, -18.287996292114258, 1201.2703857421875, -77.44413757324219, 750.6615600585938, 305.885009765625, -806.619873046875, 376.9890441894531, 648.2311401367188, 199.767822265625, 48.59271240234375, 462.1947937011719, 409.2757568359375, 459.8855895996094, -153.47933959960938, 300.34136962890625, 75.72738647460938, 172.50462341308594, 752.3285522460938, 126.89642333984375, -311.12664794921875, -141.73043823242188, 1612.79150390625, 175.43870544433594, -78.56963348388672, 618.4534301757812, -959.864013671875, 1073.8485107421875, 470.9833984375, 399.672607421875, 714.72021484375, -132.8515167236328, 247.59027099609375, 30.296260833740234, 272.03106689453125, 1595.481689453125, 702.6400756835938, -74.06101989746094, 223.08531188964844, 1675.88818359375, 857.2752685546875, 528.353515625, -9.711307525634766, 163.89324951171875, 771.1665649414062, 1410.79248046875, 637.5651245117188, 306.6556701660156, 458.3725280761719, -179.5402069091797, 496.5178527832031, 189.3258514404297, -29.144271850585938, 773.4254150390625, -196.2381591796875, 778.7930908203125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000544.npy"} +{"epoch": 0.7988252569750367, "step": 545, "batch_size": 64, "mean": 449.18927001953125, "std": 572.6865844726562, "min": -690.1390380859375, "p10": -83.80989456176752, "median": 306.4967498779297, "p90": 1479.907275390625, "max": 1717.211181640625, "pos_frac": 0.84375, "sample": [-105.62335205078125, -4.704132080078125, -635.8170166015625, 40.979522705078125, 290.665771484375, 820.1873168945312, 224.83279418945312, 102.06614685058594, 457.3609313964844, -345.4900817871094, 418.9836730957031, 700.685791015625, -28.32312774658203, 598.3085327148438, 1224.571044921875, 78.10974884033203, 1646.621826171875, 1472.5601806640625, 1332.16259765625, 350.1122741699219, 831.6157836914062, 62.00405502319336, 235.98731994628906, 269.74493408203125, 33.525169372558594, 267.6507263183594, 63.84002685546875, 488.82904052734375, 912.6522216796875, -32.911827087402344, 1485.7691650390625, 450.88934326171875, 111.35884857177734, -119.57139587402344, 1717.211181640625, 905.0464477539062, 11.232681274414062, 1483.0560302734375, -569.6424560546875, 436.6492919921875, 12.858741760253906, 504.1092224121094, 94.05511474609375, 529.225341796875, 373.792236328125, 767.0653686523438, 941.7930908203125, 1489.1329345703125, 275.7861022949219, 225.61126708984375, 126.0764389038086, 82.08648681640625, 322.3277282714844, 454.7598571777344, 179.46682739257812, 1564.5458984375, 1237.26513671875, -252.47683715820312, 332.1222839355469, 211.44622802734375, 561.3018798828125, -690.1390380859375, 230.16319274902344, 1492.5509033203125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000545.npy"} +{"epoch": 0.8002936857562408, "step": 546, "batch_size": 64, "mean": 298.6014404296875, "std": 465.9460144042969, "min": -1337.849609375, "p10": -237.204963684082, "median": 294.5093688964844, "p90": 867.9114074707031, "max": 1274.7109375, "pos_frac": 0.734375, "sample": [-196.20944213867188, 102.7310562133789, 415.06488037109375, -87.3582763671875, 274.11627197265625, 586.7450561523438, 275.3353576660156, 583.144287109375, 185.5865020751953, 235.469482421875, -158.54177856445312, -211.71925354003906, -315.510986328125, -173.17498779296875, 721.49267578125, 117.13340759277344, 5.285377502441406, 460.1155700683594, 827.1134643554688, 244.31732177734375, -112.36193084716797, 858.9027099609375, -306.7209167480469, 1125.9248046875, 593.9299926757812, 800.57958984375, 785.411376953125, 496.3209228515625, 913.161865234375, 313.6833801269531, 1274.7109375, -255.65882873535156, -24.25469207763672, 497.0188293457031, 489.67205810546875, -722.5941772460938, 650.5935668945312, 473.05731201171875, 614.6334228515625, 915.1777954101562, -1337.849609375, 763.6258544921875, 199.19439697265625, 313.72540283203125, 466.9252624511719, 871.7722778320312, 123.60966491699219, 667.90380859375, -54.551429748535156, 152.8663787841797, 126.7718276977539, 129.67356872558594, 739.563720703125, 331.1054992675781, -210.4274139404297, 560.7881469726562, 151.66537475585938, 1070.8475341796875, -248.12741088867188, 114.3200912475586, -57.74772644042969, 383.9933776855469, -305.21392822265625, 883.7355346679688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000546.npy"} +{"epoch": 0.801762114537445, "step": 547, "batch_size": 64, "mean": 379.1336669921875, "std": 541.4139404296875, "min": -1088.06201171875, "p10": -155.357225036621, "median": 341.33888244628906, "p90": 1014.3199584960942, "max": 2111.828125, "pos_frac": 0.75, "sample": [453.27362060546875, 123.90210723876953, -1088.06201171875, 1054.5648193359375, 46.20561599731445, -22.974334716796875, 305.5108337402344, 828.0548706054688, 177.80059814453125, 596.8999633789062, 186.74005126953125, 1608.805908203125, 56.792423248291016, 1701.592041015625, -27.617477416992188, -66.55884552001953, 603.366455078125, 616.5491943359375, -187.13157653808594, 828.14306640625, -80.18230438232422, 78.98517608642578, 403.5423278808594, 270.2779846191406, 541.941162109375, -191.65460205078125, 572.277099609375, 215.18772888183594, 224.43653869628906, 360.1535949707031, 920.415283203125, -58.06462097167969, 340.3378601074219, 679.9691162109375, 556.8665771484375, 490.8267517089844, -71.38560485839844, 853.1477661132812, 358.2832336425781, -8.64508056640625, 711.5205078125, 2111.828125, 591.7122802734375, -712.4773559570312, -250.98306274414062, 13.745445251464844, -370.17578125, 1069.9468994140625, 113.65141296386719, 505.2027282714844, 1531.730712890625, 191.0255584716797, 513.7525634765625, 430.41583251953125, 199.08567810058594, 342.33990478515625, 749.91064453125, -71.5374755859375, 382.48431396484375, -81.21707153320312, 727.4962768554688, 1225.563720703125, 294.76239013671875, -207.80426025390625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000547.npy"} +{"epoch": 0.8032305433186491, "step": 548, "batch_size": 64, "mean": 355.373291015625, "std": 562.1937866210938, "min": -889.7100830078125, "p10": -209.75458831787108, "median": 312.6634063720703, "p90": 821.6160705566408, "max": 2544.035888671875, "pos_frac": 0.78125, "sample": [13.460708618164062, 694.27978515625, 358.03497314453125, 331.53497314453125, 44.08226776123047, 375.2674255371094, -889.7100830078125, 1312.087890625, 431.0500183105469, 318.7552185058594, 413.58624267578125, 623.2703857421875, 1158.3812255859375, 294.22198486328125, 715.9000244140625, -281.79315185546875, -80.85456848144531, 412.0637512207031, 908.8365478515625, 144.39791870117188, 771.9461059570312, 365.5151062011719, 208.4573974609375, -352.08514404296875, 496.7644348144531, 52.666160583496094, 201.9253692626953, 320.9268493652344, 306.57159423828125, 169.78627014160156, -219.48898315429688, 2544.035888671875, 842.9031982421875, 186.948974609375, 160.3335418701172, -393.97271728515625, -73.03788757324219, 742.5427856445312, -275.9489440917969, 2119.362060546875, 70.81658935546875, -35.32241439819336, -187.04100036621094, 381.0196533203125, -86.28921508789062, 58.388214111328125, 447.11932373046875, 133.4183349609375, 325.7315979003906, -348.5277404785156, 553.7359008789062, 355.3391418457031, -44.58039855957031, 2002.290771484375, 456.52728271484375, 591.8984375, 106.61109924316406, -39.755401611328125, 259.25518798828125, 259.2888488769531, 64.9369125366211, 638.513916015625, 645.42431640625, 662.0841064453125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000548.npy"} +{"epoch": 0.8046989720998532, "step": 549, "batch_size": 64, "mean": 275.03582763671875, "std": 504.9320068359375, "min": -836.0098876953125, "p10": -261.7918182373046, "median": 216.33502960205078, "p90": 903.9227294921877, "max": 1533.4620361328125, "pos_frac": 0.703125, "sample": [264.5709533691406, 843.7472534179688, 94.80091857910156, -131.04269409179688, 154.31761169433594, 99.3773193359375, -59.53486633300781, 513.7713623046875, 640.0529174804688, 819.8013305664062, -119.05848693847656, 1278.7998046875, 79.59049224853516, 287.8711853027344, -65.15240478515625, -350.769775390625, 929.7122192382812, -340.1234436035156, -161.86795043945312, -687.193603515625, 635.842041015625, 1035.27978515625, -304.6163330078125, 370.80145263671875, 270.8168029785156, 389.03546142578125, 449.84075927734375, -44.20940399169922, 706.635498046875, 732.0772094726562, 41.58673095703125, 225.65711975097656, 264.7935485839844, -126.34212493896484, 431.9691467285156, -836.0098876953125, 117.1143569946289, 669.6888427734375, 78.0449447631836, 75.96992492675781, 22.809661865234375, -1.9527816772460938, 349.1783447265625, 687.7186889648438, 1309.6463623046875, -102.75288391113281, -100.92613220214844, 258.32794189453125, -307.3424072265625, 1533.4620361328125, 1378.87255859375, -800.5541381835938, -55.23805236816406, 656.1951293945312, 419.84771728515625, -126.74349212646484, 207.012939453125, 158.23080444335938, 606.6478271484375, 1520.66796875, 354.326171875, 232.5897979736328, 117.36735534667969, 9.255577087402344], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000549.npy"} +{"epoch": 0.8061674008810573, "step": 550, "batch_size": 64, "mean": 325.55517578125, "std": 514.3424072265625, "min": -1104.5235595703125, "p10": -182.15411071777342, "median": 332.2757568359375, "p90": 860.827655029297, "max": 2012.857421875, "pos_frac": 0.8125, "sample": [336.4244079589844, 352.4216003417969, 1094.5693359375, 402.2018737792969, 328.1271057128906, 661.6036987304688, 1090.2049560546875, 271.13372802734375, -115.51597595214844, 170.77098083496094, 628.3547973632812, 409.14666748046875, 557.707763671875, 184.39208984375, 321.2083740234375, 1875.8355712890625, 60.86022186279297, -336.0679016113281, 83.01905822753906, 38.953433990478516, -1104.5235595703125, -16.4200439453125, 468.23541259765625, 833.1882934570312, -120.01863098144531, 872.673095703125, 829.4041748046875, 98.34546661376953, 229.628662109375, 184.9786834716797, 41.37565612792969, -689.3457641601562, 450.42724609375, -107.2165756225586, 381.4144592285156, 736.9088134765625, 23.645889282226562, 484.24139404296875, 2012.857421875, 432.2334289550781, 339.855224609375, 573.2015991210938, 108.41084289550781, 50.170745849609375, 1231.2816162109375, 380.44732666015625, 1053.49560546875, 106.60148620605469, 177.47598266601562, 604.6498413085938, 426.9208679199219, 647.473388671875, -647.886474609375, 436.2467041015625, -157.37924194335938, 348.916259765625, 217.9979705810547, 499.5892639160156, 292.6532897949219, -406.559814453125, 574.1124267578125, -192.77191162109375, -381.98443603515625, 95.22752380371094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000550.npy"} +{"epoch": 0.8076358296622613, "step": 551, "batch_size": 64, "mean": 526.6087646484375, "std": 506.7965393066406, "min": -452.6608581542969, "p10": -60.190314865112285, "median": 479.5000305175781, "p90": 1305.2790405273438, "max": 1946.197265625, "pos_frac": 0.859375, "sample": [41.11551284790039, -29.8629150390625, 533.1512451171875, 642.5555419921875, 687.634033203125, 641.4992065429688, -452.6608581542969, 33.22185134887695, 1363.974609375, -250.48130798339844, 81.05889129638672, 405.86993408203125, -68.34785461425781, 561.1002807617188, 559.1043090820312, 1414.37744140625, 488.8813781738281, 349.65087890625, 662.5988159179688, 1577.36376953125, 971.843017578125, 403.05902099609375, 570.1363525390625, 631.992431640625, 384.37347412109375, 306.609375, 937.9359741210938, 385.585205078125, 652.1806030273438, 440.5333251953125, 416.0418701171875, 6.459743499755859, 1278.400390625, 165.442138671875, 517.379150390625, 651.4295654296875, 702.7940063476562, 639.0634765625, 540.2077026367188, 40.44621276855469, -239.59078979492188, 993.2440185546875, 1227.666259765625, 418.6860046386719, -41.15605545043945, 1504.363525390625, -287.0097961425781, -241.22952270507812, 1520.2890625, 849.7428588867188, -205.81259155273438, 69.73916625976562, 1316.7984619140625, 965.8382568359375, 447.9635925292969, 319.786865234375, 794.91455078125, 191.32574462890625, 470.1186828613281, 880.3320922851562, 191.5578155517578, 1946.197265625, 401.59844970703125, 323.8758544921875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000551.npy"} +{"epoch": 0.8091042584434655, "step": 552, "batch_size": 64, "mean": 598.0020751953125, "std": 716.9085083007812, "min": -806.7570190429688, "p10": -109.85212402343748, "median": 534.4215087890625, "p90": 1435.9460693359376, "max": 2848.980224609375, "pos_frac": 0.84375, "sample": [734.6840209960938, 65.53961944580078, 1026.2418212890625, 2848.980224609375, 820.6036376953125, 1136.528076171875, 552.807373046875, 897.5755615234375, -98.44171142578125, 609.1505737304688, 674.0552978515625, 496.7884216308594, 160.88519287109375, 599.564208984375, 297.41949462890625, 800.0006713867188, 109.73348236083984, 667.9710693359375, 1364.7154541015625, -376.9262390136719, 71.93246459960938, 710.7252807617188, 570.931640625, 307.8663024902344, -806.7570190429688, 16.434349060058594, 39.53327178955078, 1553.779296875, 186.5618133544922, -233.61251831054688, 1407.501220703125, -629.8303833007812, 841.2791137695312, -99.95423126220703, 516.8792114257812, 169.35398864746094, 439.3505859375, 2508.34716796875, 1246.9443359375, 1902.9298095703125, 553.5032348632812, -114.09407806396484, 459.70648193359375, 1029.8284912109375, 656.1199951171875, 2534.817626953125, 273.9058837890625, 499.58917236328125, 1448.13671875, 2250.16357421875, 551.9638061523438, 158.76284790039062, 443.12109375, -367.9762878417969, -24.205291748046875, 505.0095520019531, -140.4196014404297, 143.6983184814453, 414.8502502441406, 587.5010986328125, 604.5819091796875, 947.141845703125, 722.2999267578125, 26.05535125732422], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000552.npy"} +{"epoch": 0.8105726872246696, "step": 553, "batch_size": 64, "mean": 325.43560791015625, "std": 549.2989501953125, "min": -674.293212890625, "p10": -177.82926483154296, "median": 222.3748779296875, "p90": 980.0663452148439, "max": 2502.20361328125, "pos_frac": 0.78125, "sample": [2502.20361328125, 989.0159912109375, -214.58526611328125, 498.0013122558594, 482.58270263671875, -26.881629943847656, 55.74591064453125, 150.92462158203125, 179.02938842773438, 233.09521484375, 206.16937255859375, 315.492431640625, 142.05844116210938, 54.00216293334961, 9.822990417480469, 270.4844055175781, -604.7260131835938, 14.40771484375, 395.5224609375, -166.39369201660156, 139.03988647460938, 199.42433166503906, 339.9195861816406, -463.4117126464844, 504.7822265625, 59.45835876464844, 1234.8001708984375, 137.89739990234375, 6.480354309082031, 354.3699951171875, -152.20179748535156, 435.6907653808594, -182.730224609375, -145.68104553222656, 650.5177001953125, 356.16778564453125, 391.78759765625, 163.9012451171875, 959.183837890625, -64.7635726928711, 338.49407958984375, -511.27923583984375, 613.8056030273438, 622.4306030273438, 279.1178894042969, 232.515625, -674.293212890625, 129.88232421875, 527.1300048828125, 7.272453308105469, 1197.3392333984375, -31.125038146972656, 1152.503173828125, 500.38079833984375, 872.6871337890625, 1164.003173828125, 212.234130859375, 954.613525390625, -556.1306762695312, 1846.0992431640625, -164.42755126953125, 953.5498657226562, 111.55976104736328, 638.9110717773438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000553.npy"} +{"epoch": 0.8120411160058737, "step": 554, "batch_size": 64, "mean": 418.43157958984375, "std": 699.6099243164062, "min": -1124.553955078125, "p10": -483.08914184570307, "median": 432.4214630126953, "p90": 1344.7986572265627, "max": 2113.73388671875, "pos_frac": 0.734375, "sample": [189.1803741455078, -672.692626953125, 738.1199951171875, 635.345703125, 269.3504943847656, 911.273681640625, 598.989013671875, 480.7428283691406, -416.0303039550781, 428.4827880859375, 2025.1187744140625, 467.9969482421875, 356.1908874511719, 279.3471984863281, -166.75148010253906, 505.779052734375, 800.9525756835938, 638.1513671875, 572.3543090820312, -16.816184997558594, -664.8309936523438, -688.6519775390625, -511.8286437988281, 4.915306091308594, 2113.73388671875, -167.8634033203125, 240.39620971679688, 452.4755554199219, 479.92010498046875, 851.1768798828125, 583.52197265625, 1355.48828125, 1687.9342041015625, 192.24246215820312, 946.6928100585938, 436.3601379394531, 1014.5277099609375, 15.960689544677734, 1569.05126953125, -152.79722595214844, -106.02969360351562, -36.41657257080078, 1181.79833984375, 570.8892822265625, 1043.6007080078125, 35.81840896606445, 300.6739501953125, 3.367095947265625, 908.71728515625, 776.2260131835938, -881.8168334960938, 670.6871337890625, 74.5442123413086, -693.7071533203125, 1100.227294921875, -1124.553955078125, -405.6911926269531, -13.227838516235352, 340.93707275390625, 1319.856201171875, 1676.300048828125, 1758.33984375, 85.84166717529297, -190.27020263671875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000554.npy"} +{"epoch": 0.8135095447870778, "step": 555, "batch_size": 64, "mean": 498.1239013671875, "std": 737.8878784179688, "min": -1096.882080078125, "p10": -236.2719589233398, "median": 382.9940643310547, "p90": 1604.3675781250001, "max": 2276.538818359375, "pos_frac": 0.796875, "sample": [694.7366943359375, 444.49005126953125, 1561.224609375, 262.2275695800781, 684.318115234375, 1272.0352783203125, 364.166015625, 139.48324584960938, -150.24737548828125, 195.15774536132812, -182.11326599121094, 805.6053466796875, 801.3109130859375, 494.12957763671875, 19.1236572265625, 89.00067138671875, 225.52981567382812, -326.3379211425781, 2128.01416015625, 1152.6568603515625, 152.44915771484375, 2136.7646484375, 37.33460235595703, 542.6279296875, 664.1837768554688, 162.37564086914062, 827.4517211914062, 40.44413757324219, 401.8221130371094, 78.50297546386719, 643.719970703125, 967.1243896484375, 582.5764770507812, 519.3248291015625, 23.003082275390625, 180.4314422607422, 740.9244384765625, -565.8583984375, 157.44195556640625, -44.58924102783203, -1016.4344482421875, 1232.1837158203125, 1670.4276123046875, 2276.538818359375, -511.2083435058594, 463.9681091308594, 2000.0721435546875, 137.89048767089844, 471.3872375488281, 107.124267578125, 1559.5496826171875, -391.30511474609375, 1622.857421875, -1096.882080078125, -29.58032989501953, -174.73666381835938, 2086.24658203125, 547.2515869140625, -199.57266235351562, 300.11578369140625, 1191.8016357421875, 235.46328735351562, -252.00022888183594, 726.2057495117188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000555.npy"} +{"epoch": 0.8149779735682819, "step": 556, "batch_size": 64, "mean": 303.96832275390625, "std": 460.9410095214844, "min": -477.2930908203125, "p10": -254.5648727416992, "median": 235.0877685546875, "p90": 954.9650573730469, "max": 1540.3203125, "pos_frac": 0.703125, "sample": [1273.5919189453125, 214.06829833984375, 69.31948852539062, -175.11767578125, 265.2720031738281, 151.14651489257812, 647.634765625, 442.3655700683594, -231.28407287597656, 355.6325988769531, -23.062843322753906, 745.849609375, 282.8063659667969, -350.6914367675781, 238.5992431640625, 486.58453369140625, 1171.6634521484375, -40.27423095703125, 685.9205322265625, -267.71209716796875, -318.493408203125, -85.52498626708984, -437.3056640625, 115.93482208251953, -477.2930908203125, -143.62139892578125, 443.5625915527344, -74.08992004394531, 955.2295532226562, -264.5423583984375, 325.94512939453125, 313.99920654296875, 348.0122375488281, 189.710205078125, 833.5885009765625, -73.06246185302734, 494.5797119140625, 145.11363220214844, 171.25466918945312, -18.694936752319336, -75.2255859375, 752.9932861328125, 229.4175567626953, 144.151611328125, -176.67990112304688, 640.8571166992188, 899.591552734375, 580.3901977539062, 1066.93212890625, 82.09830474853516, 231.5762939453125, -189.98556518554688, 1109.5885009765625, 283.40673828125, 1540.3203125, 81.12616729736328, 954.347900390625, 1380.9403076171875, 498.4834289550781, 414.3790283203125, 390.0801086425781, -349.46746826171875, 346.56072998046875, 231.4749755859375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000556.npy"} +{"epoch": 0.8164464023494861, "step": 557, "batch_size": 64, "mean": 449.748046875, "std": 737.3679809570312, "min": -1534.52587890625, "p10": -344.948843383789, "median": 364.8971862792969, "p90": 1401.0640747070318, "max": 2646.0869140625, "pos_frac": 0.765625, "sample": [-1534.52587890625, 185.99888610839844, -1073.7515869140625, 733.2513427734375, 364.26739501953125, 177.59100341796875, 140.1832733154297, 563.8494262695312, 691.9664306640625, 71.16671752929688, 933.1234741210938, 1555.12451171875, 1200.99658203125, 63.8397216796875, 183.49441528320312, 318.78759765625, 486.16937255859375, 954.4744873046875, -37.456172943115234, 98.18933868408203, 92.94136047363281, 1271.0687255859375, 1708.68359375, 643.5401611328125, 26.8997802734375, 605.887939453125, -33.9896240234375, -407.2921142578125, -241.5506591796875, 941.0054931640625, 124.84844970703125, 681.8597412109375, 1131.9735107421875, 550.1993408203125, 239.04685974121094, 197.6887664794922, 540.6685180664062, -763.0922241210938, 969.3119506835938, 585.7206420898438, 892.6936645507812, -369.7533264160156, 494.8423156738281, 140.85508728027344, 133.0967559814453, -472.192626953125, 795.7425537109375, 1700.5635986328125, -498.2604064941406, 2646.0869140625, -135.28782653808594, 2376.472412109375, 1632.13330078125, 155.85025024414062, 1081.3480224609375, -16.144729614257812, -234.3419189453125, 1456.7763671875, -287.07171630859375, 1189.0400390625, -36.484764099121094, 450.6122131347656, 379.61077880859375, 365.5269775390625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000557.npy"} +{"epoch": 0.8179148311306902, "step": 558, "batch_size": 64, "mean": 610.62109375, "std": 647.0774536132812, "min": -1700.3482666015625, "p10": -91.97024230957031, "median": 541.756103515625, "p90": 1397.3150756835937, "max": 1995.871337890625, "pos_frac": 0.84375, "sample": [424.800537109375, 1242.023193359375, 167.3791961669922, 1535.34375, 193.93984985351562, 409.1175537109375, 561.981201171875, 2.5266876220703125, 949.2897338867188, -399.0581359863281, -154.85824584960938, 1225.44677734375, 1398.131103515625, 1017.0711669921875, 447.96636962890625, 1167.8148193359375, 645.7186889648438, 372.3644104003906, 1657.0426025390625, 1807.193115234375, -255.833740234375, 388.0636901855469, 470.1927490234375, 1303.333984375, 15.772758483886719, 634.38525390625, 1287.973876953125, 585.3025512695312, 1432.60302734375, 545.625, 459.9272766113281, 623.34130859375, 1072.0328369140625, 1277.005615234375, 1497.9193115234375, 991.8118896484375, -1700.3482666015625, -94.73126220703125, 497.1135559082031, 246.2188262939453, 416.4212646484375, 7.855623245239258, 127.52079772949219, 410.5028076171875, 509.169677734375, -85.52786254882812, -248.2117462158203, 1995.871337890625, 1083.181640625, -1.3072185516357422, 807.375244140625, 1092.5076904296875, 574.5906982421875, 537.88720703125, 1166.8558349609375, -726.3670654296875, 835.5614013671875, 419.1230163574219, 433.0860595703125, -14.639373779296875, 219.48471069335938, 1395.4110107421875, 1097.336669921875, 1077.1141357421875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000558.npy"} +{"epoch": 0.8193832599118943, "step": 559, "batch_size": 64, "mean": 515.7313842773438, "std": 681.5225830078125, "min": -925.488037109375, "p10": -123.32455520629881, "median": 320.1153564453125, "p90": 1377.1375732421875, "max": 2538.9287109375, "pos_frac": 0.8125, "sample": [1036.76220703125, 1400.81884765625, 688.0443115234375, 786.880859375, 47.07862091064453, 52.499969482421875, 120.14662170410156, 162.2401885986328, 1364.444091796875, 2538.9287109375, 1181.26171875, 303.6600341796875, 1176.478515625, -10.026887893676758, 101.65853881835938, 791.08203125, 1091.999755859375, 1382.57763671875, -925.488037109375, -418.96368408203125, 1097.2706298828125, 329.57781982421875, 203.21078491210938, 310.65289306640625, 125.63423156738281, 1211.1728515625, -183.21273803710938, -133.6129150390625, 1416.3870849609375, 171.40386962890625, 14.606880187988281, 411.1558837890625, 124.19351959228516, -23.256858825683594, 253.1006317138672, -80.75175476074219, 52.31198501586914, -94.25609588623047, 165.10523986816406, 253.08395385742188, 2043.050048828125, 590.9638671875, 1172.5902099609375, 89.4584732055664, -868.1075439453125, 731.0709838867188, 735.1536865234375, 604.9739990234375, 1159.973388671875, -99.3183822631836, 588.0255126953125, 277.181640625, 410.05731201171875, 1210.247314453125, 1545.1859130859375, 2064.700439453125, -457.3892822265625, 861.4506225585938, 1035.5074462890625, 505.3079833984375, 20.554147720336914, 48.92674255371094, -499.65863037109375, 741.03857421875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000559.npy"} +{"epoch": 0.8208516886930984, "step": 560, "batch_size": 64, "mean": 358.48895263671875, "std": 608.2311401367188, "min": -1058.275146484375, "p10": -304.5107421875, "median": 290.5583038330078, "p90": 1317.2050415039064, "max": 2235.702392578125, "pos_frac": 0.734375, "sample": [2235.702392578125, -488.80279541015625, -111.62174224853516, 1205.4913330078125, -467.6255187988281, 622.365234375, 54.933494567871094, -31.37299346923828, 125.881591796875, 393.7558288574219, 268.740478515625, 137.52114868164062, 229.32699584960938, 524.7091064453125, 1551.8148193359375, -236.510986328125, 699.1696166992188, 1362.3485107421875, 297.87347412109375, 1110.8251953125, 789.6373291015625, -57.83367919921875, 195.20452880859375, 1345.272216796875, 744.4945678710938, -574.8168334960938, 15.243412017822266, 1320.995361328125, 483.85845947265625, -168.10052490234375, 888.7976684570312, -313.73944091796875, 27.562793731689453, 552.968505859375, 443.625, -518.90869140625, 100.11286163330078, 1364.738037109375, 283.2431335449219, 577.286865234375, 563.2407836914062, -158.9644775390625, 412.2226257324219, 135.66952514648438, 556.2816772460938, 1308.3609619140625, -90.37846374511719, -28.807395935058594, -196.8562774658203, 1353.4249267578125, 496.59454345703125, 215.98001098632812, 459.8211975097656, 179.1652069091797, 371.9483642578125, 328.2055969238281, -282.97711181640625, 510.4754638671875, 819.4501342773438, 734.010009765625, -808.9956665039062, 110.46825408935547, -1058.275146484375, 29.06243896484375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000560.npy"} +{"epoch": 0.8223201174743024, "step": 561, "batch_size": 64, "mean": 316.22100830078125, "std": 675.1503295898438, "min": -2286.468017578125, "p10": -290.9691680908203, "median": 221.43043518066406, "p90": 1055.403552246094, "max": 1974.6383056640625, "pos_frac": 0.734375, "sample": [686.1123657226562, 462.57501220703125, 109.40316009521484, 50.229576110839844, -1103.732666015625, 1324.030029296875, 778.1517944335938, 49.14512634277344, 702.4964599609375, 1083.85400390625, 155.1591796875, 504.923583984375, 1759.4935302734375, -342.08935546875, 175.39321899414062, 154.65786743164062, 51.717464447021484, -144.44937133789062, 401.50537109375, 463.44110107421875, 1297.17822265625, 169.10433959960938, 661.6420288085938, 348.7232666015625, -287.81884765625, 93.50559997558594, 927.4652099609375, 457.7240905761719, -100.31482696533203, 1974.6383056640625, 816.735595703125, 1227.3284912109375, 0.7023239135742188, 261.4459228515625, 989.0191650390625, -641.5243530273438, -292.3193054199219, 661.3671875, 21.39529800415039, 741.7471313476562, 130.0336456298828, -421.3033752441406, -97.49585723876953, 832.9629516601562, 649.51171875, -210.80218505859375, 171.62356567382812, 850.0387573242188, 979.312744140625, 304.9897155761719, -2286.468017578125, 1935.779296875, 48.39961624145508, -119.05461883544922, 397.463134765625, -67.16961669921875, 451.86114501953125, -136.8807830810547, 684.2030029296875, 377.64447021484375, -563.81787109375, 181.41494750976562, -244.91030883789062, -258.9538269042969], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000561.npy"} +{"epoch": 0.8237885462555066, "step": 562, "batch_size": 64, "mean": 423.7640380859375, "std": 654.8025512695312, "min": -1241.270263671875, "p10": -244.6290100097656, "median": 352.4298400878906, "p90": 1058.1807983398437, "max": 2751.784423828125, "pos_frac": 0.75, "sample": [457.9033508300781, 2751.784423828125, 1388.0377197265625, 481.1061096191406, 153.8421630859375, 520.9387817382812, 839.9210205078125, 351.22607421875, 1054.2772216796875, 707.1209106445312, 804.3517456054688, 1013.7884521484375, 292.6868896484375, 498.6722717285156, 1072.1143798828125, -219.16636657714844, 259.2628173828125, 932.89013671875, 224.44093322753906, 458.9779968261719, 312.0179443359375, 599.9642333984375, 472.8468017578125, -259.7942810058594, 25.128807067871094, 760.1788330078125, 1045.509521484375, -65.01739501953125, 1128.41064453125, 304.9296875, -66.71783447265625, -212.92559814453125, -42.85096740722656, 819.591796875, -250.9981689453125, 2570.163818359375, -229.76763916015625, 450.16424560546875, 610.01416015625, 1041.2940673828125, 714.4706420898438, 134.79298400878906, 697.4117431640625, 321.516357421875, -1241.270263671875, 277.23150634765625, -291.9884033203125, 981.7069091796875, 353.63360595703125, 1059.853759765625, -114.30267333984375, -155.16807556152344, -56.416648864746094, 66.20347595214844, 685.9014892578125, 189.8369903564453, -605.3828735351562, 16.564550399780273, 32.98900604248047, -633.4632568359375, 99.17781066894531, -584.85400390625, 1142.565185546875, 973.56982421875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000562.npy"} +{"epoch": 0.8252569750367107, "step": 563, "batch_size": 64, "mean": 322.13623046875, "std": 784.4547729492188, "min": -2833.4873046875, "p10": -603.7732971191406, "median": 398.44921875, "p90": 1093.4031005859376, "max": 1898.58935546875, "pos_frac": 0.734375, "sample": [202.10784912109375, -2833.4873046875, -888.6889038085938, 458.6253662109375, 836.6963500976562, 151.498779296875, 1420.8597412109375, -74.18148803710938, 331.44390869140625, 741.471435546875, 1095.83056640625, 263.445556640625, -589.0033569335938, 928.6888427734375, 773.1421508789062, 773.3784790039062, 118.50987243652344, 1898.58935546875, 803.7777099609375, -250.42759704589844, 672.2513427734375, 137.56771850585938, -196.98699951171875, 323.1912841796875, 868.6106567382812, -16.655498504638672, 692.6312866210938, 314.57061767578125, 1298.69091796875, 973.5111083984375, 276.04815673828125, 741.2460327148438, -1866.6475830078125, 554.24853515625, -120.80952453613281, 48.30280303955078, -98.96554565429688, 644.4536743164062, 642.5587768554688, 384.06060791015625, 904.3025512695312, 586.9412841796875, 1296.0103759765625, 366.67449951171875, 1087.739013671875, -183.9385986328125, -1380.0791015625, 1842.8759765625, -610.103271484375, 1421.5518798828125, -46.54256820678711, 703.796142578125, 71.30976104736328, -700.7266845703125, 774.7041015625, -634.9465942382812, 515.1615600585938, -373.61474609375, 102.52491760253906, 628.390380859375, 412.83782958984375, 474.6968078613281, 343.7765808105469, 579.21923828125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000563.npy"} +{"epoch": 0.8267254038179148, "step": 564, "batch_size": 64, "mean": 411.1693115234375, "std": 584.3018798828125, "min": -569.5101318359375, "p10": -270.19848327636714, "median": 349.68153381347656, "p90": 1180.8415527343757, "max": 2241.0283203125, "pos_frac": 0.765625, "sample": [373.3563232421875, 600.0850830078125, 765.4622802734375, 172.9407501220703, 631.887451171875, 635.6090087890625, 837.4208984375, -340.3450012207031, 392.8785095214844, -569.5101318359375, 474.8228454589844, 738.3873901367188, -413.83709716796875, 481.5587463378906, 1003.0589599609375, 34.029541015625, -288.5187072753906, 1246.8572998046875, 1374.1173095703125, 353.4912109375, 345.8718566894531, 189.02247619628906, 109.03361511230469, -237.7896270751953, -380.96478271484375, 366.4561462402344, -246.66519165039062, 175.44802856445312, 28.802276611328125, -527.8587646484375, 372.7042236328125, 2110.70263671875, 771.0390625, -159.52532958984375, -22.708337783813477, 411.74700927734375, 172.86769104003906, 111.7693099975586, 487.374755859375, -31.159282684326172, 555.616943359375, 2241.0283203125, -161.6875, 615.32666015625, 797.0861206054688, 770.5830078125, 1540.8798828125, 876.4021606445312, 1421.126708984375, 209.36309814453125, -10.943000793457031, 705.2220458984375, 4.000099182128906, 81.50247192382812, 155.5491485595703, 338.7303466796875, 218.8981475830078, 1026.8048095703125, 297.52496337890625, -220.85658264160156, 1417.87744140625, -280.2841796875, 945.395263671875, 219.76931762695312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000564.npy"} +{"epoch": 0.8281938325991189, "step": 565, "batch_size": 64, "mean": 431.43963623046875, "std": 758.5753784179688, "min": -1354.298828125, "p10": -483.37856750488277, "median": 308.1321563720703, "p90": 1346.343603515625, "max": 2525.639404296875, "pos_frac": 0.6875, "sample": [-46.59364700317383, 1223.8677978515625, 1362.510498046875, -1354.298828125, 246.305908203125, 868.3598022460938, 2093.50146484375, -76.36805725097656, -137.53028869628906, -453.8343505859375, 499.9442443847656, 1110.5130615234375, -305.0631408691406, -1129.427978515625, 279.90728759765625, -595.898681640625, -496.0403747558594, -190.33795166015625, -108.6580810546875, 514.507080078125, 1160.599853515625, 45.486061096191406, 277.9754943847656, 1504.977294921875, -505.5240478515625, 14.638191223144531, 231.8858642578125, 343.6240539550781, -516.1787109375, 1110.655517578125, 654.8841552734375, 242.7499237060547, 668.6835327148438, 901.8977661132812, 821.5345458984375, -2.2395477294921875, 880.5706176757812, 672.4672241210938, -292.62103271484375, -71.80370330810547, -79.20913696289062, 860.9644775390625, 824.691650390625, 1849.6024169921875, 837.8814086914062, 2525.639404296875, 562.0321044921875, 2261.8408203125, 1358.267578125, -527.713134765625, -134.1512451171875, 16.921890258789062, 873.758544921875, 439.90277099609375, 959.3211669921875, 347.5146179199219, 1160.4404296875, 303.389404296875, 144.1470947265625, 11.887123107910156, -276.15673828125, 312.8749084472656, 210.1407928466797, 1318.52099609375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000565.npy"} +{"epoch": 0.8296622613803231, "step": 566, "batch_size": 64, "mean": 485.27099609375, "std": 777.9865112304688, "min": -1444.614013671875, "p10": -424.1321319580078, "median": 562.3296203613281, "p90": 1524.4004272460938, "max": 2281.95703125, "pos_frac": 0.703125, "sample": [-278.3050537109375, -810.7505493164062, 700.0907592773438, 2085.265380859375, 459.0166320800781, -169.05215454101562, 746.3238525390625, 230.64297485351562, -1060.9947509765625, 550.1065063476562, 590.5040283203125, -245.5798797607422, -27.445465087890625, 679.7173461914062, 821.2581176757812, 452.72711181640625, 854.911865234375, 19.537403106689453, 13.368885040283203, -1444.614013671875, 1082.8292236328125, 738.972412109375, 533.1516723632812, 47.76263427734375, 1236.033935546875, -493.7613220214844, 574.552734375, 1167.1590576171875, -54.06034851074219, 1075.162353515625, 858.631591796875, 1142.6971435546875, 776.0382080078125, 106.03707885742188, 1065.4482421875, -346.2035827636719, -152.18295288085938, 2034.4754638671875, 891.1997680664062, -268.54425048828125, -427.885986328125, 769.0291748046875, 597.512939453125, 1561.782470703125, 1513.6322021484375, 1776.7882080078125, 2281.95703125, 789.1983642578125, -762.4131469726562, 335.4765625, -670.7857055664062, 1761.46728515625, 849.1569213867188, 1529.015380859375, -115.24340057373047, -391.2511901855469, 1069.8175048828125, 447.51025390625, 1113.5738525390625, -415.3731384277344, 63.57343292236328, -4.49609375, 683.4464721679688, 519.7266845703125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000566.npy"} +{"epoch": 0.8311306901615272, "step": 567, "batch_size": 64, "mean": 376.59283447265625, "std": 500.58233642578125, "min": -1142.6712646484375, "p10": -82.12147674560545, "median": 316.5018005371094, "p90": 986.4532592773438, "max": 1778.4803466796875, "pos_frac": 0.796875, "sample": [67.74919891357422, -22.63663673400879, 1283.5694580078125, 661.4278564453125, 382.22216796875, 721.2576904296875, 161.85028076171875, -89.4505386352539, 1027.74365234375, 167.36087036132812, 994.310791015625, 52.21857452392578, 1285.658447265625, 318.92242431640625, 56.709564208984375, 291.14044189453125, 1208.41162109375, 632.362548828125, 647.0692138671875, 61.1595344543457, 585.9402465820312, 202.63546752929688, -218.60572814941406, 673.3292846679688, 339.9484558105469, 775.1736450195312, 712.1015014648438, 1472.08154296875, -45.18321228027344, 670.3663330078125, -65.02033233642578, 428.16180419921875, 420.69866943359375, -1142.6712646484375, 311.6058349609375, -29.915180206298828, -7.434749603271484, 776.345458984375, 1778.4803466796875, 968.1190185546875, 213.42916870117188, -929.2339477539062, 311.5025634765625, 114.95471954345703, 239.11524963378906, 939.5486450195312, 268.0791015625, 215.2489013671875, 393.87200927734375, 462.21051025390625, 254.56932067871094, 853.4144287109375, -0.02362823486328125, 462.4605712890625, -192.0739288330078, 314.0811767578125, 167.5220947265625, 321.0963134765625, -95.27237701416016, 208.3074951171875, 432.76202392578125, 639.820556640625, -530.9268188476562, 522.26318359375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000567.npy"} +{"epoch": 0.8325991189427313, "step": 568, "batch_size": 64, "mean": 503.47100830078125, "std": 562.0370483398438, "min": -812.0194702148438, "p10": -114.65305099487304, "median": 405.3091278076172, "p90": 1194.8390136718751, "max": 2042.878173828125, "pos_frac": 0.875, "sample": [704.43994140625, -746.1256103515625, 1396.36962890625, 966.153564453125, 592.9654541015625, 419.5990905761719, 680.1380004882812, 872.987060546875, 366.6629943847656, -812.0194702148438, -335.9649658203125, -108.71993255615234, 611.90673828125, 1916.593505859375, 213.91534423828125, 549.09912109375, 229.4466552734375, 257.83355712890625, 305.9322204589844, -174.62831115722656, 673.4180297851562, 381.8742980957031, 994.974609375, 1020.8967895507812, 538.1494140625, 314.6996765136719, 78.85836029052734, 2042.878173828125, 300.1757507324219, 374.4685363769531, 834.51416015625, 677.6060791015625, 1207.0252685546875, 674.2197265625, 1565.1910400390625, 391.0191650390625, 175.01158142089844, 614.005859375, 287.6142883300781, 897.9677734375, 1010.8964233398438, 49.87564468383789, 804.8297729492188, 793.1104125976562, 7.9056243896484375, 324.48529052734375, 87.33065795898438, -117.19581604003906, 373.41314697265625, 1246.16064453125, 113.83562469482422, 1371.6026611328125, 495.124267578125, -704.3727416992188, 244.64035034179688, -385.771240234375, 1135.48583984375, 1166.4044189453125, 255.04989624023438, 628.9351196289062, 217.7265625, 778.2640380859375, 181.2833251953125, 191.9730682373047], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000568.npy"} +{"epoch": 0.8340675477239354, "step": 569, "batch_size": 64, "mean": 409.83868408203125, "std": 741.5772705078125, "min": -2036.286865234375, "p10": -386.49336853027336, "median": 361.9074249267578, "p90": 1377.627038574219, "max": 2488.928466796875, "pos_frac": 0.75, "sample": [-731.41259765625, 215.15469360351562, 1818.51123046875, 187.47840881347656, 525.6144409179688, -166.99668884277344, 744.1365966796875, 81.73171997070312, -525.4890747070312, 120.64399719238281, 847.9976196289062, 1402.3800048828125, 521.12158203125, 608.7330932617188, 381.16015625, -546.0135498046875, 297.77447509765625, 846.4996948242188, -462.89471435546875, -19.164392471313477, -1321.204833984375, 1748.746826171875, 68.16921997070312, -272.81121826171875, -31.769193649291992, 706.0807495117188, 1191.634765625, -9.906448364257812, 77.61062622070312, 8.703609466552734, 1518.2889404296875, 853.4263916015625, 704.2440185546875, -15.9033203125, 1502.8349609375, 397.8183288574219, 134.81195068359375, 356.8111877441406, 775.718994140625, -2036.286865234375, 581.3333740234375, 134.36874389648438, -72.5914306640625, 1269.8861083984375, 830.01318359375, 149.84637451171875, 947.9977416992188, 180.43594360351562, -324.8462219238281, 662.3145751953125, -412.91357421875, 1800.457763671875, 47.16686248779297, 590.482421875, 1049.8677978515625, -118.35401153564453, 141.25942993164062, 642.8533935546875, 367.003662109375, 145.44281005859375, 581.2652587890625, 723.6004638671875, 1319.8701171875, 2488.928466796875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000569.npy"} +{"epoch": 0.8355359765051396, "step": 570, "batch_size": 64, "mean": 375.02764892578125, "std": 607.5836791992188, "min": -1043.7607421875, "p10": -264.85154724121094, "median": 284.62831115722656, "p90": 1185.3422241210942, "max": 2537.119140625, "pos_frac": 0.765625, "sample": [255.5646209716797, 1081.3953857421875, 373.32049560546875, 1471.60107421875, 2537.119140625, 139.2505645751953, 829.0833129882812, 1463.475830078125, 1579.9222412109375, 253.18589782714844, 362.7874755859375, 944.3597412109375, 312.3122863769531, 416.4354248046875, 82.17708587646484, 505.42236328125, 836.8765869140625, 417.78839111328125, 733.6318359375, -216.7926788330078, 728.03515625, -265.0780944824219, -504.3812561035156, -931.8897094726562, 121.4161148071289, 885.028564453125, -515.85791015625, -164.21945190429688, 378.41729736328125, 401.2381591796875, 484.9041442871094, 254.1741485595703, -264.32293701171875, -414.15374755859375, 238.97103881835938, 291.72906494140625, 277.5275573730469, 466.79107666015625, 112.70989227294922, 1392.1556396484375, 499.70794677734375, -156.0914306640625, 247.15255737304688, 143.26434326171875, 1229.890869140625, 321.55322265625, 233.3698272705078, 809.8198852539062, 1014.1770629882812, 811.120849609375, 38.33915328979492, 193.49252319335938, 575.8544921875, 406.68670654296875, -54.18128204345703, -1043.7607421875, 228.96728515625, -27.683029174804688, 184.3312530517578, -128.54580688476562, -214.8436279296875, 1368.1475830078125, 266.2998352050781, -297.4125671386719], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000570.npy"} +{"epoch": 0.8370044052863436, "step": 571, "batch_size": 64, "mean": 453.40087890625, "std": 690.3434448242188, "min": -1252.7838134765625, "p10": -440.6071472167969, "median": 473.40126037597656, "p90": 1361.3691162109376, "max": 2297.852783203125, "pos_frac": 0.765625, "sample": [534.1871337890625, 738.2691650390625, 131.2362823486328, 970.94189453125, -193.11691284179688, 535.1744384765625, 102.77195739746094, 994.2260131835938, 345.16680908203125, 1072.975830078125, 1835.566162109375, -446.9967956542969, -275.8283386230469, 1856.515625, 197.8677215576172, 613.7323608398438, 833.873046875, 830.2736206054688, 524.8128662109375, 922.3380126953125, 2297.852783203125, 1785.9864501953125, 682.6493530273438, 1114.612060546875, -470.6343688964844, -5.052736282348633, 720.352294921875, 381.28533935546875, 1517.8173828125, 1182.427490234375, 274.584716796875, -76.20635986328125, 480.1966857910156, 1636.1328125, 933.60546875, -12.517566680908203, -563.5768432617188, 160.20462036132812, 1336.5714111328125, 501.22174072265625, 855.8001098632812, -425.6979675292969, 1371.9967041015625, 560.9424438476562, 476.51702880859375, 103.55701446533203, 653.525634765625, 217.51100158691406, 569.6000366210938, -5.6414947509765625, -584.521484375, -1252.7838134765625, 330.55426025390625, 33.96026611328125, -713.1785278320312, 345.6866455078125, 299.7108154296875, 470.2854919433594, -869.8695678710938, 30.248821258544922, -123.57742309570312, 601.9811401367188, 0.695709228515625, 68.8553466796875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000571.npy"} +{"epoch": 0.8384728340675477, "step": 572, "batch_size": 64, "mean": 472.1002197265625, "std": 793.0408935546875, "min": -1443.1114501953125, "p10": -331.29786987304686, "median": 358.54388427734375, "p90": 1511.7729248046883, "max": 3268.445556640625, "pos_frac": 0.734375, "sample": [-314.2036437988281, -166.6420440673828, -318.36090087890625, 541.074462890625, 1987.759765625, 145.20220947265625, 122.49539184570312, 62.80768585205078, 94.20297241210938, 239.01333618164062, -61.621315002441406, 1334.4970703125, 1001.2325439453125, 294.483642578125, 613.6908569335938, 1178.407958984375, 1033.6641845703125, 1253.82568359375, 1587.748291015625, 672.8872680664062, 235.37229919433594, 750.87158203125, -142.1562042236328, 1207.4752197265625, -1124.2789306640625, 126.26461029052734, 313.72576904296875, -336.84228515625, -303.8050842285156, 999.9280395507812, -550.68603515625, 553.0909423828125, 475.6005554199219, -8.294036865234375, 269.6719055175781, 3268.445556640625, 99.10164642333984, 422.7109680175781, -244.58848571777344, -619.2483520507812, 1678.2762451171875, 177.54730224609375, -342.044677734375, 521.8890991210938, -265.8647155761719, -408.4914245605469, 1172.9620361328125, 802.3582763671875, 663.8242797851562, -15.42431640625, 833.3898315429688, 438.38580322265625, 388.5860290527344, 2034.999755859375, 125.21525573730469, 328.5017395019531, 663.3448486328125, 2092.076904296875, 39.6932373046875, -1443.1114501953125, 659.5275268554688, 1085.975341796875, 1595.7628173828125, 692.5099487304688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000572.npy"} +{"epoch": 0.8399412628487518, "step": 573, "batch_size": 64, "mean": 407.7635498046875, "std": 574.3579711914062, "min": -618.6739501953125, "p10": -202.10932388305656, "median": 282.1593322753906, "p90": 993.4935058593752, "max": 2525.49462890625, "pos_frac": 0.796875, "sample": [685.9671020507812, -112.09661865234375, 37.139427185058594, -14.21270751953125, 40.05735778808594, 2525.49462890625, 942.5692749023438, -125.22655487060547, 215.115478515625, 128.3840789794922, 424.6914367675781, 266.8192138671875, -618.6739501953125, 2199.953369140625, 408.06707763671875, 489.7357177734375, 1814.1708984375, -565.8009643554688, -0.49211883544921875, -235.05908203125, -498.16259765625, 1042.6529541015625, 901.9171142578125, 518.0745849609375, 41.63636016845703, 266.02435302734375, 871.3367919921875, 1015.3181762695312, 187.57174682617188, 1101.062255859375, -279.0657958984375, 913.0631103515625, 345.8930358886719, 561.5280151367188, 694.172607421875, -363.8824157714844, 297.49945068359375, 68.13848876953125, 807.212158203125, 48.80146026611328, 908.893798828125, 339.7452087402344, 261.72454833984375, -117.92654418945312, 469.7206726074219, 72.80464172363281, 262.80389404296875, 111.20195007324219, 804.5545043945312, -245.5325927734375, 573.1109619140625, 210.44699096679688, 166.86993408203125, 736.2967529296875, 384.77557373046875, 86.28398895263672, -33.760643005371094, 824.116455078125, 532.8746337890625, 604.3001708984375, 194.638427734375, 740.6974487304688, 1035.240966796875, 125.58921813964844], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000573.npy"} +{"epoch": 0.8414096916299559, "step": 574, "batch_size": 64, "mean": 435.6454772949219, "std": 595.898193359375, "min": -997.873779296875, "p10": -301.77820739746096, "median": 362.06317138671875, "p90": 1289.6033935546877, "max": 1946.029541015625, "pos_frac": 0.78125, "sample": [358.9665832519531, 715.08984375, -129.72689819335938, -91.14867401123047, 1236.2762451171875, -11.685781478881836, 140.02508544921875, 1312.4578857421875, 1030.7197265625, 870.5785522460938, 90.15190887451172, 1539.2154541015625, 1061.7532958984375, 800.8363647460938, 133.24920654296875, 867.0440673828125, 76.44524383544922, 452.31060791015625, -658.8352661132812, 1946.029541015625, 587.8143920898438, 606.7011108398438, 464.35009765625, 626.5914916992188, 256.4429931640625, 743.8898315429688, 365.1597595214844, -545.2235107421875, 126.76805114746094, -303.52154541015625, 173.18319702148438, -362.4951477050781, 241.45050048828125, 1410.0035400390625, -187.66754150390625, 723.4091186523438, -433.2876892089844, 874.447998046875, 248.1095733642578, 579.6351928710938, 1016.4202270507812, -405.91864013671875, 745.9298095703125, 803.513427734375, 631.0123291015625, 351.8294982910156, 472.2936706542969, 13.745712280273438, -997.873779296875, -297.7104187011719, 262.8352355957031, 82.98040008544922, -51.65007019042969, 1387.7845458984375, -73.74905395507812, 940.142333984375, 303.1146545410156, 1392.1007080078125, 168.89817810058594, 602.5408325195312, 133.13856506347656, 1788.684326171875, 640.1890258789062, 35.54505157470703], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000574.npy"} +{"epoch": 0.8428781204111601, "step": 575, "batch_size": 64, "mean": 411.2996520996094, "std": 699.7051391601562, "min": -1343.544189453125, "p10": -244.4066192626953, "median": 409.5190734863281, "p90": 1289.7613037109381, "max": 2185.760498046875, "pos_frac": 0.765625, "sample": [1381.734619140625, 242.41151428222656, 883.2323608398438, 356.96002197265625, 614.987548828125, -704.7379150390625, 456.4974365234375, 531.57861328125, 195.18020629882812, 127.90309143066406, 84.60358428955078, -56.879188537597656, 517.419921875, 1158.2423095703125, 606.5184326171875, 290.2723693847656, -149.67544555664062, 794.5255126953125, 291.4519958496094, 301.226318359375, 389.7103576660156, -47.8154296875, 1400.0032958984375, 562.0992431640625, 610.6322631835938, 311.5694274902344, -1343.544189453125, 299.4918518066406, -780.520263671875, 582.7619018554688, -886.043212890625, 1139.044921875, 912.7026977539062, 1346.1265869140625, 257.45404052734375, -219.8672332763672, -235.64678955078125, -95.92797088623047, 1416.8154296875, 1057.828369140625, -239.8670654296875, 269.75, -246.35214233398438, 997.1534423828125, 1106.1678466796875, -937.2845458984375, 547.1376953125, 671.9442749023438, 827.95654296875, 2185.760498046875, 517.9285888671875, 991.2213134765625, 429.3277893066406, 70.419677734375, 1598.619873046875, 126.09862518310547, 622.7901000976562, -1323.0623779296875, 2137.142822265625, 66.85084533691406, -180.25341796875, 723.24072265625, 136.31764221191406, 623.8408203125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000575.npy"} +{"epoch": 0.8443465491923642, "step": 576, "batch_size": 64, "mean": 304.1596374511719, "std": 563.8802490234375, "min": -1004.6672973632812, "p10": -328.61443786621095, "median": 212.38148498535156, "p90": 939.6970092773438, "max": 1619.4888916015625, "pos_frac": 0.71875, "sample": [182.61537170410156, 908.9959716796875, -128.5285186767578, 63.88077163696289, -350.554443359375, -241.57887268066406, -227.54039001464844, 1582.3905029296875, 916.624267578125, 163.77398681640625, 374.6526184082031, 1536.0438232421875, 130.10565185546875, 165.6782989501953, 767.7815551757812, 371.68121337890625, 416.3041687011719, -875.6107177734375, 281.6569519042969, 609.4634399414062, 573.8024291992188, 1026.2989501953125, 693.7156982421875, 949.5853271484375, -16.258899688720703, 1470.470947265625, -89.15751647949219, -326.98797607421875, -176.94839477539062, 755.19287109375, -219.2482147216797, 193.02674865722656, 55.973052978515625, 39.507530212402344, 842.3997192382812, 556.004150390625, 1619.4888916015625, 313.0599365234375, 198.4818115234375, 693.8077392578125, 64.55648040771484, 707.82177734375, 210.05471801757812, 407.3214111328125, -15.076087951660156, -844.5809936523438, -62.481422424316406, 764.7415771484375, -558.3592529296875, 637.7761840820312, 390.9205627441406, 1041.665771484375, 893.4668579101562, 916.4465942382812, -1004.6672973632812, -329.3114929199219, 165.02325439453125, 258.2452087402344, 277.7474060058594, 214.708251953125, -336.96441650390625, 32.47195816040039, -246.03265380859375, 80.6717300415039], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000576.npy"} +{"epoch": 0.8458149779735683, "step": 577, "batch_size": 64, "mean": 405.6855773925781, "std": 643.036376953125, "min": -1469.5966796875, "p10": -77.11927413940428, "median": 334.74342346191406, "p90": 1326.0718872070315, "max": 2487.5556640625, "pos_frac": 0.84375, "sample": [-1469.5966796875, 2487.5556640625, 1252.3875732421875, 1632.4957275390625, 125.92607879638672, 32.42185974121094, 322.6304626464844, 140.54798889160156, 162.120361328125, -63.494964599609375, 716.751708984375, 371.67840576171875, 587.9466552734375, 356.33697509765625, 475.3228759765625, 830.2965087890625, 958.7779541015625, 1423.6622314453125, -557.362060546875, 629.6419677734375, 183.6072235107422, 51.532997131347656, -932.69580078125, 240.897705078125, 236.7941436767578, 738.178466796875, 149.9371337890625, 242.5049285888672, 474.83648681640625, 365.6285400390625, 111.69918823242188, 467.84381103515625, 244.10018920898438, -80.27465057373047, 1809.2039794921875, 55.766143798828125, 422.00714111328125, -604.2978515625, 244.63771057128906, -292.7733154296875, 482.1180725097656, 168.30368041992188, 272.6263427734375, 306.67999267578125, -27.76715850830078, 577.26611328125, 66.47288513183594, 582.1854858398438, 792.2122802734375, 956.51220703125, 71.56047058105469, 384.0254821777344, 1357.65087890625, -69.75672912597656, 1754.5281982421875, 521.8793334960938, 441.9765930175781, -627.7813720703125, 2.7917633056640625, 1575.7967529296875, 75.94700622558594, 921.2221069335938, 485.3892517089844, 346.85638427734375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000577.npy"} +{"epoch": 0.8472834067547724, "step": 578, "batch_size": 64, "mean": 433.41094970703125, "std": 548.8930053710938, "min": -878.673583984375, "p10": -151.95068206787107, "median": 366.4341735839844, "p90": 1214.7685119628914, "max": 1801.08154296875, "pos_frac": 0.796875, "sample": [1801.08154296875, 185.197998046875, 783.5386962890625, 92.98797607421875, 583.3009643554688, 343.18243408203125, 1328.099609375, 371.4425964355469, 168.66061401367188, -93.54142761230469, 99.27886962890625, 846.9641723632812, 459.908935546875, 238.52297973632812, 148.81985473632812, -666.2195434570312, 137.76988220214844, 23.05834197998047, 1714.1456298828125, 849.448974609375, 679.0226440429688, 469.823486328125, 1010.5248413085938, -125.21354675292969, -110.84735870361328, 884.2077026367188, 816.3424682617188, -82.00060272216797, -264.7632141113281, 136.5693359375, -20.394515991210938, -384.7078857421875, 463.18719482421875, 364.3754577636719, 146.7372589111328, 559.48388671875, -220.0172119140625, 234.87786865234375, -878.673583984375, -39.73253631591797, 502.1013488769531, 704.5730590820312, 548.454833984375, 368.4928894042969, 1302.301513671875, 946.7161865234375, 319.5577392578125, 98.6995849609375, 758.506103515625, 166.37632751464844, 612.115478515625, 754.8113403320312, 38.61259841918945, 1351.5377197265625, 859.6280517578125, 1781.7508544921875, -163.40945434570312, 328.8138427734375, 1433.8427734375, 897.748779296875, 562.761962890625, -185.18560791015625, 107.03523254394531, 588.0084838867188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000578.npy"} +{"epoch": 0.8487518355359766, "step": 579, "batch_size": 64, "mean": 272.8934326171875, "std": 601.426025390625, "min": -1673.0257568359375, "p10": -426.69777526855455, "median": 295.4705810546875, "p90": 893.6755554199221, "max": 2170.780517578125, "pos_frac": 0.6875, "sample": [21.396873474121094, -179.2212371826172, -1673.0257568359375, 534.73779296875, -72.11282348632812, 130.52090454101562, 812.0963745117188, 361.4010925292969, 772.6625366210938, 391.7984313964844, -6.354560852050781, -148.0384979248047, -731.815185546875, 413.271728515625, 255.51473999023438, 636.7781982421875, 285.7520751953125, 615.7810668945312, 1128.1251220703125, 176.55003356933594, 776.9476318359375, 91.59305572509766, 464.6672668457031, 595.7587890625, -47.83679962158203, 474.987060546875, 374.95501708984375, 123.9500503540039, 645.893798828125, 294.496826171875, 508.1087646484375, -471.69586181640625, 648.4912719726562, 646.7864990234375, 2170.780517578125, 1327.39208984375, 381.5588073730469, -633.2078857421875, -29.60208511352539, 488.2892150878906, 1160.9180908203125, -321.7022399902344, 771.0613403320312, -265.0320739746094, 1487.8350830078125, -109.98878479003906, -49.80580520629883, -180.34278869628906, -640.00830078125, 296.4443359375, 928.6380615234375, 527.5721435546875, -11.412185668945312, 636.8147583007812, 394.817626953125, -897.71142578125, 1094.755615234375, 3.128416061401367, 289.01123046875, -857.0285034179688, 47.10874938964844, -53.71631622314453, 293.41729736328125, 362.273193359375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000579.npy"} +{"epoch": 0.8502202643171806, "step": 580, "batch_size": 64, "mean": 497.3021240234375, "std": 677.1400756835938, "min": -1093.759765625, "p10": -68.5765281677246, "median": 338.29344177246094, "p90": 1221.4623291015625, "max": 2924.743408203125, "pos_frac": 0.8125, "sample": [-250.7931365966797, 971.5614013671875, 205.2295379638672, 553.77978515625, -142.37030029296875, 133.79559326171875, 187.7294158935547, 2074.04052734375, 423.05914306640625, 319.25775146484375, 461.05694580078125, -31.650390625, 2924.743408203125, 251.23944091796875, 64.5067138671875, 632.731201171875, 337.5992431640625, 145.6780242919922, 906.0340576171875, 465.82122802734375, 256.7123718261719, 1044.029541015625, 1161.097900390625, 656.2020263671875, 575.5762329101562, 30.733802795410156, 1014.0606689453125, 1232.336669921875, 575.0650634765625, 155.54519653320312, 39.128013610839844, 197.2690887451172, 99.1980209350586, 104.07467651367188, 793.6854248046875, 1187.287109375, 75.75738525390625, -74.05619049072266, 800.5015869140625, 796.8014526367188, 1770.3741455078125, -6.419755935668945, -1093.759765625, 1196.0888671875, 0.22991180419921875, 766.7406616210938, -450.5851745605469, -81.72151184082031, 540.84130859375, 2667.8037109375, 66.83653259277344, -55.7906494140625, 1392.476806640625, 115.16909790039062, 368.3024597167969, 366.8584289550781, -10.285804748535156, 338.9876403808594, -23.551610946655273, -326.4463195800781, 371.6788330078125, 980.0350952148438, 1253.4130859375, 326.00653076171875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000580.npy"} +{"epoch": 0.8516886930983847, "step": 581, "batch_size": 64, "mean": 348.74224853515625, "std": 626.4593505859375, "min": -894.0809936523438, "p10": -501.3561157226562, "median": 352.19761657714844, "p90": 974.6203613281251, "max": 3010.380126953125, "pos_frac": 0.765625, "sample": [289.7093811035156, 149.03041076660156, 233.74496459960938, -134.03550720214844, 383.4912109375, 426.6285705566406, -423.08612060546875, 208.16583251953125, 958.0654296875, -508.0877685546875, 809.0935668945312, 824.6284790039062, 524.5948486328125, 326.91668701171875, 688.0227661132812, -540.164794921875, 1427.592041015625, 815.346435546875, 556.3471069335938, 1283.7362060546875, -253.90245056152344, 642.9791870117188, 69.28900146484375, 618.1482543945312, 557.5134887695312, 529.94287109375, 309.3245849609375, 163.33087158203125, 215.7108154296875, 981.71533203125, -339.9935302734375, 3010.380126953125, 6.752986907958984, 1407.59326171875, 269.3778991699219, 461.97283935546875, 888.5082397460938, -70.01713562011719, 75.3105239868164, 396.28875732421875, 377.4785461425781, 56.453948974609375, -55.65789794921875, -240.52037048339844, 1115.6134033203125, 229.34144592285156, 666.0549926757812, 806.0051879882812, 1274.0692138671875, 588.1685791015625, -606.6961669921875, -894.0809936523438, 23.589733123779297, -516.8038330078125, 630.4926147460938, 382.02020263671875, -568.8088989257812, -800.9381103515625, 151.11065673828125, -485.64892578125, 382.9901428222656, 803.4007568359375, 311.02154541015625, 450.8832702636719], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000581.npy"} +{"epoch": 0.8531571218795888, "step": 582, "batch_size": 64, "mean": 387.3603820800781, "std": 560.1697998046875, "min": -1112.8984375, "p10": -205.17096710205078, "median": 296.1603088378906, "p90": 1134.8722290039063, "max": 1670.76513671875, "pos_frac": 0.75, "sample": [-1112.8984375, 246.58175659179688, 1280.1937255859375, 317.20263671875, 618.2589111328125, -211.138671875, 238.90835571289062, 508.94818115234375, 483.487060546875, -29.41598892211914, 408.27349853515625, 66.16912078857422, -49.42735290527344, 757.9696655273438, 851.5357666015625, 1440.5389404296875, -11.05927848815918, -45.49742126464844, -215.0868377685547, 1155.281982421875, 435.77056884765625, 65.17872619628906, 608.0725708007812, -797.6690673828125, -231.22802734375, 864.2635498046875, 182.31829833984375, 1100.8438720703125, 1670.76513671875, 1519.91015625, 920.4793701171875, 415.2170715332031, 1167.81689453125, -50.897926330566406, -325.07086181640625, 225.92013549804688, -8.195457458496094, 146.01002502441406, 580.8052978515625, 664.5458984375, 353.1929016113281, -191.24632263183594, 871.4386596679688, 975.7755126953125, 8.354507446289062, 1028.365966796875, 14.261497497558594, 752.6723022460938, -73.05497741699219, 265.128173828125, 16.905078887939453, 803.720458984375, 945.6085205078125, 275.11798095703125, -45.70994567871094, -992.1194458007812, 264.6834411621094, 211.20791625976562, 776.4246826171875, 627.3775634765625, 1149.455810546875, 212.17356872558594, 625.1500854492188, 62.49842071533203], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000582.npy"} +{"epoch": 0.8546255506607929, "step": 583, "batch_size": 64, "mean": 264.3559265136719, "std": 664.1754760742188, "min": -1296.3990478515625, "p10": -594.82197265625, "median": 269.37940979003906, "p90": 1008.2233886718751, "max": 2076.803466796875, "pos_frac": 0.71875, "sample": [238.73477172851562, 270.81591796875, 267.9429016113281, -623.3071899414062, 905.4371337890625, 73.74110412597656, 1019.272705078125, 414.1661376953125, 2076.803466796875, 136.08038330078125, -177.75775146484375, 68.41734313964844, -952.5382080078125, 279.489013671875, 113.71331787109375, -294.685302734375, 928.8283081054688, 922.037841796875, 282.1675720214844, 1159.7315673828125, 516.9058837890625, 67.55176544189453, -597.7005004882812, -436.24957275390625, -29.82172203063965, -866.0479736328125, 323.130859375, 272.0970458984375, -491.5869140625, -588.1054077148438, -69.45219421386719, -81.23312377929688, 613.2047729492188, 622.2697143554688, 343.3958435058594, 1936.2122802734375, 438.7962951660156, 12.946037292480469, 866.6991577148438, 219.88998413085938, 4.96624755859375, 378.2276916503906, 383.3283996582031, -103.23011779785156, -1070.355224609375, -1296.3990478515625, 1648.939453125, 482.466552734375, -445.3031311035156, 837.1652221679688, 982.441650390625, 1409.90087890625, 159.41925048828125, 549.4949951171875, 825.070556640625, -699.7764282226562, 84.06666564941406, 563.2937622070312, 163.056884765625, 336.3270568847656, 1241.1192626953125, 14.73797607421875, -2.9941940307617188, 290.8205261230469], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000583.npy"} +{"epoch": 0.856093979441997, "step": 584, "batch_size": 64, "mean": 502.6874084472656, "std": 599.4150390625, "min": -665.3850708007812, "p10": -104.641355895996, "median": 368.3525848388672, "p90": 1282.2503173828127, "max": 2496.358154296875, "pos_frac": 0.859375, "sample": [716.04443359375, 243.66075134277344, -236.20921325683594, 283.5145568847656, -8.2264404296875, -415.4686279296875, 611.0707397460938, 454.0706787109375, 1168.877685546875, 4.378551483154297, 34.78863525390625, -144.72857666015625, 124.45733642578125, 145.86419677734375, 416.8710632324219, 1150.0372314453125, 34.155208587646484, 1331.6846923828125, 914.213623046875, -527.60791015625, 285.05975341796875, 601.7872314453125, -11.104507446289062, 283.7379150390625, 361.67791748046875, 137.98837280273438, 1295.11083984375, 140.49801635742188, 235.84091186523438, 244.80715942382812, 1063.60205078125, 566.330322265625, 354.24169921875, -665.3850708007812, 758.8642578125, 356.26153564453125, 681.1824951171875, 460.8315124511719, 292.4394836425781, 0.083892822265625, 519.8218383789062, -304.25030517578125, 375.0272521972656, 1438.8963623046875, 812.1082153320312, 388.7582092285156, 1104.1624755859375, 1235.85546875, -230.7176513671875, 2496.358154296875, 1877.7261962890625, 35.25364685058594, 1650.0460205078125, 1252.242431640625, 443.2760009765625, 1676.8470458984375, 223.29774475097656, 683.756591796875, 18.110816955566406, 336.2471618652344, 574.5824584960938, 428.16827392578125, 1230.003173828125, 161.11131286621094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000584.npy"} +{"epoch": 0.8575624082232012, "step": 585, "batch_size": 64, "mean": 446.328369140625, "std": 729.8181762695312, "min": -1839.5206298828125, "p10": -371.0857727050781, "median": 425.61610412597656, "p90": 1344.123522949219, "max": 2547.2294921875, "pos_frac": 0.75, "sample": [248.2510986328125, -170.0290985107422, 702.2272338867188, 316.8040771484375, 431.93182373046875, 2547.2294921875, 1076.6005859375, -544.2638549804688, 432.6149597167969, 228.44601440429688, 1090.27685546875, 25.510269165039062, 541.4173583984375, 640.8399658203125, -113.45340728759766, 1494.8404541015625, 1378.3001708984375, 736.0202026367188, 1499.107177734375, 554.9833984375, 484.5228576660156, 1281.605712890625, 176.58660888671875, 419.3003845214844, 1036.239990234375, -57.05885314941406, 813.2808837890625, 1127.25439453125, 673.4790649414062, 170.73361206054688, 59.78553771972656, 695.7068481445312, 1370.9168701171875, 2.0123291015625, 1014.7235107421875, 193.864501953125, 1021.1060791015625, -375.9388732910156, -52.77214050292969, 907.9137573242188, 418.94927978515625, -1839.5206298828125, 923.76318359375, -847.380126953125, 819.0521850585938, 570.8110961914062, 490.6146240234375, -451.2653503417969, 857.7100219726562, -292.79437255859375, -48.89599609375, -1008.2587890625, -80.47337341308594, -359.7618713378906, 164.98915100097656, 1690.323486328125, 417.55572509765625, -548.0103759765625, 877.0844116210938, -36.06683349609375, 238.15867614746094, 185.52455139160156, 142.83082580566406, 2199.158447265625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000585.npy"} +{"epoch": 0.8590308370044053, "step": 586, "batch_size": 64, "mean": 293.4059143066406, "std": 595.1472778320312, "min": -1130.0457763671875, "p10": -361.36009521484374, "median": 291.10487365722656, "p90": 1074.7723144531255, "max": 1672.81689453125, "pos_frac": 0.65625, "sample": [1148.3084716796875, -222.5020751953125, -325.9571533203125, 510.8629150390625, 664.8650512695312, 379.1641845703125, -320.77557373046875, 288.027587890625, 344.2684020996094, -174.48873901367188, -506.0046691894531, 292.7424011230469, 719.6392211914062, 1418.195068359375, -110.70149993896484, -182.2273712158203, 599.0635986328125, -842.4224853515625, 194.03616333007812, 113.27043151855469, -127.04158782958984, -28.809423446655273, 1126.567138671875, 754.9837646484375, 909.4635620117188, -80.74436950683594, 715.6227416992188, -281.97930908203125, -90.10626983642578, 448.6431579589844, 781.1860961914062, -172.54275512695312, 285.02679443359375, -1130.0457763671875, 695.0011596679688, 784.00048828125, 1532.4119873046875, -335.94561767578125, 549.9754638671875, -172.30828857421875, 953.917724609375, -496.07733154296875, 456.21954345703125, 424.8052978515625, -1053.9140625, -555.8409423828125, 410.4726867675781, 391.33843994140625, 249.84446716308594, 575.7966918945312, -372.25201416015625, 287.7718505859375, 1167.6806640625, 1484.113525390625, 894.71142578125, 289.46734619140625, 375.54205322265625, 1672.81689453125, 112.24591827392578, -97.3294448852539, 185.00238037109375, 781.7404174804688, 364.6884460449219, 124.4932861328125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000586.npy"} +{"epoch": 0.8604992657856094, "step": 587, "batch_size": 64, "mean": 416.36767578125, "std": 572.0675048828125, "min": -1364.4654541015625, "p10": -161.15526580810544, "median": 363.0835266113281, "p90": 1217.776550292969, "max": 2044.8974609375, "pos_frac": 0.78125, "sample": [590.150390625, 1055.4351806640625, 397.04913330078125, 129.09825134277344, 362.3471374511719, 457.3943176269531, 687.7662963867188, -58.32426071166992, -1.7791824340820312, 1412.5780029296875, -404.65386962890625, -70.82028198242188, 2044.8974609375, 323.9414978027344, 205.335693359375, 537.6467895507812, -41.894012451171875, 497.01885986328125, 360.8740539550781, 392.16259765625, -175.3271026611328, 593.995361328125, 1469.7628173828125, 19.475364685058594, 405.4130859375, 1175.6488037109375, -271.5274658203125, 998.4224243164062, 763.2349853515625, 59.37516784667969, 36.49779510498047, 89.41265106201172, -484.65167236328125, 59.003692626953125, 577.404052734375, 109.95085144042969, 295.0590515136719, -1364.4654541015625, 943.1854248046875, 101.82830810546875, -364.5045471191406, 953.2706298828125, 206.26333618164062, 701.9923095703125, -56.37260437011719, 1235.831298828125, 698.8390502929688, -191.67100524902344, 218.88967895507812, 363.8199157714844, 507.59063720703125, 1576.5595703125, 392.42547607421875, 648.8045043945312, 534.89990234375, 605.5281982421875, 998.51611328125, -128.087646484375, 268.49658203125, 22.588390350341797, 346.7305603027344, -28.77825927734375, 1490.8304443359375, 1367.14697265625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000587.npy"} +{"epoch": 0.8619676945668135, "step": 588, "batch_size": 64, "mean": 341.55035400390625, "std": 521.6129150390625, "min": -988.0994262695312, "p10": -359.2471221923828, "median": 310.06410217285156, "p90": 1044.5432189941407, "max": 1439.3831787109375, "pos_frac": 0.734375, "sample": [209.0797882080078, 568.31982421875, -406.1932067871094, 144.27880859375, 116.18931579589844, 668.113037109375, 660.7222290039062, 65.80661010742188, 753.6719360351562, 821.1619873046875, 138.6119384765625, -440.0723876953125, -109.72509765625, 614.0613403320312, -378.0950012207031, -520.753173828125, 729.1976318359375, 511.1971130371094, 166.3267822265625, 234.87747192382812, 532.907470703125, -117.98806762695312, -109.33000183105469, 1065.867919921875, 156.03773498535156, 492.3455810546875, 1340.3018798828125, -129.390625, -315.26873779296875, 102.54708099365234, 366.3684997558594, -17.112138748168945, 308.8853759765625, 1134.443359375, 994.7855834960938, 461.0512390136719, 63.52784729003906, -145.75369262695312, 1349.6162109375, 956.5643920898438, 1283.2828369140625, 767.0560913085938, 78.79452514648438, -579.6452026367188, 662.2973022460938, 462.1790466308594, 158.8001251220703, 919.642822265625, -988.0994262695312, 290.5635986328125, -17.388134002685547, 1439.3831787109375, 1243.59326171875, 463.83349609375, 394.20220947265625, 601.862060546875, 608.813232421875, 363.4090270996094, -151.11676025390625, 188.06103515625, -14.904335021972656, 311.2428283691406, 856.4398803710938, -520.264404296875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000588.npy"} +{"epoch": 0.8634361233480177, "step": 589, "batch_size": 64, "mean": 465.66778564453125, "std": 582.41552734375, "min": -919.0225830078125, "p10": -220.75611419677728, "median": 419.15626525878906, "p90": 1288.6878173828125, "max": 2157.630126953125, "pos_frac": 0.84375, "sample": [1610.6910400390625, 341.4360046386719, 246.78514099121094, 30.175151824951172, 842.2335205078125, 114.89247131347656, -160.3130645751953, 630.3931884765625, 388.309814453125, -358.62371826171875, -33.87464904785156, 446.1728210449219, 60.623802185058594, 330.08770751953125, 665.2120361328125, 1288.7686767578125, 2157.630126953125, 275.93243408203125, 511.27532958984375, 330.8301696777344, 435.5475158691406, 60.8193359375, 135.3984832763672, 1482.1888427734375, 39.509765625, 527.5031127929688, 340.5366516113281, 714.9277954101562, 1665.8033447265625, -246.6602783203125, 665.376220703125, 637.0032348632812, 833.7327270507812, 569.6576538085938, 1245.0728759765625, -60.562713623046875, -295.96466064453125, 121.89844512939453, 743.0955810546875, 116.05166625976562, 544.7299194335938, -591.3892211914062, 360.4260559082031, 793.3150634765625, 577.8193359375, 550.7821044921875, 999.6485595703125, 128.3850555419922, 501.2565612792969, 1384.138427734375, 611.6251220703125, 304.9555358886719, 402.7650146484375, -919.0225830078125, 222.8851776123047, -715.7325439453125, 670.4219970703125, 1505.8565673828125, 33.18882751464844, 317.7959289550781, 1288.4991455078125, 732.1117553710938, -431.3618469238281, 1080.0662841796875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000589.npy"} +{"epoch": 0.8649045521292217, "step": 590, "batch_size": 64, "mean": 458.553466796875, "std": 575.933349609375, "min": -402.5653076171875, "p10": -229.8931701660156, "median": 347.87213134765625, "p90": 1233.9930908203125, "max": 2108.4658203125, "pos_frac": 0.75, "sample": [572.468994140625, 1481.990478515625, 862.951171875, 288.5137634277344, 923.6332397460938, -347.07666015625, 145.60150146484375, 2108.4658203125, 857.9083862304688, -115.31039428710938, 532.3917846679688, -215.8756103515625, 381.625732421875, 1394.8896484375, -348.15716552734375, -65.99183654785156, -107.84366607666016, 99.6150894165039, 906.228759765625, 247.60345458984375, 127.8916015625, -317.650390625, -129.98995971679688, 251.8787384033203, -184.44024658203125, 159.56414794921875, 760.12451171875, -307.02606201171875, 1366.90380859375, -94.53104400634766, 1035.6552734375, 494.0789794921875, 114.57796478271484, 1456.147216796875, 246.01345825195312, 401.3146667480469, 301.51934814453125, 670.2833251953125, 1234.250244140625, 543.8506469726562, 561.5259399414062, -235.90069580078125, -92.05459594726562, -402.5653076171875, 412.13702392578125, 67.00833129882812, 253.2141876220703, 897.1826171875, 303.2027587890625, -345.8101806640625, 450.6689758300781, 1233.39306640625, 314.1185302734375, -163.265380859375, 773.8394775390625, 625.6487426757812, 899.5690307617188, 1925.62109375, 186.72268676757812, 1101.215087890625, 1048.383056640625, 868.9703369140625, 175.89344787597656, 754.6546630859375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000590.npy"} +{"epoch": 0.8663729809104258, "step": 591, "batch_size": 64, "mean": 421.20489501953125, "std": 595.3870239257812, "min": -1112.9801025390625, "p10": -366.99030761718745, "median": 461.0929412841797, "p90": 1190.3092041015625, "max": 1759.94140625, "pos_frac": 0.84375, "sample": [28.65137481689453, 1272.669677734375, 1337.7039794921875, 662.3692016601562, 1037.7183837890625, -124.76011657714844, 67.80522155761719, -1112.9801025390625, 670.5429077148438, 1721.6414794921875, 948.3836669921875, 169.7310791015625, 663.2105712890625, 468.8397216796875, 79.62246704101562, 1759.94140625, 781.0244750976562, 679.3595581054688, 130.609619140625, 270.6043701171875, -67.41120910644531, 1006.5382690429688, 134.7552032470703, 39.073875427246094, 719.1828002929688, 181.73834228515625, -687.3530883789062, 704.6829223632812, 507.2537841796875, 399.407958984375, -319.39013671875, 89.97723388671875, 466.02496337890625, -658.1302490234375, 1173.563720703125, 163.6776580810547, 177.12103271484375, 62.074462890625, 680.6945190429688, -460.81793212890625, -387.390380859375, 848.7069702148438, 118.86799621582031, 456.1609191894531, -640.413330078125, 1010.0277099609375, 813.149658203125, 61.366756439208984, 593.2269287109375, 1033.9351806640625, 55.754417419433594, 1197.48583984375, 1031.3983154296875, 136.20904541015625, 1315.86328125, 486.201904296875, 610.0279541015625, 1251.066650390625, -869.2119750976562, 679.6642456054688, 188.4302215576172, 486.2674560546875, 371.14923095703125, 283.8151550292969], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000591.npy"} +{"epoch": 0.8678414096916299, "step": 592, "batch_size": 64, "mean": 383.391357421875, "std": 749.5165405273438, "min": -1496.629150390625, "p10": -507.60672607421867, "median": 396.9486083984375, "p90": 1278.057751464844, "max": 2111.280029296875, "pos_frac": 0.75, "sample": [362.0263671875, 1213.5587158203125, -43.308624267578125, 457.24176025390625, -948.9268188476562, 272.29107666015625, 530.3840942382812, 1017.9078369140625, 1798.849853515625, 1579.3331298828125, -1044.552001953125, 2111.280029296875, 281.8769836425781, 865.4608764648438, -1072.002685546875, -542.1466064453125, -197.33209228515625, 335.9652404785156, 55.13934326171875, 1322.555419921875, 149.5333251953125, 1114.0687255859375, -66.69161224365234, 1195.442138671875, 398.6824645996094, 113.38095092773438, 209.32037353515625, 1050.333740234375, 1598.24560546875, 590.4697265625, 97.49801635742188, -287.254150390625, 204.50238037109375, 814.53271484375, 1139.388671875, 252.18679809570312, -419.63385009765625, 363.0658874511719, -427.013671875, 612.6213989257812, 548.1376953125, -134.02255249023438, 789.7904663085938, 1692.815673828125, 432.314453125, -374.8615417480469, 549.50634765625, -1496.629150390625, 848.0941162109375, 395.2147521972656, 610.1319580078125, 122.67378234863281, 813.0587768554688, 610.356201171875, -869.2841186523438, 938.9768676757812, 647.4356689453125, 80.30999755859375, 1305.7001953125, -320.5572204589844, -1309.5667724609375, 905.332763671875, 691.0759887695312, 2.7593441009521484], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000592.npy"} +{"epoch": 0.869309838472834, "step": 593, "batch_size": 64, "mean": 311.61090087890625, "std": 638.9771728515625, "min": -937.8121337890625, "p10": -470.79964599609366, "median": 198.9585189819336, "p90": 1258.5067749023438, "max": 1911.279541015625, "pos_frac": 0.640625, "sample": [-23.189071655273438, 743.6510009765625, -240.51519775390625, 124.74932861328125, -558.6470947265625, -508.50384521484375, 1466.1693115234375, -614.893798828125, 1260.65478515625, 641.8505859375, -174.2769775390625, 1533.4168701171875, 1253.4947509765625, 1128.1278076171875, -10.819709777832031, -810.9617919921875, 1271.7835693359375, 1636.63525390625, 831.8016967773438, 146.2283172607422, -382.82318115234375, 135.6239013671875, 383.0055847167969, 81.39063262939453, 94.00834655761719, 1911.279541015625, 620.6464233398438, 108.96427154541016, 444.7783203125, -845.2689208984375, -0.15345191955566406, 526.8593139648438, -308.1446838378906, 251.688720703125, 729.6004028320312, 813.4684448242188, -146.59738159179688, 1297.9970703125, -223.16119384765625, 438.6843566894531, -860.72607421875, 672.7046508789062, -306.2264099121094, -937.8121337890625, -20.384214401245117, -72.8994140625, -30.578710556030273, 19.155479431152344, 503.7345886230469, 946.1260986328125, 530.386962890625, 348.9696350097656, 849.147705078125, 436.2626953125, 344.15460205078125, 924.050537109375, 33.237518310546875, 42.842132568359375, -123.88982391357422, 650.4216918945312, -59.2398681640625, 527.8355712890625, -199.42526245117188, 696.6486206054688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000593.npy"} +{"epoch": 0.8707782672540382, "step": 594, "batch_size": 64, "mean": 372.99493408203125, "std": 611.0072631835938, "min": -850.1109008789062, "p10": -385.72863464355464, "median": 308.54351806640625, "p90": 1128.8732177734378, "max": 1846.193359375, "pos_frac": 0.734375, "sample": [1078.07177734375, -63.504791259765625, 243.78807067871094, 52.223846435546875, -56.24116516113281, -684.2125854492188, 642.8810424804688, 458.11468505859375, 83.4897232055664, 847.1654052734375, 544.8449096679688, 371.01629638671875, 1815.5338134765625, 685.0731201171875, 136.92657470703125, 925.049560546875, 800.7197265625, 278.91021728515625, 953.3389282226562, -601.468505859375, -496.38287353515625, 1510.379638671875, -850.1109008789062, 1846.193359375, -232.55323791503906, 497.0447998046875, -114.23604583740234, 454.3114929199219, 522.9217529296875, -168.83377075195312, 46.80134582519531, 480.9969482421875, 1322.2320556640625, 540.4907836914062, 304.494140625, 561.7352905273438, 610.0889282226562, 32.42058181762695, 191.42007446289062, -745.7344360351562, -39.46347427368164, 865.0384521484375, 177.23536682128906, -180.89456176757812, 1150.645263671875, 902.98486328125, 995.2813110351562, 654.4701538085938, -414.6867370605469, 474.26617431640625, -765.7769775390625, 44.24407958984375, 240.58367919921875, -147.39666748046875, 312.5928955078125, -42.457401275634766, 1058.1224365234375, 939.3485107421875, 1549.108642578125, 233.7141876220703, 1203.976318359375, -318.15972900390625, 90.82424926757812, 62.674278259277344], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000594.npy"} +{"epoch": 0.8722466960352423, "step": 595, "batch_size": 64, "mean": 426.7290344238281, "std": 609.0786743164062, "min": -550.331298828125, "p10": -219.02553405761716, "median": 328.1139373779297, "p90": 1054.0252563476563, "max": 2853.966796875, "pos_frac": 0.796875, "sample": [1063.2298583984375, 312.3311767578125, 711.8915405273438, 27.666555404663086, 15.941520690917969, 1013.2337646484375, 368.60101318359375, 984.7411499023438, -249.3692626953125, 419.26092529296875, 1625.18505859375, 103.59284973144531, 1213.0897216796875, 1032.5478515625, -155.46388244628906, 354.9556579589844, 336.7110290527344, 1854.599365234375, 216.97767639160156, -191.90670776367188, 95.01104736328125, -319.2984924316406, 716.3488159179688, 409.1675109863281, -252.62786865234375, -272.3611755371094, 226.61166381835938, 98.06287384033203, 352.1070556640625, 319.516845703125, 156.755615234375, 211.91937255859375, 288.468994140625, 230.06683349609375, 964.08837890625, 13.55003547668457, 401.1515808105469, 802.022705078125, 1643.758056640625, 426.6392822265625, -62.31459045410156, -550.331298828125, 473.9499816894531, -397.5661315917969, 472.1723327636719, 388.98095703125, 276.345703125, -151.6694793701172, 577.9486694335938, 691.3448486328125, 15.741020202636719, 788.1648559570312, 85.9912338256836, -161.8481903076172, 360.04998779296875, 76.87088012695312, 951.6572875976562, 100.03691101074219, -230.64788818359375, -10.695751190185547, 1863.3739013671875, 528.6768188476562, 801.6836547851562, 2853.966796875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000595.npy"} +{"epoch": 0.8737151248164464, "step": 596, "batch_size": 64, "mean": 288.81427001953125, "std": 640.9573974609375, "min": -1190.3739013671875, "p10": -502.24892883300777, "median": 223.13790893554688, "p90": 1249.495178222657, "max": 2153.739501953125, "pos_frac": 0.703125, "sample": [-404.7083740234375, 20.37542724609375, 684.6590576171875, 59.34428405761719, 890.9089965820312, 693.2323608398438, 223.75302124023438, 500.77679443359375, 222.52279663085938, 535.003173828125, 2153.739501953125, -80.83566284179688, -576.3662109375, 453.37493896484375, 1090.131103515625, 1395.2777099609375, -54.53105163574219, 159.2749786376953, 427.4747619628906, 605.8635864257812, -140.31570434570312, 562.0809326171875, -87.62287902832031, 239.48915100097656, -540.3818359375, 242.92396545410156, 604.349853515625, 342.7599182128906, 256.18804931640625, 150.6304931640625, -328.1134033203125, 118.50780487060547, -521.076171875, 1522.855712890625, 46.37548828125, 1031.4774169921875, 1634.485107421875, 563.8075561523438, -118.00787353515625, -36.40026092529297, 268.4033203125, 563.7523803710938, 136.48440551757812, 296.19671630859375, -182.51409912109375, 286.37188720703125, 1317.7940673828125, 202.14524841308594, -458.3186950683594, -310.4439697265625, 1854.6070556640625, 52.54608917236328, -1190.3739013671875, 139.73153686523438, 312.9227600097656, -664.257568359375, 213.82118225097656, -624.2201538085938, 602.4083862304688, 63.439788818359375, 1574.4013671875, -417.3338928222656, 455.9604187011719, -552.696533203125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000596.npy"} +{"epoch": 0.8751835535976505, "step": 597, "batch_size": 64, "mean": 518.5908203125, "std": 707.53076171875, "min": -1148.77587890625, "p10": -137.0120567321777, "median": 394.23529052734375, "p90": 1488.916821289063, "max": 2410.391845703125, "pos_frac": 0.78125, "sample": [879.79443359375, 605.1778564453125, 1108.23828125, 355.89434814453125, 490.60125732421875, 1583.8668212890625, -146.83694458007812, 1046.96826171875, 1521.5487060546875, -350.0013427734375, -17.607566833496094, -71.4385986328125, 267.5295715332031, 72.75857543945312, 857.439208984375, 99.17118072509766, 1220.6248779296875, 124.66148376464844, 533.513671875, 1784.7818603515625, 389.081298828125, 473.7457275390625, -27.288116455078125, 1033.46240234375, 222.2782440185547, 292.6408996582031, 37.63279342651367, 222.4668731689453, 130.8679656982422, -1043.1531982421875, -1148.77587890625, -41.46319580078125, 1309.5101318359375, 399.3892822265625, 584.5546875, 2367.732421875, 1334.6207275390625, -53.76655578613281, -406.026123046875, 146.36578369140625, 767.921630859375, 1327.1103515625, 122.77872467041016, 274.60150146484375, 453.1799621582031, -114.08731842041016, 643.281982421875, 935.1466064453125, 29.224227905273438, 1412.7757568359375, 241.38616943359375, -310.7142333984375, -197.70941162109375, 2410.391845703125, 1977.8978271484375, 31.770309448242188, 644.6064453125, 509.83203125, 712.2802734375, 763.0235595703125, 32.32218933105469, 1583.13525390625, -63.086814880371094, 812.1819458007812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000597.npy"} +{"epoch": 0.8766519823788547, "step": 598, "batch_size": 64, "mean": 405.0446472167969, "std": 602.208251953125, "min": -1216.18359375, "p10": -176.87521820068358, "median": 466.4502716064453, "p90": 1160.4602172851562, "max": 2251.2587890625, "pos_frac": 0.78125, "sample": [480.83819580078125, 647.119873046875, 332.7585144042969, 1082.852783203125, -1216.18359375, 474.1457824707031, 1306.146728515625, 636.295654296875, 223.3529052734375, 1220.436767578125, -10.268890380859375, -794.7200927734375, -873.4557495117188, 232.33729553222656, 1345.9755859375, -187.89830017089844, 676.3828735351562, 739.43017578125, 1161.5355224609375, 1767.54541015625, 274.0613098144531, 1286.3236083984375, 731.6080322265625, -323.880859375, 351.216552734375, 465.1090393066406, 516.6471557617188, 412.922119140625, -1000.380859375, 132.07591247558594, 570.4471435546875, 692.1740112304688, 467.79150390625, -119.0341567993164, -358.94610595703125, 571.7969360351562, 532.6513061523438, 1157.951171875, 209.63978576660156, 312.2022705078125, 164.96694946289062, 182.42013549804688, -122.1954574584961, 544.497802734375, -7.243024826049805, 664.6263427734375, 510.129638671875, 472.00775146484375, -25.23063850402832, 115.0628433227539, 352.2466735839844, 2251.2587890625, 831.0838012695312, 705.4703369140625, 517.8403930664062, 357.0152587890625, 18.483154296875, 1032.6971435546875, 535.10302734375, -151.15469360351562, 24.186813354492188, -60.80799102783203, 870.3131103515625, 13.076557159423828], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000598.npy"} +{"epoch": 0.8781204111600588, "step": 599, "batch_size": 64, "mean": 349.8407287597656, "std": 521.8896484375, "min": -1968.6925048828125, "p10": -162.78264465332026, "median": 363.94407653808594, "p90": 954.3002136230472, "max": 1645.4351806640625, "pos_frac": 0.84375, "sample": [431.2661437988281, 893.5077514648438, 408.04595947265625, 469.25628662109375, 838.11572265625, 193.77069091796875, 1645.4351806640625, 303.2896728515625, 228.33770751953125, 438.07440185546875, 413.94940185546875, -694.9046630859375, 5.503007888793945, -0.5372962951660156, 11.536046981811523, 378.6900634765625, 879.4893798828125, 980.3541259765625, 584.297607421875, 1252.5372314453125, 203.5107421875, 594.903076171875, 692.0859375, 432.8904113769531, 350.0048828125, -10.436899185180664, 101.58142852783203, 338.35064697265625, 380.48626708984375, -1968.6925048828125, 708.957275390625, 308.49981689453125, 508.45806884765625, 361.93310546875, 1070.8153076171875, 701.9647827148438, 511.148193359375, -106.0018310546875, 1333.278564453125, 221.56552124023438, 417.8177490234375, 218.10446166992188, 830.0564575195312, -274.96051025390625, 152.65232849121094, 373.2076721191406, 579.3870239257812, 174.30032348632812, 365.9550476074219, 127.88156127929688, 1115.27587890625, 213.96922302246094, 94.40760040283203, 14.005928039550781, -354.5909729003906, 0.017452239990234375, 271.9541320800781, 470.7694091796875, -382.13348388671875, 1210.07080078125, -187.11727905273438, -346.90277099609375, 303.6158447265625, 606.7444458007812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000599.npy"} +{"epoch": 0.8795888399412628, "step": 600, "batch_size": 64, "mean": 316.98529052734375, "std": 628.5616455078125, "min": -1172.170654296875, "p10": -522.4548095703125, "median": 243.3096466064453, "p90": 1036.0402465820312, "max": 2361.66552734375, "pos_frac": 0.75, "sample": [100.6669692993164, 694.1993408203125, 604.1904907226562, -1172.170654296875, 804.2252807617188, 577.91650390625, -30.982051849365234, 544.7945556640625, -558.2547607421875, 515.0677490234375, 116.1347885131836, -58.54037857055664, 988.3201904296875, -230.59017944335938, 309.87939453125, 660.5784912109375, -671.8782348632812, 1384.734619140625, 81.67928314208984, 378.1839904785156, 834.2965087890625, 973.827880859375, -249.8468780517578, 289.4801940917969, 3.9165191650390625, -327.00299072265625, 680.294677734375, 1018.551025390625, 1164.902099609375, 811.4075927734375, 154.41404724121094, 28.070831298828125, 1037.9368896484375, 79.01273345947266, 1176.0120849609375, 587.9312744140625, 58.14036560058594, 257.22198486328125, -458.88067626953125, 1540.297607421875, 2361.66552734375, 226.25315856933594, -197.04100036621094, 27.73668670654297, 1318.7869873046875, 169.42044067382812, 873.9556274414062, 21.836803436279297, -897.5987548828125, 515.9114990234375, 106.37007141113281, 1031.61474609375, 294.18804931640625, -26.40723419189453, 265.5535888671875, 797.2651977539062, -571.6556396484375, 229.39730834960938, 16.691213607788086, -549.7008666992188, -751.6672973632812, 444.51654052734375, -194.56500244140625, 76.39266204833984], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000600.npy"} +{"epoch": 0.8810572687224669, "step": 601, "batch_size": 64, "mean": 417.3103942871094, "std": 549.357421875, "min": -1213.0614013671875, "p10": -185.510920715332, "median": 379.8785400390625, "p90": 1102.5130126953127, "max": 1763.546630859375, "pos_frac": 0.796875, "sample": [782.6083984375, 1339.8570556640625, 1763.546630859375, 733.1343383789062, -74.68444061279297, 1042.44384765625, 359.85748291015625, 153.65966796875, -219.99856567382812, 253.24554443359375, -204.13494873046875, -142.0548553466797, 1340.63525390625, 428.683837890625, 186.6881103515625, 704.2473754882812, 399.89959716796875, 560.907470703125, -48.725425720214844, 1379.856689453125, 748.6135864257812, 643.0526733398438, 740.0402221679688, 4.730304718017578, 516.3262939453125, 357.98809814453125, -54.553497314453125, 484.38909912109375, 850.4161376953125, 330.0937805175781, 516.6591796875, 284.4197998046875, -928.61474609375, 629.2005615234375, -401.5995788574219, 714.6452026367188, 93.61072540283203, -91.18681335449219, 69.09243774414062, 280.03289794921875, 642.575439453125, 841.84716796875, 122.54915618896484, -1213.0614013671875, 1114.7777099609375, -526.5484619140625, 49.41033935546875, 1238.89111328125, 609.1724243164062, -294.25848388671875, 858.7257690429688, 1005.5181884765625, 148.06626892089844, 354.5691223144531, 215.2422332763672, 1019.7942504882812, 211.0307159423828, 1357.2147216796875, 1073.8953857421875, 408.0218505859375, -117.06705474853516, 255.30181884765625, 483.4849853515625, 321.68255615234375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000601.npy"} +{"epoch": 0.882525697503671, "step": 602, "batch_size": 64, "mean": 492.50860595703125, "std": 548.8863525390625, "min": -545.44189453125, "p10": -60.11379241943358, "median": 437.29261779785156, "p90": 1112.4923583984375, "max": 2436.92041015625, "pos_frac": 0.828125, "sample": [500.9939880371094, 178.4521484375, -176.7116241455078, 319.169677734375, -10.217309951782227, 168.80560302734375, 966.3694458007812, 667.7207641601562, 122.07106018066406, 250.5271453857422, 486.65679931640625, 101.00459289550781, 591.9155883789062, 1013.3284912109375, 1142.2708740234375, 43.51959991455078, 493.13031005859375, 472.8634033203125, 396.3197021484375, 1113.6767578125, -25.173248291015625, 259.617919921875, -39.26861572265625, -488.0233459472656, 1109.728759765625, 626.7955322265625, 315.14593505859375, 506.3684387207031, 86.13226318359375, -66.36384582519531, 64.9295883178711, 1024.83349609375, 864.3072509765625, 186.5435333251953, 672.0744018554688, 1235.717041015625, 214.79556274414062, 542.3847045898438, -545.44189453125, -336.9235534667969, 915.6746826171875, 1011.5565185546875, 727.6829833984375, 962.146728515625, 824.8336791992188, 923.3551025390625, 2436.92041015625, -164.48548889160156, 1140.4644775390625, -404.1939697265625, 917.2030029296875, 868.3013916015625, 236.24691772460938, 164.579345703125, 1165.660888671875, -45.53033447265625, 185.63763427734375, 878.6551513671875, 137.81243896484375, 106.44622802734375, 798.9325561523438, 2058.76708984375, 401.7218322753906, 222.11289978027344], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000602.npy"} +{"epoch": 0.8839941262848752, "step": 603, "batch_size": 64, "mean": 468.2989501953125, "std": 692.6209106445312, "min": -816.9988403320312, "p10": -306.5376007080078, "median": 345.2909851074219, "p90": 1284.8676879882814, "max": 2997.302734375, "pos_frac": 0.734375, "sample": [-504.824462890625, -308.3615417480469, 392.78887939453125, 43.35662841796875, 1228.987060546875, -26.94500732421875, 1191.416015625, 335.65008544921875, 953.7955322265625, 1186.4306640625, -25.608949661254883, 69.17916107177734, -619.287109375, 860.0921020507812, 50.96467208862305, 410.4572448730469, 52.073333740234375, 1462.6751708984375, 1955.0595703125, 533.3375244140625, 1047.3427734375, 870.3911743164062, 696.010986328125, 76.68498229980469, 250.07911682128906, 315.1982116699219, 515.8218383789062, 110.0794677734375, -128.55831909179688, 287.56695556640625, 1559.47314453125, -816.9988403320312, 1590.781982421875, -129.08328247070312, -8.763778686523438, 354.931884765625, 1045.84765625, -302.28173828125, 694.636474609375, 533.763671875, 968.0491333007812, -354.81134033203125, -107.47685241699219, 21.27181625366211, -134.08404541015625, 593.5042114257812, 789.9376831054688, 109.44947814941406, -282.4267883300781, 467.82647705078125, 473.19512939453125, 1308.8165283203125, 958.3185424804688, -379.96673583984375, 259.49566650390625, -503.47442626953125, 2997.302734375, 283.4420471191406, 1761.3507080078125, 39.2900390625, 860.120849609375, 1088.7005615234375, 958.8513793945312, -9.712032318115234], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000603.npy"} +{"epoch": 0.8854625550660793, "step": 604, "batch_size": 64, "mean": 496.60693359375, "std": 696.2738037109375, "min": -1001.169677734375, "p10": -291.0571624755859, "median": 495.9053649902344, "p90": 1552.2793823242191, "max": 2470.614990234375, "pos_frac": 0.75, "sample": [-417.00982666015625, 165.54541015625, -241.3228759765625, -149.7394256591797, 696.822021484375, 826.4300537109375, 1022.933837890625, 1120.3472900390625, 740.6449584960938, 2312.8447265625, -1001.169677734375, 913.3565673828125, 78.81349182128906, -271.4862365722656, 888.517333984375, 1206.0220947265625, 632.6970825195312, 129.35304260253906, 377.7056579589844, 1009.4208374023438, 114.53839111328125, -509.47430419921875, 117.28640747070312, 1601.24267578125, 557.3609619140625, -720.542236328125, 1593.3294677734375, -231.4335479736328, 215.22286987304688, 233.22509765625, -49.028526306152344, 410.1956481933594, 675.9398193359375, 512.2815551757812, -129.12344360351562, 1699.755126953125, 736.2335205078125, 37.237159729003906, -299.4447021484375, 875.5038452148438, 554.3613891601562, 481.736572265625, 546.3782958984375, 252.59765625, 512.0332641601562, 2470.614990234375, -489.508056640625, 1652.4580078125, 456.2678527832031, 4.5742950439453125, 604.7509765625, 1025.5030517578125, 710.90966796875, -76.92225646972656, 1907.1845703125, 586.8988037109375, -365.17144775390625, -32.044342041015625, 377.3954162597656, 828.3465576171875, -58.250244140625, 510.07415771484375, 1456.495849609375, 385.1265563964844], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000604.npy"} +{"epoch": 0.8869309838472834, "step": 605, "batch_size": 64, "mean": 375.9776611328125, "std": 632.5943603515625, "min": -1223.129638671875, "p10": -266.39003448486324, "median": 305.69566345214844, "p90": 1071.5319580078128, "max": 2699.08203125, "pos_frac": 0.75, "sample": [-62.11351013183594, -286.7928771972656, -31.902496337890625, 239.54232788085938, 71.08773040771484, 164.22315979003906, 850.6668090820312, 607.304931640625, 928.15771484375, 234.72805786132812, 583.9091796875, -79.39944458007812, 115.534912109375, -199.514404296875, 417.4449768066406, 461.99560546875, 258.94146728515625, 829.0586547851562, 1653.36572265625, -218.7834014892578, 2699.08203125, 153.69351196289062, 643.403564453125, -2.937345504760742, 141.3182830810547, 932.942626953125, -172.81854248046875, -354.6744689941406, 399.37774658203125, 362.7325134277344, 52.33318328857422, -292.20263671875, 868.90234375, -558.2274169921875, -99.9210205078125, 520.86083984375, 1194.38916015625, 91.03848266601562, 247.6131591796875, 737.565673828125, 62.94114685058594, 2381.79638671875, -717.0827026367188, -1223.129638671875, 519.6563720703125, 400.4443054199219, -350.41912841796875, 1111.0374755859375, 136.07421875, 5.16850471496582, 542.6411743164062, 377.966064453125, 1271.4359130859375, 352.4498596191406, 249.7884063720703, 412.54803466796875, 438.6114196777344, 1189.2227783203125, 404.820068359375, 169.3194122314453, 979.3524169921875, 414.5762939453125, -49.57012176513672, 880.9932861328125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000605.npy"} +{"epoch": 0.8883994126284875, "step": 606, "batch_size": 64, "mean": 429.0714416503906, "std": 598.4528198242188, "min": -627.26123046875, "p10": -364.2671875, "median": 329.70579528808594, "p90": 1112.452087402344, "max": 2219.82177734375, "pos_frac": 0.765625, "sample": [-550.5244140625, 77.05111694335938, 39.344810485839844, -411.4555358886719, 413.3781433105469, 1054.1744384765625, 201.74447631835938, 210.95989990234375, 1229.5491943359375, 820.3640747070312, 196.5800323486328, 2219.82177734375, 939.1177978515625, -22.8253173828125, -427.0106506347656, 303.21234130859375, 478.21673583984375, 267.6313781738281, 278.1138000488281, 1263.023193359375, 891.5148315429688, -386.60711669921875, 1683.613037109375, 22.35285186767578, 999.0548095703125, 695.5101318359375, 585.688720703125, 272.676025390625, -98.88941192626953, -312.14068603515625, 602.5534057617188, 718.1697998046875, -403.6737060546875, 352.0834655761719, -488.2380065917969, 330.43890380859375, 257.3927001953125, 827.4334106445312, -128.98208618164062, -127.8314208984375, -627.26123046875, -152.57928466796875, 2207.343994140625, 1137.42822265625, 243.98855590820312, 610.7338256835938, 215.20623779296875, 648.460205078125, 910.3807983398438, 341.3526611328125, -18.498092651367188, 800.3980712890625, -48.65095520019531, 219.8306427001953, 1046.214599609375, 1549.6236572265625, 386.785888671875, 698.1563110351562, 38.72679901123047, 274.78912353515625, 630.3734130859375, 599.7896728515625, 546.4198608398438, 328.9726867675781], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000606.npy"} +{"epoch": 0.8898678414096917, "step": 607, "batch_size": 64, "mean": 345.88134765625, "std": 594.7735595703125, "min": -1292.4283447265625, "p10": -250.3868774414062, "median": 310.56687927246094, "p90": 1079.925341796875, "max": 2561.0185546875, "pos_frac": 0.78125, "sample": [462.61639404296875, -123.34077453613281, 528.66162109375, 211.2194061279297, 93.56157684326172, 180.6419219970703, 1129.7373046875, 367.43414306640625, 7.71368408203125, 1325.1383056640625, -409.087158203125, 684.9850463867188, 354.82977294921875, -211.7049560546875, 145.88372802734375, 1063.988037109375, -664.1649780273438, 251.11907958984375, 342.26361083984375, 103.80323028564453, -200.32888793945312, -39.95439147949219, 320.8896789550781, 500.3863525390625, -459.4796447753906, 672.9228515625, -157.18255615234375, 1442.795654296875, 665.2945556640625, 171.04566955566406, 710.0816650390625, 166.12094116210938, 771.5575561523438, 279.8440856933594, 497.21612548828125, 396.6527099609375, 1086.755615234375, 113.4034423828125, 1565.072509765625, 337.6000671386719, -920.5608520507812, 583.742919921875, -266.96484375, 386.8941650390625, 735.88720703125, 93.06800842285156, -14.893699645996094, 69.81133270263672, -1292.4283447265625, 300.24407958984375, 569.296142578125, 296.7419738769531, -104.22201538085938, 501.6944274902344, 283.2419738769531, 62.02403259277344, 271.007080078125, 2561.0185546875, 1545.6712646484375, 783.2840576171875, 328.5472717285156, -376.9577331542969, 608.17919921875, 446.0867004394531], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000607.npy"} +{"epoch": 0.8913362701908958, "step": 608, "batch_size": 64, "mean": 402.69097900390625, "std": 514.1507568359375, "min": -583.16943359375, "p10": -256.9040603637695, "median": 413.7604522705078, "p90": 981.6062133789063, "max": 1858.4898681640625, "pos_frac": 0.765625, "sample": [711.27197265625, 918.2352294921875, 411.49139404296875, 527.9041137695312, -525.80615234375, 1518.218505859375, -459.5905456542969, 124.64216613769531, -168.78370666503906, 583.3095703125, 752.6011962890625, -5.353960037231445, 59.080772399902344, 608.4013061523438, -145.00320434570312, 110.64767456054688, -583.16943359375, 457.917236328125, -61.983978271484375, 416.0295104980469, 489.06109619140625, 620.528564453125, 88.905517578125, 855.76904296875, 24.413429260253906, 396.1693115234375, 770.4219970703125, 494.8490295410156, 987.9760131835938, -207.8980255126953, -16.282047271728516, -46.96173095703125, 271.4801025390625, 410.7983093261719, 599.8938598632812, 304.00384521484375, 436.25994873046875, 429.48284912109375, 1066.9071044921875, -304.7186279296875, 1858.4898681640625, 249.4215087890625, 591.0810546875, 1083.2969970703125, 1233.3079833984375, 226.08551025390625, 61.53997802734375, 879.554931640625, 167.42245483398438, -277.9066467285156, 658.33056640625, -71.74761199951172, 605.5408935546875, 786.5049438476562, -286.7778625488281, 966.7433471679688, -409.6538391113281, 160.73220825195312, 1747.81884765625, 871.2202758789062, 804.7452392578125, 491.24542236328125, 174.41445922851562, 279.692138671875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000608.npy"} +{"epoch": 0.8928046989720999, "step": 609, "batch_size": 64, "mean": 515.8161010742188, "std": 794.779296875, "min": -1164.09375, "p10": -265.1818969726561, "median": 448.723876953125, "p90": 1205.514770507813, "max": 3888.588134765625, "pos_frac": 0.796875, "sample": [130.69979858398438, 113.28913116455078, 109.51158905029297, -732.6279296875, 270.43475341796875, 371.3028259277344, 1243.89697265625, 483.49102783203125, 642.83544921875, -336.43182373046875, 844.6685180664062, 1749.361328125, 105.50862121582031, 465.7375793457031, 756.3720703125, -1164.09375, -94.35501098632812, 584.7764892578125, 906.5228881835938, 431.7101745605469, 663.9537353515625, -98.84906005859375, 691.6725463867188, 773.6217651367188, -329.8358154296875, 608.0101318359375, 1319.8184814453125, 279.9677429199219, -491.8703918457031, 739.4581298828125, 1677.41796875, 706.3585815429688, 2476.06689453125, 552.6317138671875, 2936.893798828125, 1059.097412109375, 52.52544403076172, 756.5440063476562, -78.4338607788086, 355.0972595214844, 343.3268127441406, 141.51222229003906, 836.9788208007812, 3888.588134765625, 766.0316162109375, 126.29052734375, 478.5978698730469, -114.32275390625, -827.55712890625, 156.48512268066406, 325.851806640625, 269.7998352050781, 1024.5438232421875, -12.356735229492188, -7.767890930175781, 957.0078125, 297.57891845703125, 589.226806640625, 639.234130859375, 1115.956298828125, 276.95355224609375, 530.3295288085938, -377.71826171875, 54.90321731567383], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000609.npy"} +{"epoch": 0.8942731277533039, "step": 610, "batch_size": 64, "mean": 319.82086181640625, "std": 583.7139892578125, "min": -1873.4217529296875, "p10": -362.01637878417966, "median": 288.2045440673828, "p90": 946.5927917480473, "max": 1765.27099609375, "pos_frac": 0.75, "sample": [703.5379638671875, 745.0860595703125, 170.39112854003906, 239.87620544433594, -365.5521545410156, 665.2915649414062, 86.34170532226562, 685.7845458984375, 535.1818237304688, -383.50604248046875, -10.803337097167969, 481.54180908203125, -36.57634735107422, 1416.6533203125, 859.5299072265625, 417.426513671875, 231.89608764648438, 191.39627075195312, -565.7024536132812, -1873.4217529296875, 432.85601806640625, 1765.27099609375, 168.68008422851562, 1073.4521484375, -260.1524658203125, 379.9371337890625, 180.13113403320312, -353.7662353515625, 812.9986572265625, 727.4130859375, 1400.313232421875, -110.89591979980469, 29.93177032470703, 626.3331298828125, 284.00738525390625, 1356.26318359375, -492.4267883300781, 239.17068481445312, 315.47076416015625, 669.52880859375, 672.0913696289062, 397.33599853515625, -13.260814666748047, 773.7711791992188, 123.64291381835938, 259.610595703125, 983.9054565429688, 165.74514770507812, -93.2523193359375, 298.689208984375, 38.19395446777344, 553.1656494140625, 248.9870147705078, -144.3110809326172, 619.387451171875, 188.52682495117188, 327.08282470703125, 292.4017028808594, -150.76304626464844, 323.8360290527344, -449.3594055175781, -785.843505859375, 837.3268432617188, 1562.73779296875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000610.npy"} +{"epoch": 0.895741556534508, "step": 611, "batch_size": 64, "mean": 541.71533203125, "std": 746.5087890625, "min": -820.2274169921875, "p10": -242.5006118774414, "median": 418.6577453613281, "p90": 1495.4912841796875, "max": 2563.951904296875, "pos_frac": 0.734375, "sample": [1237.5374755859375, 441.2402038574219, 2545.052490234375, 1559.9522705078125, 1167.997802734375, 801.0768432617188, -112.87928009033203, 1357.0589599609375, 127.38082122802734, 263.8541564941406, 281.14117431640625, 801.431640625, -68.24510192871094, -78.21769714355469, -784.2521362304688, -164.74867248535156, -417.5389099121094, -820.2274169921875, 323.4741516113281, 1383.1837158203125, -786.9590454101562, 1193.5421142578125, 2047.1585693359375, -461.544677734375, 1482.2921142578125, 605.8477172851562, 52.752052307128906, 1003.7174682617188, 555.429931640625, 34.22880554199219, -47.696533203125, 1108.666259765625, 382.30340576171875, -47.10279083251953, 458.46649169921875, 1124.12060546875, 114.718017578125, 210.8743896484375, -252.72418212890625, 341.20953369140625, 964.786376953125, 373.36859130859375, 34.31678771972656, 1773.1485595703125, 425.63751220703125, 418.01116943359375, -233.41476440429688, 505.0213317871094, 1113.1077880859375, -170.73721313476562, 670.3609619140625, -246.39454650878906, 252.56158447265625, 419.3043212890625, 775.3531494140625, 657.9027709960938, 387.0217590332031, 1929.665283203125, 465.8953857421875, 1501.1480712890625, 1161.674072265625, -3.1982498168945312, 2563.951904296875, -32.28226089477539], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000611.npy"} +{"epoch": 0.8972099853157122, "step": 612, "batch_size": 64, "mean": 409.52105712890625, "std": 587.4237060546875, "min": -856.4209594726562, "p10": -274.27060546875, "median": 390.42462158203125, "p90": 1130.5259521484377, "max": 2489.8720703125, "pos_frac": 0.78125, "sample": [994.4175415039062, 260.8488464355469, 399.7577209472656, -413.4059143066406, -159.2467041015625, -50.234046936035156, 1098.494873046875, 325.2882385253906, 405.58135986328125, 424.200927734375, -545.4041748046875, 844.7821655273438, 62.024505615234375, 1101.3221435546875, 818.37890625, 1035.228271484375, 522.5880126953125, 232.08242797851562, 482.62445068359375, 1283.142333984375, 854.9815673828125, 110.99640655517578, -856.4209594726562, 1267.274169921875, 726.1945190429688, -263.18792724609375, 741.417236328125, -362.47723388671875, 558.9668579101562, 786.0993041992188, 548.6076049804688, -800.2274169921875, 129.93861389160156, 381.0915222167969, 94.59783935546875, -424.8526611328125, 586.5377197265625, 421.7331848144531, 110.6458740234375, -215.54931640625, 163.08082580566406, 269.28961181640625, 134.36895751953125, 224.73672485351562, 2489.8720703125, 26.736637115478516, 851.3515625, 1143.0418701171875, 1265.6702880859375, 155.12469482421875, 126.28340148925781, 748.8124389648438, 1401.5438232421875, 179.1920166015625, -228.4669647216797, -279.02032470703125, 480.4080810546875, -3.6393280029296875, 869.5599365234375, -163.99595642089844, 1170.6353759765625, 644.1937866210938, 932.5123291015625, 89.215087890625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000612.npy"} +{"epoch": 0.8986784140969163, "step": 613, "batch_size": 64, "mean": 524.9864501953125, "std": 628.9331665039062, "min": -948.6369018554688, "p10": -74.9433441162109, "median": 459.2147521972656, "p90": 1347.7057250976563, "max": 2634.6533203125, "pos_frac": 0.84375, "sample": [633.3153076171875, 1367.4129638671875, 1301.72216796875, -195.5210418701172, 1414.9539794921875, 963.5853271484375, -17.508464813232422, 1162.4224853515625, 1057.708251953125, 35.67035675048828, 1514.6534423828125, 208.82852172851562, 688.3125610351562, 276.9123229980469, 1630.9190673828125, -345.5390625, -38.81263732910156, 386.68682861328125, 513.3988037109375, -90.42793273925781, 390.4373779296875, 53.76464080810547, 691.6471557617188, 642.5159912109375, 1238.66015625, 322.21405029296875, 1208.06005859375, 1062.720458984375, 79.18695068359375, 688.2636108398438, 767.6669311523438, 73.68246459960938, 10.055364608764648, 570.1026611328125, 425.1439208984375, 1838.013427734375, 756.5278930664062, 669.2647094726562, 2634.6533203125, 4.033191680908203, -948.6369018554688, 1017.71728515625, 649.4337768554688, 839.9818115234375, 704.2574462890625, -696.235595703125, 146.11976623535156, 1526.576171875, -1.3632774353027344, 251.5218505859375, 776.9591064453125, -282.9952392578125, -423.8285217285156, 228.45208740234375, 989.384033203125, 493.28558349609375, 272.31292724609375, 392.4794616699219, 55.748497009277344, 607.2680053710938, 5.1595916748046875, 69.52457427978516, 230.60775756835938, 100.09153747558594], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000613.npy"} +{"epoch": 0.9001468428781204, "step": 614, "batch_size": 64, "mean": 498.614990234375, "std": 707.2228393554688, "min": -1029.4425048828125, "p10": -332.5058502197264, "median": 494.7268829345703, "p90": 1394.6672851562503, "max": 2629.1865234375, "pos_frac": 0.75, "sample": [209.43841552734375, 356.64117431640625, -773.9853515625, 988.0926513671875, -23.906661987304688, 1328.8326416015625, 89.173095703125, 373.0013122558594, 602.5975952148438, 2.0893287658691406, 1246.31396484375, 282.48992919921875, 518.06005859375, 961.0541381835938, 301.18865966796875, -170.14547729492188, 1448.5146484375, 752.0994262695312, 149.9867706298828, -131.65248107910156, 1086.2088623046875, -20.12518310546875, 604.9850463867188, 976.3905029296875, -519.1262817382812, -83.48644256591797, 524.0200805664062, -143.87698364257812, 566.3510131835938, -402.0888671875, 577.6017456054688, 1835.1748046875, -123.74481201171875, 834.5048217773438, -542.8824462890625, -117.96021270751953, 732.5812377929688, -736.96435546875, -39.15614318847656, 764.85791015625, 1478.8287353515625, 316.7198791503906, 1533.135498046875, 471.3937072753906, 1180.64306640625, 1152.9398193359375, 1237.4488525390625, 331.2584228515625, 521.0341186523438, 712.9117431640625, 43.184043884277344, 324.9413146972656, 1411.310546875, 105.70198059082031, 775.3504638671875, 2629.1865234375, 2087.048583984375, 133.0169677734375, -675.373046875, -1029.4425048828125, 1355.8330078125, 199.37261962890625, 551.2366943359375, 780.5310668945312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000614.npy"} +{"epoch": 0.9016152716593245, "step": 615, "batch_size": 64, "mean": 414.7456359863281, "std": 674.184814453125, "min": -1846.0274658203125, "p10": -229.39448699951166, "median": 253.18852996826172, "p90": 1121.904919433594, "max": 2537.43212890625, "pos_frac": 0.828125, "sample": [181.70706176757812, 738.8456420898438, 160.21255493164062, 1511.360595703125, 229.1548309326172, 1244.913330078125, 1.8037872314453125, -10.215385437011719, 68.74493408203125, 104.41934204101562, 985.1171875, 241.17652893066406, 229.4169158935547, -341.04132080078125, 829.6411743164062, -604.0062255859375, 950.1543579101562, -1846.0274658203125, 287.2247619628906, 382.2078857421875, 265.2005310058594, 182.09683227539062, 2061.1005859375, 180.5078125, 73.55915832519531, 1136.6043701171875, -173.75729370117188, 449.58990478515625, 528.715087890625, 114.84307861328125, 396.6314392089844, 1059.8096923828125, 645.5902099609375, 1010.3560791015625, 1319.0572509765625, 45.946388244628906, 74.58244323730469, 108.20321655273438, -498.24249267578125, -111.15113067626953, -870.552001953125, 1058.532470703125, 1087.606201171875, 73.53419494628906, 1744.713134765625, 531.1823120117188, 532.8554077148438, 1072.929443359375, 168.9849853515625, 492.55224609375, 935.9309692382812, -253.23899841308594, 230.6739044189453, 9.134368896484375, 785.9022827148438, -335.3647155761719, 209.29705810546875, 340.4271545410156, 499.4714660644531, 820.1749877929688, -170.53346252441406, 2537.43212890625, 680.75634765625, 147.2643280029297], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000615.npy"} +{"epoch": 0.9030837004405287, "step": 616, "batch_size": 64, "mean": 325.7138366699219, "std": 582.16552734375, "min": -850.7196044921875, "p10": -397.59974975585936, "median": 283.10145568847656, "p90": 1094.54208984375, "max": 1781.0535888671875, "pos_frac": 0.734375, "sample": [972.0596923828125, -12.022125244140625, 142.87103271484375, 620.48046875, 531.7830810546875, 4.970743179321289, 297.51220703125, -232.18099975585938, 312.9114685058594, 1078.636962890625, 1373.6107177734375, 594.80322265625, 113.98311614990234, 68.97209167480469, -727.6363525390625, 1161.3958740234375, 492.0005187988281, 281.5630798339844, 150.49755859375, -60.429405212402344, 1393.5677490234375, 600.099853515625, 548.00830078125, 397.6117248535156, -354.0561218261719, -668.0985717773438, 499.25994873046875, 227.755859375, 7.840858459472656, 1257.4862060546875, 1663.3173828125, 163.04869079589844, -525.8893432617188, -8.540199279785156, -0.7070503234863281, -766.0372924804688, -167.44674682617188, 230.81759643554688, 939.369384765625, 607.869384765625, 551.1417846679688, 284.63983154296875, 329.51739501953125, 627.8930053710938, -850.7196044921875, 949.2314453125, 141.63832092285156, -406.70501708984375, 133.66302490234375, 729.8560791015625, 430.2032470703125, -330.2657775878906, 1781.0535888671875, 413.4049072265625, 752.1849365234375, 160.05242919921875, 268.1153564453125, 138.82423400878906, -293.64251708984375, 539.1726684570312, 1091.0391845703125, -376.3541259765625, 1096.0433349609375, -525.3631591796875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000616.npy"} +{"epoch": 0.9045521292217328, "step": 617, "batch_size": 64, "mean": 247.66403198242188, "std": 570.5321044921875, "min": -1230.816162109375, "p10": -442.00227661132806, "median": 243.26773071289062, "p90": 751.5813415527344, "max": 2132.3701171875, "pos_frac": 0.75, "sample": [291.7218017578125, 139.65496826171875, -148.11529541015625, 276.1892395019531, 453.8361511230469, -225.72222900390625, 35.918785095214844, -581.3189697265625, 723.3030395507812, 179.55909729003906, -310.9224548339844, 365.94256591796875, 304.0191345214844, 447.7994384765625, 200.37570190429688, 209.15072631835938, 483.7496337890625, 150.13658142089844, 61.687164306640625, 1328.7862548828125, 745.9783935546875, 498.4562072753906, 136.00189208984375, 109.86901092529297, 428.7632141113281, 403.4885559082031, 163.80506896972656, -112.83735656738281, 399.29315185546875, -1230.816162109375, 84.92941284179688, 460.75567626953125, -472.32379150390625, -294.8726501464844, 662.3792724609375, 310.070556640625, 1148.3221435546875, 676.7744750976562, 571.335693359375, 502.5998229980469, 582.6854248046875, -518.5346069335938, 919.4171142578125, 556.5112915039062, 1939.2542724609375, 509.64990234375, 753.9826049804688, -665.6187133789062, 233.46002197265625, 543.1633911132812, -1069.19287109375, 158.7053680419922, 59.26280975341797, 253.075439453125, 199.12680053710938, -146.4476318359375, 812.193115234375, -210.50592041015625, -107.04509735107422, -371.2520751953125, 2132.3701171875, 348.89874267578125, -697.6827392578125, 57.297515869140625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000617.npy"} +{"epoch": 0.9060205580029369, "step": 618, "batch_size": 64, "mean": 542.3302001953125, "std": 869.9449462890625, "min": -1942.5499267578125, "p10": -443.1932342529297, "median": 431.0712585449219, "p90": 1369.5287231445316, "max": 3117.508544921875, "pos_frac": 0.78125, "sample": [-1942.5499267578125, 847.7000732421875, 253.93145751953125, 407.5331115722656, 98.6574478149414, 182.9727020263672, 160.92840576171875, 68.01493835449219, 2406.734130859375, 891.2002563476562, 258.287109375, -953.6146240234375, 801.6087646484375, 202.34962463378906, 1291.2335205078125, 293.5068664550781, -446.631103515625, 1317.136474609375, 853.471435546875, -224.65975952148438, -173.06256103515625, -519.067138671875, -400.88287353515625, 166.4772186279297, -931.629150390625, -676.9136962890625, 149.85153198242188, 1293.546630859375, 1015.4586181640625, 127.05105590820312, 676.4401245117188, 434.767822265625, 697.0772705078125, 1096.5537109375, 951.6873779296875, -937.6748046875, -435.1715393066406, 1725.8038330078125, 368.4377746582031, 497.5501708984375, 767.1798095703125, 1391.9825439453125, 1209.3699951171875, 2479.513671875, 1097.5865478515625, -125.11481475830078, 421.7181396484375, 668.2994384765625, 393.0303955078125, 3117.508544921875, 829.4800415039062, 73.87694549560547, 1544.219970703125, 1239.584716796875, 988.6712646484375, 474.77056884765625, -92.44132995605469, 1288.5440673828125, 2482.791748046875, -40.517486572265625, 427.37469482421875, 1106.8878173828125, 665.3525390625, 405.34912109375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000618.npy"} +{"epoch": 0.9074889867841409, "step": 619, "batch_size": 64, "mean": 401.0638427734375, "std": 842.705078125, "min": -1187.1668701171875, "p10": -522.2636352539063, "median": 256.1150588989258, "p90": 1396.1100341796878, "max": 3454.212646484375, "pos_frac": 0.671875, "sample": [-1173.5419921875, -649.39697265625, 37.864501953125, 1119.2938232421875, 1593.9593505859375, -128.24313354492188, 298.4868469238281, 237.20730590820312, 248.93675231933594, -405.4488220214844, 633.8909301757812, 812.1563720703125, 425.80645751953125, 1845.3914794921875, -580.3525390625, 232.88946533203125, 1236.750732421875, 420.32696533203125, 1119.715087890625, 1121.4141845703125, -793.4826049804688, -793.7609252929688, 1257.63525390625, 1306.119873046875, -5.0650787353515625, -514.48681640625, -200.14999389648438, 624.8333129882812, 580.0269775390625, -113.13262939453125, 101.93930053710938, 630.471923828125, -372.5809326171875, 85.2786865234375, 1434.67724609375, -332.4022521972656, -94.01153564453125, -353.58489990234375, 790.2471313476562, 686.1035766601562, -28.243892669677734, 1864.623779296875, 686.44873046875, 636.6845703125, 1126.095458984375, 78.67886352539062, -246.39675903320312, 2556.41259765625, 1592.2186279296875, 270.4276123046875, 93.19751739501953, 54.20565414428711, 427.3949890136719, 3454.212646484375, -1187.1668701171875, -525.5965576171875, 633.30517578125, -147.03369140625, 1303.65771484375, -79.47976684570312, 308.61932373046875, 156.58502197265625, 4.158575057983398, 263.2933654785156], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000619.npy"} +{"epoch": 0.908957415565345, "step": 620, "batch_size": 64, "mean": 373.9115295410156, "std": 596.798095703125, "min": -1016.1434936523438, "p10": -268.3187652587891, "median": 314.6260223388672, "p90": 1257.994897460938, "max": 1994.7149658203125, "pos_frac": 0.75, "sample": [464.7897644042969, -404.2547607421875, 436.25860595703125, 591.3428955078125, 465.70721435546875, 736.398681640625, 320.21148681640625, 293.0587158203125, -83.1602554321289, 1131.3543701171875, -267.8900451660156, 286.05377197265625, -268.50250244140625, 1312.2694091796875, 500.2100830078125, 140.92222595214844, 977.8233642578125, 514.115234375, -86.69277954101562, 1381.628662109375, 1994.7149658203125, 208.46310424804688, 471.2311706542969, 1129.11474609375, -532.7190551757812, 193.67953491210938, 560.72314453125, 218.21804809570312, 144.8996124267578, 88.30654907226562, -25.21630096435547, 1590.4390869140625, 418.306884765625, 1809.526611328125, -130.1300048828125, 781.4066162109375, 93.91756439208984, 525.4869384765625, -4.666690826416016, 372.49127197265625, 380.65399169921875, 553.0060424804688, -196.25421142578125, -110.29852294921875, 1345.389404296875, 680.6897583007812, 654.7791137695312, -1016.1434936523438, 126.56952667236328, 246.45718383789062, -237.7005615234375, -971.9755859375, 1667.908447265625, 309.0405578613281, 93.51219177246094, 249.12948608398438, 462.4346618652344, 202.75442504882812, -434.41998291015625, 468.85595703125, 276.68695068359375, 827.072998046875, -376.5568542480469, 378.90863037109375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000620.npy"} +{"epoch": 0.9104258443465492, "step": 621, "batch_size": 64, "mean": 569.275146484375, "std": 735.7957153320312, "min": -1546.4244384765625, "p10": -236.80297393798818, "median": 551.3020935058594, "p90": 1536.2752929687501, "max": 3088.060546875, "pos_frac": 0.828125, "sample": [603.128662109375, -393.9021301269531, -763.6119384765625, -137.4376983642578, 1555.694580078125, -2.0017929077148438, 109.26810455322266, 536.0140991210938, 705.8444213867188, 895.4188232421875, 901.7744750976562, 342.564453125, 257.56280517578125, 847.3919677734375, 196.89501953125, -350.62896728515625, -279.3880920410156, 913.7981567382812, -1546.4244384765625, 132.12158203125, 1634.2828369140625, 184.8612060546875, 999.4034423828125, 566.590087890625, 1194.243408203125, 3088.060546875, 165.30111694335938, 1254.569091796875, 1101.66552734375, -89.25833129882812, 1490.963623046875, 734.1052856445312, 960.9275512695312, 360.1602783203125, 905.9198608398438, 1711.3480224609375, 169.92889404296875, 749.4884033203125, 461.7073059082031, 636.0928955078125, -120.04487609863281, 1319.983642578125, -300.6141052246094, 602.6431884765625, 268.8356628417969, 130.5325927734375, 1650.6561279296875, 315.934326171875, 1326.7191162109375, 1211.1461181640625, -1124.4449462890625, 390.5274963378906, 162.04940795898438, 175.07144165039062, 251.3195037841797, 652.71337890625, 887.0177612304688, 1662.8232421875, 313.0790710449219, 216.23284912109375, 1722.6011962890625, 958.2507934570312, 717.6044921875, 238.5292510986328], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000621.npy"} +{"epoch": 0.9118942731277533, "step": 622, "batch_size": 64, "mean": 407.2692565917969, "std": 633.2926635742188, "min": -1052.7784423828125, "p10": -134.54090270996093, "median": 359.6867370605469, "p90": 911.4675354003906, "max": 2763.933349609375, "pos_frac": 0.8125, "sample": [658.5090942382812, 388.84930419921875, -70.82942962646484, 395.56671142578125, 566.251953125, 336.9475402832031, -742.8109741210938, 360.20404052734375, 2494.69580078125, 529.3323364257812, 92.6238784790039, -340.82421875, 82.04951477050781, -268.28826904296875, -140.24252319335938, 583.9813232421875, 197.5894317626953, 359.16943359375, 52.324432373046875, 247.95643615722656, 1208.1868896484375, -121.23712158203125, 528.3006591796875, 2763.933349609375, 876.6997680664062, 70.22891235351562, 636.1485595703125, 736.436279296875, 182.7324981689453, -1014.650146484375, 241.07205200195312, -29.5401611328125, 851.8815307617188, 109.20143127441406, 272.3634033203125, 905.1800537109375, 484.9857177734375, -488.19158935546875, 1104.4615478515625, 502.51806640625, 838.6063232421875, 914.1621704101562, 21.232398986816406, 763.1334228515625, 217.01223754882812, -17.3040771484375, 421.8465881347656, -29.170379638671875, 75.38093566894531, 340.1484375, 1477.981689453125, 479.483154296875, 759.158935546875, 696.9159545898438, 577.0213623046875, 847.340087890625, -1052.7784423828125, 5.010643005371094, 43.71589279174805, 1501.4202880859375, 683.7900390625, 652.7861328125, 12.528034210205078, 232.0418701171875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000622.npy"} +{"epoch": 0.9133627019089574, "step": 623, "batch_size": 64, "mean": 311.5841369628906, "std": 636.8782958984375, "min": -1178.56494140625, "p10": -293.19122314453125, "median": 217.85650634765625, "p90": 1041.1324707031251, "max": 2773.775634765625, "pos_frac": 0.734375, "sample": [153.91851806640625, 250.26419067382812, -860.2708129882812, 1690.484619140625, -156.38409423828125, -108.19923400878906, 685.4703979492188, 106.13381958007812, 918.8624877929688, 15.62261962890625, 1410.185302734375, 219.7279052734375, 632.0823364257812, 35.09452819824219, 708.92138671875, -292.66485595703125, 736.3081665039062, 322.75103759765625, -293.41680908203125, 49.477783203125, -19.207019805908203, 540.4566650390625, 1130.231689453125, -343.978759765625, -1178.56494140625, 129.96417236328125, 338.88262939453125, -757.4441528320312, 444.149658203125, 597.6895141601562, -248.76181030273438, 378.2890319824219, -58.303611755371094, -58.75647735595703, -79.91708374023438, 84.60467529296875, 987.07763671875, 584.1351318359375, 834.2609252929688, 174.88571166992188, 91.68275451660156, -666.3277587890625, 405.9748840332031, 1464.882080078125, 44.354400634765625, -78.66670227050781, 2773.775634765625, 516.9122924804688, -26.561565399169922, -426.0748596191406, 1064.298828125, 494.7957458496094, 314.13079833984375, 257.50714111328125, 235.57608032226562, 1813.37646484375, 27.799560546875, 108.43376159667969, 215.985107421875, 19.681835174560547, 529.6477661132812, 257.5101318359375, 742.1761474609375, 56.45086669921875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000623.npy"} +{"epoch": 0.9148311306901615, "step": 624, "batch_size": 64, "mean": 490.3962097167969, "std": 585.9351196289062, "min": -1104.004638671875, "p10": -225.34830627441403, "median": 512.1863250732422, "p90": 1249.5452880859375, "max": 1511.2264404296875, "pos_frac": 0.796875, "sample": [738.279296875, -389.97027587890625, 783.35791015625, 365.0274658203125, 545.4193725585938, 1129.8287353515625, 1259.7545166015625, 1511.2264404296875, 1433.7176513671875, 217.75384521484375, 207.51461791992188, -63.26397705078125, 112.12247467041016, 1225.7237548828125, 825.3333740234375, -67.36328125, 571.8492431640625, 783.3688354492188, 1291.4365234375, -1046.8372802734375, 47.16230773925781, 247.5402374267578, 85.54663848876953, 912.2033081054688, 396.5034484863281, 252.12692260742188, 941.97998046875, 823.6060180664062, 1112.2694091796875, -234.91738891601562, 193.7705841064453, 1125.5594482421875, -203.02044677734375, 305.0973205566406, -387.367431640625, 1004.9254760742188, 392.98712158203125, -61.66925048828125, 952.431396484375, 448.811279296875, -1104.004638671875, 78.31153106689453, -25.8258056640625, 669.9063110351562, 1057.91162109375, 1045.140625, 278.18109130859375, 751.7311401367188, 907.1956787109375, 514.0526123046875, 178.1311798095703, -334.58447265625, 44.60346984863281, 1285.0833740234375, 510.3200378417969, 593.3598022460938, -380.3583679199219, 707.48974609375, -127.73469543457031, 823.4981689453125, 1372.0709228515625, 199.91033935546875, 1400.22265625, 1150.9189453125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000624.npy"} +{"epoch": 0.9162995594713657, "step": 625, "batch_size": 64, "mean": 520.080322265625, "std": 551.636474609375, "min": -355.84857177734375, "p10": 1.234500122070314, "median": 409.98704528808594, "p90": 1246.338073730469, "max": 2359.697998046875, "pos_frac": 0.90625, "sample": [140.10153198242188, 128.19509887695312, -320.6367492675781, 914.9312744140625, 254.36663818359375, 24.061033248901367, 861.01171875, 1179.22412109375, 1082.9356689453125, 1087.0411376953125, 97.2354736328125, 651.502685546875, 321.92816162109375, 77.98783874511719, 1565.560546875, 533.3908081054688, 301.7530517578125, 40.44545364379883, 2359.697998046875, 522.9601440429688, 1878.054931640625, 212.47145080566406, 112.42667388916016, 675.8718872070312, 193.25808715820312, 731.8740234375, 1964.51171875, 2.8839263916015625, 252.1645050048828, 346.9034118652344, 521.0786743164062, 613.4033203125, 451.8564147949219, -355.84857177734375, 547.529052734375, -86.73037719726562, 86.0755386352539, 669.1707763671875, 284.26019287109375, 774.808837890625, 1275.1011962890625, 386.3641357421875, 1392.0396728515625, 445.9881591796875, 218.9838409423828, 432.3795471191406, -145.96498107910156, 434.942626953125, 671.7476196289062, 742.3274536132812, 84.81595611572266, 0.5276031494140625, 891.2372436523438, 387.59454345703125, 130.42323303222656, 195.92910766601562, 101.52912902832031, 383.7201232910156, 1087.7689208984375, -349.8652648925781, 689.5820922851562, -92.24334716796875, 1415.187744140625, 805.3058471679688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000625.npy"} +{"epoch": 0.9177679882525698, "step": 626, "batch_size": 64, "mean": 292.92529296875, "std": 647.7742309570312, "min": -2507.311279296875, "p10": -341.5905517578124, "median": 278.0493469238281, "p90": 920.763775634766, "max": 2051.9033203125, "pos_frac": 0.734375, "sample": [558.5858154296875, 14.2100830078125, 509.455078125, 447.66387939453125, 147.39007568359375, 237.6658172607422, 730.5442504882812, 295.8274841308594, 825.4767456054688, -2507.311279296875, 1594.9659423828125, 324.2701110839844, 1415.18359375, 2051.9033203125, -587.6791381835938, 218.56369018554688, 283.4625244140625, -380.8725891113281, -11.288238525390625, -115.53533935546875, 56.53546142578125, 233.38206481933594, 450.77777099609375, 115.99158477783203, 52.01066589355469, 13.592330932617188, 160.40151977539062, -602.7899169921875, 562.0300903320312, 691.611083984375, -77.66300964355469, 692.638671875, -582.8375244140625, -673.8692016601562, 586.1698608398438, 1269.00048828125, 765.1920776367188, 375.20556640625, 372.83074951171875, 740.7620849609375, 576.859619140625, -249.93246459960938, 1037.754638671875, 451.08428955078125, 1806.52734375, 657.6824951171875, -118.64395904541016, 194.99551391601562, 83.83697509765625, 428.21954345703125, -58.21295166015625, 762.705810546875, 321.8557434082031, 961.60107421875, -20.263397216796875, 220.93443298339844, 608.4024658203125, 370.5887451171875, -645.693603515625, -45.73960876464844, 50.06584167480469, -156.12625122070312, 272.63616943359375, -17.374462127685547], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000626.npy"} +{"epoch": 0.9192364170337739, "step": 627, "batch_size": 64, "mean": 588.1802978515625, "std": 841.1820068359375, "min": -687.7088623046875, "p10": -174.7675994873047, "median": 401.310302734375, "p90": 1465.6462158203126, "max": 4001.02685546875, "pos_frac": 0.765625, "sample": [706.2597045898438, 258.79437255859375, 1170.6844482421875, 417.9761047363281, -202.93423461914062, -123.08735656738281, 129.49124145507812, 647.6724853515625, 601.4105224609375, 448.7795104980469, 1347.4710693359375, -531.97412109375, 197.042724609375, 531.3941650390625, -104.16979217529297, 1066.1954345703125, -177.19265747070312, 732.1954956054688, 973.3341064453125, 1157.22021484375, -662.8984375, 1.6361427307128906, 594.3380737304688, -15.372434616088867, 1778.43994140625, 168.0477294921875, 395.67535400390625, 2561.56591796875, 1032.900634765625, 406.94525146484375, 1668.788818359375, 78.77153015136719, 566.0870971679688, 1147.9007568359375, 614.1735229492188, 1680.9088134765625, 1487.3062744140625, 217.90927124023438, 547.6658325195312, -123.79019927978516, 117.15192413330078, -49.9913330078125, 3435.155517578125, 1415.1060791015625, 766.73876953125, -185.55282592773438, 4001.02685546875, -169.109130859375, 554.2960205078125, -157.34823608398438, 1041.54541015625, 327.2081604003906, 907.6224975585938, -687.7088623046875, 267.0543212890625, 368.39093017578125, 349.979248046875, 198.55970764160156, -377.8475341796875, 1264.795166015625, -42.869850158691406, 225.24525451660156, 302.3560791015625, 378.17498779296875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000627.npy"} +{"epoch": 0.920704845814978, "step": 628, "batch_size": 64, "mean": 444.05242919921875, "std": 663.3372192382812, "min": -857.6484985351562, "p10": -321.23723754882803, "median": 356.3833923339844, "p90": 1352.1934814453125, "max": 2051.80126953125, "pos_frac": 0.71875, "sample": [293.74407958984375, -13.078266143798828, 433.4071044921875, 1437.67041015625, 1639.77978515625, 172.67926025390625, 648.0778198242188, 45.11382293701172, -204.18162536621094, 53.60601806640625, 156.88546752929688, 142.52716064453125, -857.6484985351562, 1190.8428955078125, 1001.1104736328125, 1165.8582763671875, -85.63233947753906, -170.884033203125, 1637.212158203125, 503.1104736328125, 2051.80126953125, 589.69287109375, 204.15060424804688, -215.86932373046875, -76.88631439208984, -355.0504455566406, 556.8148803710938, 913.1554565429688, -575.8780517578125, -451.8084411621094, 896.7622680664062, 316.455322265625, 893.3726196289062, 1169.2412109375, 1361.310302734375, 849.1357421875, 514.0543212890625, 700.8668823242188, 141.65721130371094, 100.25778198242188, -116.5978012084961, 439.31768798828125, 536.4349975585938, 201.60614013671875, 1635.4805908203125, 1309.947998046875, 1330.9208984375, 246.42596435546875, -449.4130859375, -824.880859375, -242.33975219726562, -215.53932189941406, 1426.8604736328125, 929.9755859375, -637.7521362304688, -6.016916275024414, 1198.174560546875, 396.31146240234375, 539.5794067382812, 100.2894287109375, 1235.7412109375, 595.6990356445312, -56.49555969238281, 72.18699645996094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000628.npy"} +{"epoch": 0.922173274596182, "step": 629, "batch_size": 64, "mean": 409.11669921875, "std": 549.7535400390625, "min": -1073.215087890625, "p10": -170.7123306274414, "median": 428.0638427734375, "p90": 1148.089489746094, "max": 1816.5673828125, "pos_frac": 0.8125, "sample": [226.31259155273438, 1224.5128173828125, -405.2945861816406, -444.17706298828125, 368.4332275390625, 427.24664306640625, 393.91094970703125, 718.0515747070312, 1393.4669189453125, 632.0990600585938, 462.43878173828125, 787.5469360351562, -28.822731018066406, 467.76531982421875, 1330.10986328125, -160.58319091796875, 101.94698333740234, 62.573490142822266, 29.978464126586914, 543.55810546875, 30.23919677734375, 1098.9439697265625, 577.8228759765625, -145.27723693847656, 133.77272033691406, 518.1197509765625, -904.206298828125, -82.02338409423828, 1005.8146362304688, 797.5267944335938, 125.41071319580078, 862.6847534179688, 1489.514892578125, 1816.5673828125, 316.9024353027344, 125.40218353271484, -1073.215087890625, 1169.15185546875, 1035.986083984375, -175.0533905029297, -332.468017578125, -610.3490600585938, 341.83837890625, 338.07586669921875, 637.6444091796875, 1284.7950439453125, 330.4386901855469, 459.35284423828125, 740.9744873046875, 22.37148094177246, 857.870361328125, 285.3270263671875, 118.25201416015625, 643.5460205078125, 275.7476806640625, 591.1653442382812, 58.35419464111328, 660.3436279296875, 839.4552001953125, 465.470947265625, 428.88104248046875, 503.1309814453125, 447.185302734375, -59.094512939453125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000629.npy"} +{"epoch": 0.9236417033773862, "step": 630, "batch_size": 64, "mean": 245.02308654785156, "std": 726.9574584960938, "min": -1483.20556640625, "p10": -507.95203857421876, "median": 171.73878479003906, "p90": 1034.6023193359376, "max": 3443.33642578125, "pos_frac": 0.671875, "sample": [1123.2806396484375, -1280.0107421875, -609.6661987304688, 426.5964660644531, -236.43728637695312, 753.1434936523438, 75.84423065185547, -241.74249267578125, 2398.7177734375, -234.40771484375, 721.609375, -64.49537658691406, 224.48153686523438, 996.1180419921875, 800.2843627929688, -1483.20556640625, 84.09854125976562, -655.6524658203125, 276.00439453125, 229.59169006347656, 416.2773132324219, -167.18592834472656, 153.01927185058594, 1481.7637939453125, -208.24391174316406, 3443.33642578125, 59.24418640136719, 1051.0955810546875, 124.73470306396484, -69.08967590332031, 34.79846954345703, -374.80364990234375, 286.6386413574219, -512.7169189453125, 418.6629638671875, -532.2978515625, -249.19395446777344, 482.6258239746094, 168.99560546875, 422.916015625, 220.9481201171875, -218.77996826171875, 351.2060241699219, -496.833984375, 176.3721923828125, 399.59710693359375, 193.12225341796875, 293.017822265625, 735.7696533203125, -45.90796661376953, 9.021682739257812, 1190.8450927734375, 716.4335327148438, 169.0986785888672, -298.96173095703125, 125.23200988769531, 430.6276550292969, 146.1162567138672, 625.0574951171875, -524.6958618164062, 1178.82666015625, 174.37889099121094, 402.007080078125, -5.750734329223633], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000630.npy"} +{"epoch": 0.9251101321585903, "step": 631, "batch_size": 64, "mean": 373.1282043457031, "std": 675.4996337890625, "min": -1982.062744140625, "p10": -386.4982696533203, "median": 294.79627990722656, "p90": 1189.7163208007814, "max": 2040.3751220703125, "pos_frac": 0.703125, "sample": [1113.388916015625, 1001.4645385742188, -405.9872741699219, 250.74168395996094, -269.7752685546875, 520.67919921875, -207.12399291992188, -81.84851837158203, -213.10549926757812, -341.02392578125, 1751.8392333984375, 1261.1549072265625, -474.2709655761719, 427.6062316894531, -471.785400390625, 120.91310119628906, 550.6591796875, 856.357177734375, 16.13239288330078, -439.9454650878906, 168.82485961914062, -53.33815002441406, 297.29656982421875, -729.1803588867188, 1053.582275390625, -34.37655258178711, 235.49093627929688, -97.31455993652344, 749.60595703125, 704.018310546875, -1982.062744140625, 513.0810546875, 2040.3751220703125, 578.3627319335938, 382.36212158203125, 664.0995483398438, 292.2959899902344, -175.31568908691406, -188.696044921875, 1159.094482421875, 105.67838287353516, 573.0594482421875, 1611.19140625, 903.9358520507812, 519.0587158203125, -775.6209106445312, 1238.5552978515625, 290.76611328125, 1580.7646484375, 511.92852783203125, 82.75524139404297, 736.6083374023438, -171.98870849609375, 412.4895935058594, 277.67803955078125, 251.7066650390625, 1202.8399658203125, 750.643310546875, 1041.9376220703125, -155.36514282226562, 228.700927734375, 1009.6026611328125, 240.74447631835938, 868.2587890625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000631.npy"} +{"epoch": 0.9265785609397944, "step": 632, "batch_size": 64, "mean": 403.1651916503906, "std": 654.1398315429688, "min": -1255.99658203125, "p10": -382.8584106445312, "median": 407.4291687011719, "p90": 1172.0635498046877, "max": 2273.914306640625, "pos_frac": 0.75, "sample": [742.849853515625, -780.7498168945312, 292.5704345703125, 392.040283203125, 625.3048095703125, -1212.5234375, 1180.8455810546875, 1087.1673583984375, 1400.91162109375, 991.4448852539062, 773.26123046875, 712.9258422851562, 196.0245361328125, 986.464111328125, 208.66268920898438, 215.01419067382812, 561.334716796875, 420.3642883300781, 81.99290466308594, 54.357154846191406, 1554.37841796875, 394.4940490722656, 712.78125, 492.90484619140625, -492.09490966796875, 1138.0343017578125, 566.9296264648438, -391.3460998535156, 475.861083984375, -187.94696044921875, -1255.99658203125, 472.8078918457031, 1012.7571411132812, 1115.6258544921875, 630.0145263671875, 840.32275390625, 1097.2159423828125, 782.6561889648438, 374.6998596191406, -363.0538024902344, 1327.2532958984375, -110.17792510986328, -221.59259033203125, -112.16986083984375, 614.7490844726562, 268.44189453125, 688.91357421875, 236.08123779296875, 1226.550048828125, 2273.914306640625, 1238.87451171875, -167.24118041992188, -424.34942626953125, 158.29617309570312, 145.9517059326172, -252.7123260498047, 424.6086730957031, 291.35247802734375, -660.8528442382812, 48.11236572265625, 82.58464050292969, -191.3817901611328, -135.51307678222656, 1151.5721435546875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000632.npy"} +{"epoch": 0.9280469897209985, "step": 633, "batch_size": 64, "mean": 289.7676696777344, "std": 613.7915649414062, "min": -1262.23046875, "p10": -407.9066955566406, "median": 272.8928680419922, "p90": 1124.6555297851562, "max": 1683.973388671875, "pos_frac": 0.703125, "sample": [-416.0045166015625, 1293.9378662109375, 868.6880493164062, -870.32177734375, 303.223388671875, 335.38458251953125, -493.03228759765625, -92.96609497070312, 1395.8939208984375, 147.71810913085938, 264.4504699707031, 98.35015869140625, 425.13421630859375, -1262.23046875, 818.0360107421875, 199.0166778564453, 157.7695770263672, 1128.47119140625, 403.0582580566406, -232.54074096679688, -169.7357177734375, 357.6625671386719, 311.984375, -43.636314392089844, 728.7545166015625, 286.4586486816406, -69.71995544433594, -137.080810546875, -790.99267578125, 1047.3837890625, 411.4110107421875, 264.72503662109375, 1163.7398681640625, 426.4468078613281, 717.4324951171875, -64.62178039550781, -332.72802734375, -1047.415771484375, -389.01177978515625, 226.178466796875, 1422.9873046875, 442.3254089355469, 251.91815185546875, -840.52197265625, 578.7891845703125, 626.0305786132812, 495.14508056640625, 1683.973388671875, 1115.7523193359375, 979.341552734375, 108.98184204101562, 281.0606994628906, -309.7138366699219, 1038.1016845703125, 306.4136962890625, -99.76426696777344, 212.81459045410156, 482.93310546875, 1521.2403564453125, 95.24087524414062, 496.57373046875, -75.22799682617188, 103.43994903564453, 258.0240478515625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000633.npy"} +{"epoch": 0.9295154185022027, "step": 634, "batch_size": 64, "mean": 245.52960205078125, "std": 608.4852905273438, "min": -944.908935546875, "p10": -460.3481506347656, "median": 177.92864227294922, "p90": 1091.2878540039067, "max": 2288.95751953125, "pos_frac": 0.6875, "sample": [180.84730529785156, 1000.4678955078125, -262.1315612792969, 1516.3349609375, -759.2164306640625, 24.984373092651367, 31.997278213500977, 175.00997924804688, 283.57598876953125, 1292.6636962890625, 49.03460693359375, 196.165283203125, -93.97345733642578, 372.37762451171875, 661.9583740234375, 217.91390991210938, 323.591796875, -234.2740478515625, -698.6790771484375, 1147.269775390625, 1130.210693359375, -152.2312469482422, -108.02980041503906, 106.71232604980469, 40.421356201171875, 91.14128875732422, 172.50332641601562, 654.9197998046875, 479.9668884277344, 59.7737922668457, 2288.95751953125, 322.8837585449219, 762.6114501953125, -944.908935546875, 934.4334716796875, 1174.4151611328125, 624.8328857421875, -344.0384521484375, -473.91339111328125, 229.67630004882812, 769.7976684570312, -188.6013946533203, -68.98645782470703, -925.795166015625, 205.9890594482422, -603.4475708007812, 130.31834411621094, -654.6220703125, -260.80322265625, 835.4818115234375, -428.6959228515625, 806.6051025390625, 406.112548828125, 1372.244384765625, 397.1616516113281, 114.56584167480469, 256.9768371582031, 692.259033203125, -96.6567611694336, 267.7528381347656, 4.970123291015625, -51.50492858886719, -345.44189453125, 601.95751953125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000634.npy"} +{"epoch": 0.9309838472834068, "step": 635, "batch_size": 64, "mean": 355.125732421875, "std": 636.342529296875, "min": -999.5966796875, "p10": -273.24526977539057, "median": 319.90899658203125, "p90": 991.4952514648438, "max": 2443.7587890625, "pos_frac": 0.71875, "sample": [952.3391723632812, 178.41822814941406, 2443.7587890625, -200.978515625, 461.0486755371094, -100.2608871459961, 79.83856201171875, 9.979137420654297, -10.760322570800781, 518.1040649414062, 191.0166015625, 1434.1126708984375, -48.85496520996094, 843.09521484375, -543.433349609375, 333.49566650390625, 489.3353576660156, -14.199127197265625, 710.9151611328125, 26.385543823242188, 349.57659912109375, 42.12809753417969, 700.8687133789062, 994.0771484375, 371.214599609375, 48.332557678222656, 11.100448608398438, 456.9229431152344, -109.36244201660156, 1703.797607421875, -43.79987335205078, 398.7957763671875, 283.2231750488281, 1288.101318359375, 733.2810668945312, -134.0562744140625, 52.75493621826172, 0.6103324890136719, 287.7254943847656, 985.4708251953125, -132.83949279785156, 958.677734375, 887.3721923828125, 365.9472961425781, -775.5394287109375, -153.82162475585938, 684.7227783203125, -54.386985778808594, -304.21673583984375, 494.3621826171875, -428.48663330078125, -451.92071533203125, -999.5966796875, 751.1619262695312, 1180.8125, 316.6058349609375, 403.4565124511719, 985.40087890625, -821.1844482421875, 2256.78173828125, 168.17434692382812, 557.8677978515625, 323.212158203125, 341.3651123046875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000635.npy"} +{"epoch": 0.9324522760646109, "step": 636, "batch_size": 64, "mean": 424.83349609375, "std": 642.5899047851562, "min": -1163.353759765625, "p10": -244.0883056640625, "median": 357.68601989746094, "p90": 991.9915588378907, "max": 3377.95458984375, "pos_frac": 0.78125, "sample": [44.231468200683594, -415.12548828125, 95.09616088867188, -200.60366821289062, 306.9792175292969, -378.2850341796875, 381.65704345703125, 368.13787841796875, 449.0265808105469, 101.94210052490234, 821.0577392578125, -338.35577392578125, 400.078857421875, 1159.406005859375, 50.825645446777344, 997.3981323242188, -32.7457275390625, 2198.8525390625, 1220.42333984375, 486.93695068359375, 248.72218322753906, 982.9082641601562, -79.97830200195312, -213.71437072753906, 735.9728393554688, 360.7252502441406, 950.3905639648438, 454.5562744140625, 192.5170135498047, 3377.95458984375, 12.094100952148438, -247.92750549316406, 979.5255126953125, 1332.5240478515625, -248.53646850585938, 926.0845947265625, 995.8843994140625, 370.9861145019531, 666.8469848632812, -228.7193603515625, 735.9766845703125, 195.9973602294922, 798.7303466796875, 799.381103515625, 708.6259155273438, 330.12969970703125, 692.9006958007812, 82.50747680664062, 294.94677734375, -235.1301727294922, 184.8976287841797, 11.526519775390625, -1163.353759765625, 354.64678955078125, 352.0169372558594, -260.01068115234375, 853.9183959960938, 681.043701171875, 276.0658874511719, 488.43658447265625, -8.574050903320312, 265.4372253417969, 547.9859619140625, 915.4899291992188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000636.npy"} +{"epoch": 0.933920704845815, "step": 637, "batch_size": 64, "mean": 468.7959899902344, "std": 656.7310791015625, "min": -1002.8700561523438, "p10": -96.77195281982421, "median": 320.29620361328125, "p90": 1465.9671875000001, "max": 2425.235107421875, "pos_frac": 0.828125, "sample": [140.28814697265625, 63.180931091308594, 474.3515930175781, 44.05828094482422, 71.76632690429688, 962.033203125, 317.5784912109375, 65.65484619140625, 625.903564453125, 1219.907470703125, 758.5335693359375, -1002.8700561523438, 111.57781982421875, 182.97018432617188, -242.61412048339844, 697.8405151367188, 846.668212890625, 1544.017578125, 949.6504516601562, 654.8665771484375, 1431.593017578125, 358.0037841796875, 239.31204223632812, 2425.235107421875, 77.88809204101562, 537.0592651367188, -271.4522399902344, -33.98672866821289, 208.60250854492188, -59.949127197265625, 392.7628479003906, -227.62582397460938, -246.6378173828125, 691.1956787109375, 341.6949157714844, -100.06570434570312, 245.04811096191406, 618.7646484375, 881.0028076171875, 225.6923828125, 543.6104736328125, 360.51873779296875, 198.85763549804688, 970.134521484375, 898.0887451171875, 557.5263061523438, 1480.698974609375, 1.3481521606445312, 1740.374267578125, -916.8714599609375, 1299.1695556640625, 37.82383728027344, 323.013916015625, 1768.45947265625, 437.6043701171875, 81.71623229980469, 112.60237121582031, 1484.1339111328125, -89.08653259277344, 2293.01806640625, 14.236076354980469, 125.92881774902344, 127.44189453125, -66.90726470947266], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000637.npy"} +{"epoch": 0.9353891336270191, "step": 638, "batch_size": 64, "mean": 374.45556640625, "std": 713.5652465820312, "min": -3224.827392578125, "p10": -122.10970001220701, "median": 272.4068908691406, "p90": 1190.7811157226565, "max": 1977.989013671875, "pos_frac": 0.765625, "sample": [1318.81005859375, -911.1199340820312, -3224.827392578125, 926.005126953125, 259.12762451171875, 159.14154052734375, 23.513303756713867, 67.67620849609375, 1213.6175537109375, 265.2380676269531, 752.1702880859375, 1420.59423828125, 95.35154724121094, 756.5447387695312, 177.79600524902344, 488.41162109375, 718.6958618164062, -504.5546569824219, 498.85150146484375, 44.99055862426758, 445.4592590332031, 49.25707244873047, -209.893310546875, 148.0328369140625, 279.5757141113281, 893.2748413085938, 1515.015625, -18.969013214111328, 670.216064453125, 287.6935729980469, 698.3778076171875, 212.73416137695312, 749.1111450195312, -55.682640075683594, -105.13633728027344, 876.4306640625, 1059.6832275390625, -57.53599166870117, 397.1812438964844, -309.2929992675781, -2.3222484588623047, 562.3629150390625, 301.1488952636719, 1977.989013671875, -126.36229705810547, -37.766448974609375, 870.5883178710938, 126.42625427246094, 573.6492309570312, 1134.5980224609375, -96.1136474609375, -156.85763549804688, 1137.3306884765625, 502.07080078125, 167.43096923828125, 108.21286010742188, 80.7239761352539, 16.807653427124023, 857.9969482421875, 1137.49609375, -112.18697357177734, 50.01836395263672, 1541.4468994140625, 1278.8980712890625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000638.npy"} +{"epoch": 0.9368575624082232, "step": 639, "batch_size": 64, "mean": 169.66934204101562, "std": 701.3534545898438, "min": -1304.256103515625, "p10": -742.8577697753906, "median": 151.6739730834961, "p90": 1085.5047241210943, "max": 1704.3355712890625, "pos_frac": 0.625, "sample": [1696.852783203125, -938.7137451171875, -542.1296997070312, 1182.700927734375, 946.4693603515625, -416.74688720703125, 343.8635559082031, 345.7923583984375, -602.5569458007812, 888.3844604492188, 1573.56884765625, -46.9096565246582, 443.5492858886719, 575.8546752929688, -1010.1527709960938, 274.3006286621094, -486.336669921875, -395.7064208984375, 132.76254272460938, 466.7178649902344, 801.9769287109375, -1304.256103515625, 798.9880981445312, 622.5680541992188, -651.9102783203125, -1192.412353515625, -1048.2222900390625, 264.5160217285156, 166.61447143554688, 34.97187805175781, 601.6423950195312, 557.7199096679688, 85.07744598388672, -643.1525268554688, 1193.1767578125, 1145.09130859375, -302.18255615234375, 909.6824951171875, -411.672607421875, -38.999366760253906, 137.62423706054688, -112.71487426757812, 635.5814208984375, 364.2026672363281, 1704.3355712890625, -781.8352661132812, -195.02383422851562, 114.33931732177734, -343.413818359375, -182.66558837890625, 96.04121398925781, 744.8756103515625, 254.2106475830078, 914.9439697265625, -837.05908203125, -335.0561828613281, -563.629638671875, 152.32691955566406, 115.25758361816406, 701.8239135742188, 300.2757873535156, 1215.088623046875, 587.5042114257812, 151.02102661132812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000639.npy"} +{"epoch": 0.9383259911894273, "step": 640, "batch_size": 64, "mean": 565.5443725585938, "std": 766.1266479492188, "min": -831.3593139648438, "p10": -185.45104980468747, "median": 434.0885925292969, "p90": 1443.8938354492188, "max": 2989.343994140625, "pos_frac": 0.78125, "sample": [763.9058227539062, 553.5283203125, -520.4928588867188, -142.3965301513672, 123.4695053100586, 238.29745483398438, 427.41595458984375, 1362.165771484375, 1235.0526123046875, 202.70120239257812, 1488.073486328125, 1502.9901123046875, 1004.6181640625, 129.67068481445312, -294.3896789550781, 488.5508728027344, 894.0523681640625, -163.279052734375, -19.326126098632812, 1444.884765625, -771.648193359375, 1050.7225341796875, 359.19573974609375, 715.973388671875, -192.45291137695312, -831.3593139648438, 159.55477905273438, 398.627197265625, 2989.343994140625, 842.962158203125, 610.0844116210938, 1068.4542236328125, -543.5209350585938, 626.8731079101562, 2492.587158203125, 299.5932312011719, -169.11337280273438, 267.6751708984375, 10.60626220703125, 447.885009765625, -63.61714172363281, 210.0591583251953, 28.242721557617188, 1117.3421630859375, 1061.6629638671875, 105.04556274414062, 894.3906860351562, 807.28662109375, -487.8404235839844, 1278.31591796875, 2225.55322265625, 365.2167663574219, -133.84255981445312, 1192.07373046875, 64.59620666503906, 727.89208984375, 440.76123046875, 824.3671264648438, 138.58680725097656, 774.7135009765625, 1441.5816650390625, 2506.636474609375, -96.91111755371094, 221.19076538085938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000640.npy"} +{"epoch": 0.9397944199706314, "step": 641, "batch_size": 64, "mean": 355.42193603515625, "std": 738.5091552734375, "min": -1274.910888671875, "p10": -530.9949462890625, "median": 374.2059783935547, "p90": 1115.8634155273442, "max": 3000.9091796875, "pos_frac": 0.734375, "sample": [176.87942504882812, 275.2354736328125, 547.8826293945312, 1028.87158203125, -523.094482421875, -85.65670776367188, 384.1915283203125, 1536.88330078125, 426.743896484375, 105.40303039550781, -1101.6593017578125, 541.9339599609375, 43.16993713378906, -405.1363525390625, 3000.9091796875, -522.2921752929688, -573.3816528320312, -534.380859375, -45.73158264160156, 854.9591064453125, -132.22927856445312, 167.46224975585938, 99.6380615234375, 583.2459106445312, 200.86953735351562, 574.9422607421875, -1197.546142578125, 822.362060546875, 613.9422607421875, 150.2474822998047, 922.9505615234375, 511.44989013671875, 519.618408203125, 64.93389892578125, 147.218994140625, -63.09695816040039, 1294.7247314453125, 400.29339599609375, 822.6930541992188, 585.0418090820312, 168.09600830078125, 0.2291278839111328, -73.7696304321289, -601.1327514648438, 233.6051025390625, 852.6572875976562, -1274.910888671875, 1153.1456298828125, 835.3981323242188, 917.1319580078125, 2335.365478515625, 723.6911010742188, -1056.791748046875, 728.821533203125, -54.947914123535156, 346.0354919433594, 672.945556640625, -270.04461669921875, 566.4671630859375, 364.2204284667969, 737.350830078125, 1263.9013671875, 634.8741455078125, 1324.171875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000641.npy"} +{"epoch": 0.9412628487518355, "step": 642, "batch_size": 64, "mean": 441.33770751953125, "std": 897.9398193359375, "min": -1737.8504638671875, "p10": -380.63308715820307, "median": 362.73887634277344, "p90": 1372.1760131835947, "max": 4653.92578125, "pos_frac": 0.734375, "sample": [90.44898986816406, 568.69384765625, 730.9180908203125, 652.3412475585938, 217.2607879638672, -1737.8504638671875, 746.5564575195312, 370.6141662597656, -70.35194396972656, -488.4686279296875, -100.49793243408203, 113.06430053710938, 1465.0052490234375, -401.1849365234375, 821.0400390625, 987.7096557617188, 468.49676513671875, 817.8648071289062, -925.5432739257812, 49.43905258178711, 156.01657104492188, -61.73004150390625, 317.39996337890625, -21.260210037231445, 354.86358642578125, 631.9542846679688, -179.00570678710938, 209.5576171875, 99.16558837890625, 484.7867736816406, 831.92236328125, 654.4017944335938, 660.6708984375, 2442.586181640625, -287.7214050292969, 112.55618286132812, 347.4552001953125, 733.130126953125, -503.96533203125, 381.5955505371094, 820.9303588867188, 174.78770446777344, 455.6270446777344, 1551.4423828125, -332.67877197265625, 1155.574462890625, 845.2896728515625, 1668.8104248046875, -170.24757385253906, 16.5755615234375, 4653.92578125, 959.59912109375, -647.0324096679688, 469.4898681640625, -277.16949462890625, -179.2095184326172, 397.32598876953125, 2515.5947265625, 59.172088623046875, -1043.20263671875, 464.8946533203125, 1707.7802734375, 235.8548583984375, 1002.5435180664062], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000642.npy"} +{"epoch": 0.9427312775330396, "step": 643, "batch_size": 64, "mean": 343.71783447265625, "std": 546.9442138671875, "min": -803.125244140625, "p10": -357.1795166015625, "median": 305.22422790527344, "p90": 1007.3739746093751, "max": 1832.560302734375, "pos_frac": 0.78125, "sample": [288.3361511230469, 197.05601501464844, 179.46043395996094, 826.6237182617188, 893.5696411132812, 669.2442626953125, 464.0394287109375, -72.06340789794922, 392.61590576171875, 776.7174072265625, 18.705331802368164, -748.70166015625, 328.3678283691406, 293.9100646972656, -67.74244689941406, 967.215576171875, 848.5060424804688, 1107.822021484375, 1069.78662109375, 502.54803466796875, 211.22853088378906, 40.271183013916016, -353.1458435058594, 207.45724487304688, 86.42677307128906, 139.31185913085938, -803.125244140625, 637.1383056640625, 457.0097961425781, 936.5244140625, -530.1454467773438, 243.6551971435547, 437.49365234375, 297.9180908203125, 845.2479248046875, 1209.468994140625, 361.4653015136719, -532.9637451171875, 673.4986572265625, 312.5303649902344, -337.67694091796875, 1785.7540283203125, 1115.863037109375, 370.4014892578125, 675.6311645507812, 114.91510772705078, 2.9806900024414062, 413.20709228515625, 1832.560302734375, 55.41494369506836, 992.781982421875, 688.6182861328125, 562.8416748046875, -283.75555419921875, 1013.627685546875, 42.858062744140625, -362.6920166015625, -358.9082336425781, 389.5016174316406, -10.459564208984375, -162.85064697265625, 103.39639282226562, -639.17578125, 179.823974609375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000643.npy"} +{"epoch": 0.9441997063142438, "step": 644, "batch_size": 64, "mean": 347.21612548828125, "std": 714.92041015625, "min": -1851.44482421875, "p10": -432.6029754638671, "median": 202.8726043701172, "p90": 1194.6079467773438, "max": 2413.385009765625, "pos_frac": 0.71875, "sample": [2413.385009765625, 92.48665618896484, 823.5262451171875, -1851.44482421875, 461.0341796875, 393.46112060546875, -728.1348876953125, 49.00270080566406, 807.318115234375, 334.06817626953125, 46.02526092529297, -27.435592651367188, 76.82648468017578, 404.17236328125, 146.8492889404297, 636.388671875, 202.76300048828125, -40.18792724609375, 202.98220825195312, -462.1467590332031, 164.59646606445312, 2090.286376953125, 67.06849670410156, -608.8231201171875, 726.1551513671875, 825.71337890625, -1.1976966857910156, 388.98822021484375, -363.66748046875, 1614.6468505859375, -67.34941864013672, -355.1210632324219, 806.2152099609375, -120.35740661621094, 38.30226135253906, -29.813451766967773, 1212.4478759765625, 611.0548095703125, 745.1607055664062, 298.00811767578125, -8.902870178222656, -488.6141052246094, 580.52685546875, 55.18012237548828, 1131.489990234375, 548.2203369140625, -337.47021484375, -243.79913330078125, 128.11009216308594, 1071.1051025390625, 1410.158447265625, 776.2567138671875, 68.15135192871094, -1047.9136962890625, 1007.5158081054688, 1152.9814453125, 319.4715576171875, 1323.226806640625, 25.597198486328125, 551.3995971679688, 649.2686157226562, 1835.9920654296875, 191.06549072265625, -500.4408874511719], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000644.npy"} +{"epoch": 0.9456681350954479, "step": 645, "batch_size": 64, "mean": 178.1243896484375, "std": 704.0673217773438, "min": -1316.3023681640625, "p10": -656.9836791992187, "median": 110.71925354003906, "p90": 1106.4225830078128, "max": 2242.4404296875, "pos_frac": 0.625, "sample": [420.2395324707031, 1026.271728515625, -659.6767578125, 922.6336059570312, -1316.3023681640625, -1263.76025390625, -650.6998291015625, 38.350433349609375, 403.8281555175781, -215.3774871826172, -122.64161682128906, -56.8944091796875, 50.425086975097656, -6.183841705322266, -87.71884155273438, -322.0638427734375, 409.86004638671875, 335.5418395996094, 931.3359985351562, 1255.2174072265625, 682.4002685546875, -395.8201904296875, 80.20074462890625, 22.705825805664062, 112.43734741210938, 100.27354431152344, 1140.77294921875, -16.210792541503906, -783.9743041992188, -1134.863037109375, 192.00074768066406, 379.24566650390625, -566.645263671875, 1335.6517333984375, 839.0819702148438, -22.455780029296875, 1817.76123046875, 744.2003173828125, -1192.32080078125, -647.1506958007812, 1332.947265625, 39.752777099609375, 123.749267578125, 174.06459045410156, 460.32763671875, 584.0547485351562, 375.43023681640625, 312.4923095703125, 172.30908203125, 739.2194213867188, 109.00115966796875, -310.89349365234375, 1288.156494140625, 7.563713073730469, 482.2205810546875, 2242.4404296875, -172.46713256835938, 350.70269775390625, -942.56494140625, -131.06576538085938, 643.16162109375, 206.34169006347656, -109.24357604980469, -357.41485595703125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000645.npy"} +{"epoch": 0.947136563876652, "step": 646, "batch_size": 64, "mean": 309.60821533203125, "std": 582.269287109375, "min": -1011.7897338867188, "p10": -329.3345062255859, "median": 238.01132202148438, "p90": 1112.2093872070313, "max": 2033.6873779296875, "pos_frac": 0.71875, "sample": [931.056396484375, 1159.0001220703125, -98.66602325439453, 1332.76220703125, 638.6878662109375, 235.6150665283203, 500.65887451171875, 667.0855712890625, 980.7498779296875, 26.652786254882812, 405.6292419433594, 517.7622680664062, -937.2227783203125, -30.067169189453125, 1125.674560546875, 142.38795471191406, -338.8603210449219, 299.8489990234375, 1109.57080078125, -56.13579559326172, 445.3779296875, -110.58971405029297, -192.68133544921875, 596.1649169921875, 64.99783325195312, 292.67047119140625, 472.16033935546875, 242.51414489746094, 66.2542724609375, 772.42236328125, 40.736263275146484, 448.5705261230469, 123.42678833007812, 240.40757751464844, -200.5984344482422, 191.10426330566406, 61.73687744140625, 1574.4996337890625, -288.03546142578125, -1011.7897338867188, 390.6334228515625, 667.752685546875, 195.0290069580078, -376.4459228515625, -47.35020446777344, -481.41375732421875, -76.94212341308594, 50.90062713623047, -557.4148559570312, 2033.6873779296875, 1083.316162109375, 287.24383544921875, 514.6276245117188, -57.47578430175781, 1379.1954345703125, -307.10760498046875, -670.3019409179688, 73.23725128173828, 199.50828552246094, 127.04785919189453, 1113.3402099609375, 1048.29345703125, 477.528076171875, 306.498046875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000646.npy"} +{"epoch": 0.9486049926578561, "step": 647, "batch_size": 64, "mean": 378.85992431640625, "std": 538.0174560546875, "min": -1227.3377685546875, "p10": -212.59210052490232, "median": 391.65309143066406, "p90": 1064.5028564453125, "max": 1957.7796630859375, "pos_frac": 0.765625, "sample": [500.307861328125, 600.5816040039062, 294.71160888671875, 257.229248046875, 492.9176940917969, 1076.6297607421875, 322.0400085449219, 429.7026672363281, 568.7610473632812, 575.9791870117188, 1594.927734375, 154.85671997070312, 42.928436279296875, 1719.8204345703125, 738.977783203125, 492.0447998046875, -34.83753967285156, 1070.1304931640625, 588.927734375, -190.85812377929688, 230.68704223632812, 255.91558837890625, -49.84614562988281, -221.9066619873047, 536.5325317382812, 410.8365478515625, -78.33503723144531, 1957.7796630859375, -721.2927856445312, 757.1788940429688, -263.49017333984375, -238.0423583984375, 115.65341186523438, 844.50048828125, 1171.22802734375, 717.003662109375, 426.31427001953125, 350.10015869140625, -540.0808715820312, 208.41087341308594, 621.121337890625, -0.14545440673828125, 1184.2650146484375, -187.3165283203125, -54.48427963256836, 416.41131591796875, 380.8976745605469, 428.0753479003906, -54.190208435058594, 1051.3717041015625, 191.02593994140625, 109.34711456298828, 1051.287109375, 246.70556640625, 41.80192565917969, 555.3041381835938, -1227.3377685546875, 649.7605590820312, -273.1564636230469, 387.21759033203125, 479.8660583496094, 545.5202026367188, 396.0885925292969, 142.67208862304688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000647.npy"} +{"epoch": 0.9500734214390602, "step": 648, "batch_size": 64, "mean": 564.3632202148438, "std": 768.7049560546875, "min": -990.97314453125, "p10": -169.8297256469726, "median": 510.15476989746094, "p90": 1539.9658325195317, "max": 3554.892822265625, "pos_frac": 0.75, "sample": [494.9288024902344, 303.07861328125, 130.17169189453125, 493.0276794433594, 275.4832458496094, 683.179443359375, 1950.7835693359375, 1587.608154296875, 653.328857421875, 1428.8004150390625, 83.58383178710938, -706.92919921875, -56.007904052734375, 938.7699584960938, 270.890380859375, -118.78602600097656, 1308.8983154296875, 554.7162475585938, -32.85762023925781, -65.834716796875, -34.26417541503906, 555.1634521484375, -70.03742218017578, 975.4942626953125, -220.1538543701172, 1688.9305419921875, 671.1691284179688, 126.11711120605469, -28.267532348632812, -93.15654754638672, 777.41650390625, 731.202880859375, -191.70559692382812, 3554.892822265625, 644.7771606445312, 539.2637939453125, -990.97314453125, 612.7444458007812, 127.16021728515625, -637.0159912109375, -294.1007080078125, 305.85809326171875, 2298.075439453125, 593.3126220703125, 620.6817626953125, 481.26385498046875, 1393.425537109375, -49.6116943359375, 655.467529296875, 698.6163940429688, 1064.253662109375, 384.70892333984375, 342.0192565917969, 536.8191528320312, 1246.5400390625, 118.3766098022461, 525.3807373046875, 799.17919921875, 277.6712341308594, 2208.229736328125, 1324.24462890625, 242.80096435546875, -418.9382019042969, 1849.37744140625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000648.npy"} +{"epoch": 0.9515418502202643, "step": 649, "batch_size": 64, "mean": 258.40460205078125, "std": 651.8125, "min": -934.8194580078125, "p10": -442.2128692626953, "median": 200.32604217529297, "p90": 1003.2693481445315, "max": 3071.688232421875, "pos_frac": 0.59375, "sample": [533.939208984375, 19.65735626220703, 387.49468994140625, -209.25473022460938, 512.425537109375, -360.1493835449219, -16.32876205444336, 190.68524169921875, 744.6781005859375, 387.32269287109375, 522.58349609375, -154.56051635742188, -487.91302490234375, 98.10081481933594, -451.9938659667969, -173.66268920898438, -91.97209167480469, 422.1777038574219, -337.2483215332031, 274.0621337890625, 633.8872680664062, 133.4022674560547, -480.255126953125, -178.00302124023438, 958.5601806640625, -851.6135864257812, 721.8772583007812, 842.1338500976562, -110.33250427246094, 556.7714233398438, 404.7171325683594, -340.91162109375, 47.904541015625, -30.894981384277344, 674.92431640625, 518.5068359375, 850.9288330078125, 342.280517578125, 841.9115600585938, -341.31396484375, 1126.396728515625, 417.92919921875, -0.5340156555175781, 1095.334228515625, 831.92138671875, -327.2269592285156, 1460.059326171875, 3071.688232421875, 375.8951416015625, 367.90240478515625, -85.76123809814453, 209.9668426513672, -134.7415008544922, 956.6272583007812, -419.56329345703125, 1222.8897705078125, 1044.09814453125, -451.9198303222656, -5.332141876220703, 0.31304359436035156, -910.067138671875, -934.8194580078125, 1022.430419921875, -400.1175231933594], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000649.npy"} +{"epoch": 0.9530102790014684, "step": 650, "batch_size": 64, "mean": 419.18524169921875, "std": 732.1956787109375, "min": -1258.7802734375, "p10": -534.1954315185546, "median": 356.8896942138672, "p90": 1320.8575927734375, "max": 2464.251953125, "pos_frac": 0.75, "sample": [101.19129180908203, -581.1116333007812, 822.034912109375, 704.4813842773438, 258.1465759277344, 946.17138671875, 308.7984924316406, -1108.35595703125, 2245.823974609375, 638.3130493164062, 1135.36669921875, -677.944580078125, 1305.8798828125, 936.3475952148438, 944.6564331054688, 1327.276611328125, 335.39886474609375, 1729.865478515625, 1150.7333984375, 2464.251953125, 784.6428833007812, -82.37639617919922, 1525.73779296875, 652.451416015625, 304.1783752441406, -9.770984649658203, -261.18121337890625, 599.58935546875, 205.46755981445312, 450.3841857910156, 509.32281494140625, -105.99153137207031, -326.13360595703125, -975.3292236328125, -414.71453857421875, 283.7995910644531, -35.34676742553711, 1833.8837890625, -118.29588317871094, 57.7322998046875, 449.22900390625, 319.1407775878906, -504.0851135253906, 677.2885131835938, 538.5059814453125, 561.725341796875, 92.10124206542969, 693.566650390625, 130.83270263671875, 1448.6343994140625, 281.28607177734375, 378.3805236816406, 595.4093017578125, 1037.577880859375, 883.116943359375, 144.52496337890625, 449.65936279296875, -567.8535766601562, -1258.7802734375, 254.18051147460938, 735.1446533203125, 99.11420440673828, -547.099853515625, 70.87979888916016], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000650.npy"} +{"epoch": 0.9544787077826725, "step": 651, "batch_size": 64, "mean": 555.9376220703125, "std": 601.1331176757812, "min": -449.48211669921875, "p10": -55.80458145141598, "median": 473.6164245605469, "p90": 1306.4347778320316, "max": 2293.60107421875, "pos_frac": 0.859375, "sample": [852.2949829101562, 606.5862426757812, 2293.60107421875, 230.887451171875, -309.09283447265625, -14.0562744140625, 405.97418212890625, 595.09619140625, 48.19694519042969, 166.9443817138672, 1194.982421875, 2048.41943359375, 295.2295837402344, -191.58709716796875, 32.8074951171875, 27.340675354003906, 70.94815063476562, 496.43768310546875, -237.48342895507812, 937.3258666992188, 1593.8636474609375, 405.6527099609375, 858.8285522460938, 260.0588684082031, 768.9923706054688, 58.18687438964844, -72.25613403320312, -81.5010986328125, 141.102294921875, 703.3660888671875, 255.2805938720703, -17.417625427246094, 1025.9893798828125, 45.67510986328125, 1186.9598388671875, 858.5343627929688, 608.8653564453125, 123.2174072265625, 754.9080810546875, 3.6263160705566406, 453.4551086425781, 1165.307373046875, -360.2569885253906, 731.0043334960938, 687.7041625976562, 647.3583984375, 42.481170654296875, 925.7676391601562, 170.24131774902344, 482.86492919921875, 166.3587646484375, 348.5260314941406, 1354.2000732421875, 1846.4010009765625, 536.2142944335938, -449.48211669921875, 1151.2452392578125, 464.367919921875, 1912.7867431640625, 549.2274780273438, 1171.1922607421875, 1499.72314453125, 891.4180908203125, 159.11392211914062], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000651.npy"} +{"epoch": 0.9559471365638766, "step": 652, "batch_size": 64, "mean": 409.4013671875, "std": 669.875244140625, "min": -1246.09228515625, "p10": -321.54528198242184, "median": 297.2969055175781, "p90": 1344.1001708984381, "max": 2437.19482421875, "pos_frac": 0.78125, "sample": [384.03009033203125, 128.96914672851562, 766.5613403320312, -1093.9931640625, 557.4375, 448.2804260253906, 238.2286834716797, -531.4127807617188, 786.21142578125, 166.33309936523438, 1411.2274169921875, 1431.782958984375, 1213.629638671875, -1246.09228515625, 77.49524688720703, 474.8520202636719, 40.570709228515625, 763.0120239257812, 1400.01611328125, -405.2472229003906, 937.846923828125, 323.3717956542969, 94.94886016845703, -181.76043701171875, 66.82007598876953, -24.26934814453125, 873.664794921875, -45.49754333496094, 386.8923645019531, 26.77161407470703, 125.41740417480469, 2437.19482421875, -311.4835510253906, 2327.541015625, 292.0545654296875, 1525.940185546875, 9.606979370117188, 138.767822265625, 41.90960693359375, 31.547088623046875, 656.3943481445312, -67.86188507080078, 243.02682495117188, -7.602272033691406, 972.05712890625, 915.2877197265625, 921.45751953125, -325.8574523925781, 399.359619140625, 819.93310546875, 1497.349609375, 44.0103874206543, 818.6725463867188, 246.51541137695312, -429.3607482910156, 416.2046203613281, 1099.578125, -407.4346923828125, 359.1402587890625, 880.365478515625, 210.48423767089844, 573.1746215820312, 302.53924560546875, -24.9237060546875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000652.npy"} +{"epoch": 0.9574155653450808, "step": 653, "batch_size": 64, "mean": 392.53466796875, "std": 649.9874877929688, "min": -968.6412353515625, "p10": -356.89459533691405, "median": 258.72449493408203, "p90": 1309.2636962890626, "max": 2109.54736328125, "pos_frac": 0.734375, "sample": [662.9413452148438, -2.2884368896484375, -70.22334289550781, 1760.3148193359375, 251.2602081298828, -385.5646667480469, 211.2198028564453, -305.86083984375, 6.55712890625, 1013.274658203125, 652.7056884765625, -379.04052734375, 45.16126251220703, 1327.0711669921875, 910.26220703125, 1296.317626953125, 922.2237548828125, 1220.93212890625, 1314.81201171875, 74.04119873046875, 123.12142944335938, 295.89825439453125, 1787.3515625, 487.87200927734375, 495.62872314453125, 739.449462890625, 1172.82470703125, 266.18878173828125, -34.442283630371094, -489.4955749511719, 1410.25, 225.2466583251953, 469.69873046875, -968.6412353515625, -92.7232437133789, -151.71131896972656, 434.6043701171875, 633.5648193359375, -662.4848022460938, 2065.48388671875, 2109.54736328125, 680.1858520507812, 16.167295455932617, -50.59630584716797, 611.1685791015625, 496.474853515625, 17.635589599609375, -260.7001953125, 213.341552734375, 221.03045654296875, 511.4650573730469, 143.13766479492188, 720.5816040039062, 177.21987915039062, 499.93365478515625, -677.1642456054688, -330.3379821777344, 374.1186828613281, 711.897216796875, 176.28187561035156, 451.1722106933594, -368.2760009765625, 250.94448852539062, -306.814453125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000653.npy"} +{"epoch": 0.9588839941262849, "step": 654, "batch_size": 64, "mean": 358.9933776855469, "std": 586.251220703125, "min": -675.1668701171875, "p10": -304.89738464355463, "median": 300.1650848388672, "p90": 1217.142236328125, "max": 1870.6029052734375, "pos_frac": 0.65625, "sample": [5.653160095214844, 370.172607421875, -141.59429931640625, 225.935302734375, 491.87255859375, -150.58921813964844, -387.4103088378906, 319.457275390625, -30.076793670654297, 752.7506103515625, 734.858154296875, -345.9010925292969, 96.46077728271484, 454.84539794921875, -478.32940673828125, -198.3623046875, -125.63634490966797, 1870.6029052734375, 1033.430908203125, 542.8049926757812, 587.9763793945312, 1537.5196533203125, 451.0345458984375, 315.73016357421875, 1353.83984375, 624.7603759765625, -85.70805358886719, 701.6719360351562, 179.09774780273438, -116.77041625976562, -100.82655334472656, 1211.866943359375, 446.4037170410156, 48.033782958984375, 34.061248779296875, -19.91362762451172, 897.4700927734375, 121.74948120117188, 303.16973876953125, 398.0068664550781, 558.4271240234375, 732.3705444335938, 692.9591674804688, -212.95657348632812, -675.1668701171875, 858.82470703125, 287.41937255859375, 1734.6900634765625, 1157.172119140625, -262.4164123535156, 37.617431640625, 1339.699462890625, 611.93896484375, 297.1604309082031, -74.07884979248047, -323.103515625, 600.27880859375, -343.9341735839844, 1655.351806640625, 1219.403076171875, -25.844757080078125, -221.74359130859375, -412.99920654296875, -185.611572265625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000654.npy"} +{"epoch": 0.960352422907489, "step": 655, "batch_size": 64, "mean": 380.1811828613281, "std": 622.44775390625, "min": -800.5918579101562, "p10": -345.3684448242187, "median": 270.57212829589844, "p90": 1177.2133544921876, "max": 2307.8837890625, "pos_frac": 0.75, "sample": [-406.63653564453125, -800.5918579101562, 1189.9710693359375, 124.63906860351562, 50.47595977783203, 1164.3349609375, 756.1318359375, 854.9501953125, 390.3273010253906, -291.23431396484375, 742.0209350585938, 56.129905700683594, -104.771484375, -548.6931762695312, 1.3170108795166016, 1182.732666015625, -118.46883392333984, 852.1912841796875, -283.146728515625, 1065.55322265625, -346.07940673828125, 427.85028076171875, 887.8218994140625, -588.8805541992188, -242.2519073486328, 575.188232421875, 275.4613952636719, 47.319732666015625, -180.6829071044922, 289.0850830078125, -87.7987289428711, 1590.2113037109375, 178.6818389892578, 265.682861328125, 684.0724487304688, 881.871337890625, -755.8917846679688, 817.0729370117188, 613.656494140625, 977.7450561523438, 381.6339111328125, 50.037452697753906, 635.7182006835938, 2307.8837890625, 1621.6383056640625, -45.443817138671875, 365.9173278808594, -382.18896484375, 1133.8072509765625, 513.996826171875, 102.07344055175781, 41.47509765625, 122.12469482421875, 162.68467712402344, 1247.3916015625, 192.0214080810547, 1574.3228759765625, -343.70953369140625, 6.3518524169921875, 545.2220458984375, 705.1604614257812, 161.40736389160156, 159.75001525878906, 884.9512329101562], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000655.npy"} +{"epoch": 0.9618208516886931, "step": 656, "batch_size": 64, "mean": 651.910400390625, "std": 700.8582153320312, "min": -473.19366455078125, "p10": -92.06679077148435, "median": 479.43959045410156, "p90": 1692.2075195312502, "max": 2533.833740234375, "pos_frac": 0.8125, "sample": [1584.684326171875, 819.9652709960938, -473.19366455078125, -18.329456329345703, 140.82708740234375, 237.7386474609375, 290.50177001953125, 558.6146850585938, 346.6353454589844, 779.43017578125, 966.8133544921875, 741.1189575195312, 343.6256103515625, 158.12014770507812, 504.2251892089844, 1645.253173828125, -154.55328369140625, 99.31979370117188, 1712.330810546875, 175.86221313476562, 795.1837158203125, -23.2110595703125, 2054.19873046875, -157.50912475585938, -123.31533813476562, 236.80239868164062, 940.10693359375, 1082.867919921875, 485.4903564453125, 1165.76904296875, 384.9587097167969, 2533.833740234375, 698.3057250976562, -104.25230407714844, 2333.921875, 823.0072021484375, 473.3888244628906, 1110.7445068359375, 2215.7744140625, 91.42001342773438, 2139.806396484375, -172.20603942871094, 1388.098876953125, -27.554443359375, 1106.4927978515625, 1023.3578491210938, 297.34161376953125, 2155.408203125, 329.27423095703125, 291.4980773925781, 778.7936401367188, 194.9475555419922, -123.5967788696289, 216.3036346435547, 912.694091796875, 110.4717025756836, -63.63392639160156, 319.71209716796875, -25.06334686279297, 974.0161743164062, 891.4464111328125, 19.515695571899414, 749.7468872070312, 758.91552734375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000656.npy"} +{"epoch": 0.9632892804698973, "step": 657, "batch_size": 64, "mean": 524.4208374023438, "std": 684.6376953125, "min": -1370.1456298828125, "p10": -104.29326171874999, "median": 452.3066101074219, "p90": 1351.7727050781255, "max": 2506.646728515625, "pos_frac": 0.859375, "sample": [1248.823486328125, 31.866802215576172, 890.4995727539062, 446.15240478515625, 1509.784423828125, 1725.0264892578125, 776.5535278320312, 107.28898620605469, 1191.96044921875, -10.0877685546875, -417.9453125, 463.1019287109375, 1395.893798828125, -107.59806823730469, 238.2324676513672, 458.4608154296875, 68.11271667480469, 373.6811828613281, 124.85686492919922, 1397.7515869140625, 2196.3681640625, 432.00286865234375, 10.146232604980469, 2506.646728515625, 1197.632568359375, 1161.22705078125, 255.4125213623047, 85.38250732421875, 793.0501708984375, 686.3724365234375, 948.8997192382812, 485.57904052734375, 37.05291748046875, -1370.1456298828125, 553.0620727539062, 485.4107666015625, 226.35670471191406, -187.07452392578125, 28.598297119140625, 284.5647888183594, 624.9666748046875, -1039.93212890625, -283.95574951171875, 567.7688598632812, 930.9281005859375, 121.3027572631836, 333.12664794921875, 649.4527587890625, 490.54522705078125, 22.13483428955078, 783.6864013671875, 793.6076049804688, 137.0894012451172, 820.0613403320312, 403.2474365234375, 293.4367980957031, 1202.6727294921875, 2416.8515625, 410.227294921875, -222.92745971679688, 266.0878601074219, -96.58204650878906, 707.0718383789062, 503.1016845703125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000657.npy"} +{"epoch": 0.9647577092511013, "step": 658, "batch_size": 64, "mean": 401.3349609375, "std": 567.880859375, "min": -1038.072021484375, "p10": -300.0959075927734, "median": 369.34307861328125, "p90": 1212.8512329101563, "max": 1910.5521240234375, "pos_frac": 0.765625, "sample": [693.628662109375, 724.3873291015625, 605.6546630859375, -299.24310302734375, 327.4752502441406, -230.1795654296875, 1179.35693359375, 286.8525695800781, -368.9300537109375, 401.4258728027344, 424.00970458984375, 1363.323974609375, 112.72386169433594, 173.66732788085938, 342.0570068359375, -516.078857421875, 274.1413269042969, 329.5993957519531, 152.91061401367188, -77.31730651855469, 572.125732421875, 176.11962890625, 315.55133056640625, 153.56800842285156, -1038.072021484375, 939.6805419921875, -218.42910766601562, 572.0350952148438, 553.9450073242188, -167.23391723632812, -76.66471862792969, 209.931884765625, -63.173465728759766, 435.42059326171875, 299.521484375, 1468.5340576171875, 6.981710433959961, 1132.1673583984375, 630.0364990234375, 1300.7567138671875, 78.42530822753906, 396.629150390625, 607.4469604492188, 1238.35888671875, 1166.4049072265625, 618.5493774414062, -221.99966430664062, 522.7537231445312, 760.5347900390625, 761.3239135742188, -441.416015625, 1227.2059326171875, -360.05810546875, 1910.5521240234375, 707.6608276367188, 1090.6497802734375, 518.6895751953125, 1406.22900390625, -300.4613952636719, 663.1929931640625, 122.83552551269531, 489.8283386230469, 197.222900390625, -577.3894653320312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000658.npy"} +{"epoch": 0.9662261380323054, "step": 659, "batch_size": 64, "mean": 405.39166259765625, "std": 580.9865112304688, "min": -824.5993041992188, "p10": -212.5287170410156, "median": 384.3136444091797, "p90": 1116.6276367187502, "max": 2453.085693359375, "pos_frac": 0.765625, "sample": [685.6228637695312, -8.837249755859375, 203.43154907226562, 925.6336669921875, -159.47056579589844, -225.88429260253906, 1137.4283447265625, -392.850341796875, -508.7254943847656, -130.20204162597656, 1068.0926513671875, 360.280517578125, 517.3038940429688, -181.36570739746094, 18.709976196289062, 103.93721771240234, 542.8389892578125, 279.4734191894531, 194.054931640625, 248.59841918945312, 382.9105224609375, 528.6040649414062, -453.6600341796875, 2453.085693359375, -824.5993041992188, 1248.388671875, 443.3058776855469, 546.620849609375, 1245.7431640625, 270.7081298828125, 385.7167663574219, 568.900390625, 0.11720466613769531, -30.228591918945312, 727.5343017578125, 394.67608642578125, 71.69792175292969, -15.381816864013672, 65.45095825195312, 1137.6539306640625, -537.1170654296875, 1.3919143676757812, 637.9204711914062, -104.59275817871094, 783.863525390625, 82.9327392578125, -25.081626892089844, 587.0037231445312, 1680.5966796875, 1850.854248046875, 866.9588623046875, 195.70542907714844, 724.4697265625, 584.7652587890625, 535.4196166992188, 298.2127685546875, 947.9382934570312, 1054.319091796875, 556.092041015625, -340.4218444824219, 809.1415405273438, 60.09724426269531, 424.3209228515625, 444.9605712890625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000659.npy"} +{"epoch": 0.9676945668135095, "step": 660, "batch_size": 64, "mean": 514.663330078125, "std": 663.6431274414062, "min": -1076.2808837890625, "p10": -260.8917785644531, "median": 438.36817932128906, "p90": 1340.4014526367191, "max": 2309.470703125, "pos_frac": 0.765625, "sample": [-111.67274475097656, 271.17431640625, 1124.4188232421875, 424.1603698730469, 420.24163818359375, 542.4783935546875, 659.9907836914062, 1367.478759765625, 1053.449951171875, 907.6393432617188, 373.21588134765625, -242.025146484375, 1236.6395263671875, 501.3720703125, 911.6171875, 1782.256103515625, -143.2318878173828, -71.48463439941406, -294.18365478515625, -438.1803894042969, 1145.00244140625, 1573.261474609375, 52.11988067626953, 440.0443420410156, 702.7921142578125, 1992.857666015625, 355.85150146484375, -268.97747802734375, 688.63330078125, 427.9906005859375, 214.28738403320312, 1277.2210693359375, -577.726806640625, 2309.470703125, 280.9602966308594, 630.2840576171875, 735.489501953125, 113.98939514160156, 444.33526611328125, 877.7556762695312, 165.4770965576172, 207.3214569091797, 636.6204223632812, 891.9901733398438, -152.8192138671875, -28.555938720703125, 73.2636489868164, 1683.834228515625, 1012.3717651367188, 1146.5328369140625, -386.4134521484375, -53.591712951660156, -576.5291137695312, 682.397216796875, 978.0, 570.984130859375, 134.93572998046875, 364.3932800292969, 579.8640747070312, 1800.6451416015625, -207.01812744140625, -1076.2808837890625, 363.3397216796875, 436.6920166015625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000660.npy"} +{"epoch": 0.9691629955947136, "step": 661, "batch_size": 64, "mean": 461.53985595703125, "std": 667.1981201171875, "min": -848.2618408203125, "p10": -150.63851318359374, "median": 369.3779754638672, "p90": 1173.630444335938, "max": 3262.689453125, "pos_frac": 0.796875, "sample": [193.76011657714844, -149.8372344970703, 848.1510009765625, -31.935991287231445, 393.23138427734375, 168.23741149902344, 526.4942016601562, -185.97378540039062, 726.1041870117188, 332.1488037109375, 1411.9150390625, 369.27789306640625, 738.2979125976562, 580.453369140625, 3262.689453125, 1075.3486328125, 200.71820068359375, -19.25408935546875, -839.62109375, 141.06472778320312, -1.1375541687011719, 887.1409301757812, 176.57022094726562, 403.73321533203125, 210.03741455078125, 238.87437438964844, 369.4780578613281, 855.3641357421875, 1215.751220703125, 61.81681823730469, 261.098388671875, 585.3880004882812, 624.4744873046875, 1246.202880859375, 570.6691284179688, 280.62628173828125, 282.48272705078125, 316.4486999511719, 193.31240844726562, 567.7572631835938, -557.73291015625, 358.20904541015625, -150.98191833496094, 781.4580688476562, 825.21630859375, -848.2618408203125, 678.0281982421875, 727.6724243164062, 39.89087677001953, 712.7325439453125, 2786.940185546875, -609.7658081054688, 274.6947021484375, -52.90449523925781, 747.6804809570312, 1494.3173828125, 31.599416732788086, 645.4976806640625, -198.58599853515625, 1326.0943603515625, 506.49267578125, 533.2068481445312, 404.16436767578125, -4.471733093261719], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000661.npy"} +{"epoch": 0.9706314243759178, "step": 662, "batch_size": 64, "mean": 364.87664794921875, "std": 676.6697387695312, "min": -1204.557373046875, "p10": -402.2246520996094, "median": 410.471923828125, "p90": 1053.3000244140626, "max": 2414.896240234375, "pos_frac": 0.71875, "sample": [-74.45162200927734, 861.2938232421875, 1053.09375, -1178.524169921875, 619.128662109375, -405.32025146484375, 474.87353515625, 800.613037109375, 1862.413818359375, 25.856483459472656, 882.2699584960938, -395.0015869140625, -1204.557373046875, -267.63275146484375, 1533.2811279296875, 147.6483154296875, -38.36548614501953, 327.8719787597656, 2414.896240234375, 808.5584106445312, 1119.562255859375, -178.89862060546875, 445.68780517578125, -224.42333984375, 175.14620971679688, 930.4560546875, 75.22727966308594, 1053.388427734375, 397.7824401855469, 264.23382568359375, 346.71624755859375, -178.4520263671875, 425.6260986328125, -126.4049072265625, 606.8995971679688, 1044.5579833984375, 582.48583984375, 423.1614074707031, 660.6213989257812, 630.2501831054688, 36.871612548828125, 234.7299346923828, 1027.1253662109375, 1835.3343505859375, 705.7020874023438, 1223.3935546875, -798.7523193359375, 441.91156005859375, 926.1718139648438, 296.5992431640625, -444.4158935546875, 33.64559555053711, -98.98970031738281, 490.18157958984375, -606.4600830078125, -217.60745239257812, 589.9034423828125, 523.4291381835938, 237.15225219726562, 643.321533203125, 635.4382934570312, -797.4785766601562, -296.59881591796875, 9.927413940429688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000662.npy"} +{"epoch": 0.9720998531571219, "step": 663, "batch_size": 64, "mean": 444.63604736328125, "std": 745.7501220703125, "min": -1583.176513671875, "p10": -204.12410278320306, "median": 303.00758361816406, "p90": 1245.2884521484375, "max": 3481.63232421875, "pos_frac": 0.765625, "sample": [877.06494140625, 308.9049072265625, 810.7123413085938, 22.33326530456543, 352.34063720703125, 1107.8896484375, 203.9708709716797, 297.1102600097656, -47.87103271484375, 357.2120056152344, 105.82817840576172, 165.0521697998047, 1616.24072265625, 904.6298217773438, 68.34454345703125, 936.3900146484375, 697.0484619140625, 45.56958770751953, -1583.176513671875, 255.21759033203125, 375.33306884765625, 621.7698364257812, -230.59417724609375, 739.3362426757812, 694.9774169921875, 397.81195068359375, 574.6668090820312, 188.18655395507812, 497.14208984375, 3481.63232421875, -307.1338195800781, 1228.7113037109375, 1702.60107421875, -72.91099548339844, 539.9124755859375, 2042.5660400390625, 1616.44775390625, 77.04795837402344, 2.743898391723633, 1901.7379150390625, 1059.95361328125, -298.7317199707031, 167.50555419921875, 347.502685546875, -142.360595703125, 564.3546142578125, 916.5052490234375, 150.0980987548828, 102.6792984008789, 1054.2730712890625, 189.2786102294922, -30.563575744628906, 1252.3929443359375, -987.0588989257812, 889.4259033203125, -36.00624465942383, -520.6549072265625, -135.77232360839844, 88.26779174804688, 656.4371337890625, -390.393310546875, -97.50556182861328, -101.76852416992188, 186.0509033203125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000663.npy"} +{"epoch": 0.973568281938326, "step": 664, "batch_size": 64, "mean": 502.282470703125, "std": 659.3826904296875, "min": -800.8806762695312, "p10": -207.15024108886718, "median": 418.36695861816406, "p90": 1234.2181884765625, "max": 2370.35107421875, "pos_frac": 0.765625, "sample": [1119.7230224609375, -0.9534626007080078, 669.4537963867188, 723.6393432617188, 889.25732421875, 302.9682922363281, 314.3276062011719, 1889.2684326171875, -127.1939926147461, 1562.9842529296875, -333.7509765625, 798.5763549804688, 442.89276123046875, 268.6014404296875, 741.8064575195312, 302.90789794921875, -674.52783203125, 477.74102783203125, 19.807411193847656, 587.1987915039062, 1232.39794921875, 1234.998291015625, 26.0379638671875, 1183.04052734375, -218.97097778320312, 179.67025756835938, 1939.6036376953125, -210.39846801757812, 565.9359741210938, -542.8551025390625, 1195.08154296875, 210.16140747070312, -15.44073486328125, 1017.947021484375, 521.5889282226562, 416.68658447265625, -800.8806762695312, 339.0105285644531, 732.05810546875, 1080.451171875, 184.72950744628906, 472.8563537597656, 692.7158203125, -199.571044921875, 944.6292724609375, 2370.35107421875, 873.1634521484375, 236.11892700195312, 2196.18359375, -173.5303955078125, -119.69293212890625, 1321.4559326171875, 1187.159912109375, 182.4418182373047, 228.53646850585938, 570.284912109375, 153.73428344726562, 349.1427001953125, -133.5591278076172, -130.75027465820312, 420.0473327636719, 38.811737060546875, 802.6013793945312, -382.6363525390625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000664.npy"} +{"epoch": 0.9750367107195301, "step": 665, "batch_size": 64, "mean": 421.0375061035156, "std": 528.9981689453125, "min": -665.7749633789062, "p10": -125.37526779174804, "median": 356.32164001464844, "p90": 1108.0399902343754, "max": 1853.739501953125, "pos_frac": 0.765625, "sample": [-128.71780395507812, -53.94888687133789, 353.2228698730469, 357.5893859863281, -69.5844497680664, 513.5609741210938, 629.01953125, -45.43853759765625, 1506.711181640625, 547.9180297851562, 321.3505554199219, 631.3302612304688, 135.47891235351562, -665.7749633789062, 665.2177734375, -20.527860641479492, -94.45468139648438, 213.85256958007812, 59.72804260253906, 543.6897583007812, 755.6348266601562, 174.65577697753906, 496.2825622558594, 396.0157165527344, -113.38267517089844, 424.6673889160156, -538.8944091796875, 1412.4163818359375, 808.1632080078125, -32.055484771728516, 758.6487426757812, 355.7992858886719, 670.9871826171875, 310.9789733886719, 1242.42236328125, 261.8797607421875, 1142.8336181640625, 577.6514892578125, 294.1365051269531, 256.6983642578125, 245.65435791015625, 1680.1239013671875, 545.4752807617188, -351.555908203125, 452.57708740234375, 900.6043701171875, 325.3875732421875, -469.94000244140625, 463.169921875, 1026.8548583984375, 108.63995361328125, 356.843994140625, 906.83935546875, 170.65243530273438, 1804.4306640625, -230.59153747558594, 629.0009765625, -125.67435455322266, 217.30731201171875, 872.2404174804688, 1853.739501953125, 84.71980285644531, 548.8153076171875, -124.67739868164062], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000665.npy"} +{"epoch": 0.9765051395007343, "step": 666, "batch_size": 64, "mean": 418.2576904296875, "std": 618.3339233398438, "min": -1126.6248779296875, "p10": -329.3957489013672, "median": 348.3683319091797, "p90": 1000.1734924316406, "max": 2720.186767578125, "pos_frac": 0.765625, "sample": [768.0169677734375, 353.5023193359375, 727.9698486328125, 449.6790771484375, -463.9773254394531, 645.4959106445312, 1001.4965209960938, 988.650390625, 149.17843627929688, 984.9852294921875, -37.791290283203125, 247.66647338867188, 339.3553161621094, -413.884765625, -320.32025146484375, 262.11895751953125, 297.61181640625, -149.42657470703125, -124.612548828125, -146.33380126953125, 1385.168212890625, -7.395500183105469, 841.5132446289062, 498.28887939453125, 2008.5634765625, -67.65058898925781, 772.666259765625, 390.37860107421875, -114.9368667602539, -798.9290771484375, 366.9081115722656, 631.1552734375, 392.50286865234375, 1099.6025390625, 987.1199951171875, 423.4022216796875, 975.6503295898438, -333.2852478027344, 423.1259765625, 569.959716796875, 38.64017868041992, 813.654052734375, -371.2568359375, 1147.23681640625, 317.28741455078125, 328.754638671875, -1126.6248779296875, 995.7315673828125, 335.4643859863281, 75.37411499023438, 395.78839111328125, 624.991455078125, 997.08642578125, 8.243839263916016, 313.3699951171875, 176.53945922851562, 192.52825927734375, 343.2343444824219, 2720.186767578125, 228.90333557128906, -425.23846435546875, 929.0252075195312, 337.8131103515625, 1368.5709228515625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000666.npy"} +{"epoch": 0.9779735682819384, "step": 667, "batch_size": 64, "mean": 498.86529541015625, "std": 652.764892578125, "min": -742.762451171875, "p10": -321.89395141601557, "median": 511.7121887207031, "p90": 1311.9731811523443, "max": 2206.197265625, "pos_frac": 0.8125, "sample": [438.5497131347656, -532.0177612304688, 667.340087890625, -683.9412841796875, 21.757537841796875, 943.6145629882812, 1007.842041015625, 613.2324829101562, 538.8995971679688, 1372.584716796875, 201.7259979248047, 137.47860717773438, 108.92086029052734, 484.5247802734375, 296.344970703125, -742.762451171875, 702.86376953125, 627.7537231445312, 1737.190185546875, 946.9396362304688, 792.890625, 236.18966674804688, 1170.5462646484375, 117.65079498291016, 551.6175537109375, 320.62200927734375, 1052.289306640625, -71.27440643310547, 954.013916015625, 814.8596801757812, 737.45556640625, 556.4862670898438, 853.6318969726562, 650.4658813476562, 455.3711242675781, -257.1969299316406, 559.7597045898438, 382.2528076171875, 403.8662414550781, -11.526924133300781, -61.08024597167969, 1632.3505859375, 1922.0687255859375, 0.42308807373046875, 747.0269165039062, 1514.720947265625, 660.1426391601562, 225.23519897460938, 148.9495849609375, 788.25048828125, 1.1700668334960938, 805.2188720703125, -463.33880615234375, 148.02552795410156, 2206.197265625, 2187.16845703125, 78.9668960571289, 953.7957153320312, -657.9851684570312, 319.8435363769531, -717.7938232421875, -349.6212463378906, -73.06277465820312, 751.8943481445312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000667.npy"} +{"epoch": 0.9794419970631424, "step": 668, "batch_size": 64, "mean": 346.78289794921875, "std": 756.8421020507812, "min": -1240.262451171875, "p10": -516.7696685791016, "median": 300.47093200683594, "p90": 1227.5736450195316, "max": 2788.40576171875, "pos_frac": 0.71875, "sample": [509.6274108886719, 288.281494140625, 1265.7906494140625, -1240.262451171875, 222.3013458251953, 860.96923828125, 1532.3681640625, 258.339599609375, 1020.0465698242188, 426.1486511230469, -520.0018920898438, 510.93646240234375, 70.68840789794922, 86.53659057617188, 417.68597412109375, 1055.1793212890625, 79.82661437988281, 40.03315734863281, -398.364990234375, -608.5590209960938, -1054.5224609375, 1002.0438842773438, -434.3595275878906, 903.8112182617188, -219.51235961914062, 602.4638061523438, 387.5820617675781, 396.3764343261719, 43.58058166503906, 676.2135009765625, 342.0096435546875, 677.0092163085938, 1138.400634765625, -509.2278137207031, 856.1865844726562, 478.169189453125, -866.3941040039062, 1082.981201171875, 96.24079132080078, -175.5931396484375, 201.99237060546875, 39.81597137451172, 1414.444091796875, -52.94799041748047, -533.2791137695312, 1047.2178955078125, -34.14105224609375, -470.81744384765625, 312.6603698730469, -36.68528747558594, 391.4561767578125, 286.5958251953125, 321.35009765625, 250.12228393554688, -208.79153442382812, 1302.9349365234375, 417.5735778808594, 331.98162841796875, 2368.716552734375, -1175.989013671875, -70.83206939697266, 1941.120849609375, 60.168304443359375, 2788.40576171875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000668.npy"} +{"epoch": 0.9809104258443465, "step": 669, "batch_size": 64, "mean": 519.8635864257812, "std": 673.9917602539062, "min": -1388.4769287109375, "p10": -174.47809448242185, "median": 436.052978515625, "p90": 1403.2423828125002, "max": 2576.02783203125, "pos_frac": 0.8125, "sample": [227.03895568847656, 53.58464050292969, -57.39897918701172, 1164.5679931640625, -855.0809936523438, 433.08807373046875, 657.947021484375, 780.2001953125, 376.60638427734375, -88.76839447021484, 2576.02783203125, -160.96438598632812, 27.4324951171875, 355.3459777832031, -392.5528869628906, 457.5820617675781, 1615.628173828125, -80.81560516357422, 118.17108917236328, 203.6786651611328, 566.0440673828125, 736.6971435546875, 1416.7294921875, 1350.015869140625, 163.73143005371094, 1840.861572265625, 71.94650268554688, 1457.3404541015625, 1027.7010498046875, 50.92947769165039, 579.369873046875, 1291.099853515625, -180.26968383789062, 549.567626953125, 1538.53955078125, -570.3970336914062, -268.2349548339844, 822.4961547851562, 814.8443603515625, 900.0906372070312, 1371.7724609375, 359.7279052734375, 143.09906005859375, 350.1905212402344, -1388.4769287109375, 850.08544921875, 970.876708984375, 756.8827514648438, 439.01788330078125, 299.88360595703125, -466.9490051269531, 1030.28857421875, 420.0005187988281, 510.8985290527344, 702.4232788085938, 858.2325439453125, 68.88505554199219, 234.90872192382812, 622.2817993164062, 1288.3922119140625, -57.93020248413086, 1591.0185546875, 412.5645446777344, 332.7740783691406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000669.npy"} +{"epoch": 0.9823788546255506, "step": 670, "batch_size": 64, "mean": 505.550537109375, "std": 666.301513671875, "min": -1269.389892578125, "p10": -273.88405151367186, "median": 390.02565002441406, "p90": 1404.257019042969, "max": 1930.044677734375, "pos_frac": 0.765625, "sample": [226.97769165039062, 497.26739501953125, 248.7576904296875, 1369.9854736328125, 413.27398681640625, -134.85260009765625, -166.55880737304688, -65.82281494140625, 1064.03466796875, 614.7317504882812, 339.58648681640625, -88.72711181640625, -1269.389892578125, 1930.044677734375, 1149.1168212890625, 1755.4696044921875, 764.33837890625, 357.3133850097656, 1313.0511474609375, 737.9932861328125, 185.5788116455078, -612.6941528320312, 1289.9864501953125, -252.597412109375, -506.27593994140625, 381.0867614746094, 1510.8468017578125, 66.92938232421875, 916.8045043945312, -555.498046875, 1418.94482421875, 804.9554443359375, -222.90150451660156, 243.42333984375, 247.16160583496094, 1479.5338134765625, 1214.853271484375, -283.00689697265625, 230.04013061523438, 1289.061767578125, 404.64349365234375, 1240.89306640625, 845.184326171875, -240.62872314453125, 1804.9005126953125, 882.9679565429688, 398.96453857421875, 280.4057312011719, -290.90203857421875, -284.1004638671875, 71.50595092773438, 1422.0233154296875, 230.78472900390625, 353.54071044921875, 1304.7750244140625, 161.72555541992188, 232.12542724609375, 1013.7182006835938, -2.15399169921875, 898.87158203125, 426.9280090332031, 588.9981079101562, 58.7406005859375, 648.4986572265625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000670.npy"} +{"epoch": 0.9838472834067548, "step": 671, "batch_size": 64, "mean": 397.0133056640625, "std": 669.0810546875, "min": -1679.781494140625, "p10": -234.39310455322266, "median": 315.3450469970703, "p90": 1242.8613525390624, "max": 1858.709716796875, "pos_frac": 0.765625, "sample": [7.4788665771484375, -1506.385986328125, 461.3961486816406, 54.865814208984375, -120.06954956054688, 736.849609375, 103.17318725585938, 202.6542510986328, 682.899658203125, 399.53289794921875, 474.95343017578125, -19.904212951660156, 154.3107452392578, 1128.9334716796875, 1048.7861328125, 104.21049499511719, 301.5014953613281, 211.36114501953125, 877.284423828125, 829.0285034179688, 267.8785705566406, 1482.291259765625, -1679.781494140625, 1237.32373046875, 759.01416015625, 38.51182556152344, 1858.709716796875, 494.7309265136719, 222.31512451171875, 551.4514770507812, -37.816951751708984, -647.874755859375, -425.9247131347656, 460.6492919921875, 1221.900390625, -198.093017578125, 372.68817138671875, 282.1287536621094, 616.2816162109375, 1431.1331787109375, 1408.7628173828125, 361.9629821777344, -240.3679962158203, 116.28804779052734, 716.3001098632812, -39.91377258300781, 256.005126953125, 960.3682861328125, -344.6724548339844, -48.481605529785156, 793.5694580078125, 470.8551940917969, -20.78983497619629, 189.54095458984375, 1245.234619140625, 814.2193603515625, -220.45169067382812, -697.7442626953125, 208.30152893066406, 1509.695556640625, 329.1885986328125, 1837.58056640625, 1106.348388671875, 256.6749572753906], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000671.npy"} +{"epoch": 0.9853157121879589, "step": 672, "batch_size": 64, "mean": 409.2482604980469, "std": 756.9337768554688, "min": -1359.6126708984375, "p10": -593.3215454101562, "median": 407.46331787109375, "p90": 1354.8715820312502, "max": 2656.5, "pos_frac": 0.71875, "sample": [437.278076171875, -22.951263427734375, -388.1619873046875, 175.23977661132812, -79.70137023925781, 138.59400939941406, 776.0306396484375, 432.31524658203125, 1796.22998046875, 340.2551574707031, 45.31957244873047, -196.0633544921875, -600.9886474609375, -223.06019592285156, 501.2284240722656, 252.86563110351562, -92.98617553710938, -575.431640625, 549.1273193359375, 1728.7205810546875, 90.23333740234375, 776.435546875, 5.138572692871094, -134.11764526367188, 1285.314453125, -1359.6126708984375, 1108.855712890625, -959.604736328125, 499.9368591308594, 2656.5, -81.19265747070312, 852.7880249023438, 939.6529541015625, 1243.353271484375, 1305.247802734375, 780.6458740234375, 507.6201171875, -665.1600952148438, 242.16043090820312, 2080.197265625, -957.835693359375, 35.565040588378906, 434.05584716796875, 1403.1478271484375, 889.0363159179688, 498.31005859375, 961.7033081054688, 216.750732421875, 805.254638671875, 600.1743774414062, 1473.30712890625, 105.85098266601562, -143.587646484375, 677.685302734375, 80.8258056640625, 753.5888061523438, 1302.6370849609375, 143.25433349609375, -131.6742401123047, -704.8314208984375, 499.80218505859375, 382.61138916015625, 1376.138916015625, -678.1331787109375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000672.npy"} +{"epoch": 0.986784140969163, "step": 673, "batch_size": 64, "mean": 389.7848205566406, "std": 622.8507690429688, "min": -1210.5364990234375, "p10": -253.33554382324215, "median": 366.2184600830078, "p90": 989.0790710449219, "max": 3175.9658203125, "pos_frac": 0.796875, "sample": [-201.25535583496094, -509.3150634765625, 442.591064453125, 977.2736206054688, 385.3525390625, 541.1859741210938, 713.88232421875, 334.8442687988281, 1295.0130615234375, 327.0672302246094, -265.00579833984375, 1484.2445068359375, 543.067626953125, 679.866455078125, 37.77439880371094, 271.010986328125, -472.39794921875, 898.0004272460938, 1456.435302734375, 327.72723388671875, -170.5552215576172, 291.73077392578125, 381.4499816894531, -75.81874084472656, 116.58207702636719, -1210.5364990234375, 181.00155639648438, 636.1519165039062, -560.3359375, 177.8836212158203, 737.3652954101562, 1398.766357421875, 149.0430450439453, 46.53031539916992, 725.2551879882812, 707.7811889648438, 437.05181884765625, -490.21282958984375, 523.404052734375, 659.7529296875, 749.4345703125, 74.7369155883789, 219.05551147460938, 719.22216796875, 350.9869384765625, 88.56583404541016, 519.555908203125, -185.62570190429688, 542.202880859375, 3175.9658203125, 154.1844482421875, 80.02095031738281, 529.4144287109375, 670.9569091796875, -688.71484375, 469.07257080078125, 590.1220092773438, 1146.658447265625, 123.89776611328125, -226.10494995117188, -71.90035247802734, 658.9135131835938, 994.1385498046875, 331.8148498535156], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000673.npy"} +{"epoch": 0.9882525697503671, "step": 674, "batch_size": 64, "mean": 493.3524475097656, "std": 666.8971557617188, "min": -615.6722412109375, "p10": -258.35149383544916, "median": 445.54689025878906, "p90": 1137.3736083984377, "max": 3123.986572265625, "pos_frac": 0.75, "sample": [-114.47491455078125, 29.057994842529297, 816.9577026367188, 653.4366455078125, 539.3878173828125, 767.3829956054688, -48.18031311035156, -101.56095886230469, -560.9142456054688, 1148.1500244140625, 1252.11669921875, 804.5537109375, 105.08009338378906, 483.65594482421875, -217.6311492919922, 498.4685974121094, -110.11740112304688, 2133.85107421875, 653.41552734375, 3123.986572265625, 547.607421875, 482.0653076171875, 58.17942428588867, 1027.138916015625, 429.18841552734375, -357.0172119140625, 1042.20703125, 461.620849609375, 624.4000244140625, -116.85528564453125, 1112.2286376953125, 2009.8118896484375, -88.47640991210938, 286.7760009765625, 380.82366943359375, 761.8169555664062, -290.96783447265625, -305.68255615234375, -358.7929382324219, 854.4581298828125, 645.33349609375, 429.4729309082031, 396.7442626953125, 1729.916259765625, 252.62374877929688, 860.7171630859375, 941.9783935546875, 961.5526733398438, 552.0801391601562, 139.2920684814453, -615.6722412109375, 187.55104064941406, 1814.873291015625, -275.8030700683594, -63.741783142089844, 711.6419677734375, 224.7911376953125, 680.3360595703125, 368.6579895019531, 616.8104248046875, 148.84027099609375, -33.92250061035156, 170.2171630859375, 313.11151123046875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000674.npy"} +{"epoch": 0.9897209985315712, "step": 675, "batch_size": 64, "mean": 519.83984375, "std": 697.572998046875, "min": -666.3275756835938, "p10": -118.13928909301758, "median": 372.39630126953125, "p90": 1477.0023193359377, "max": 2781.71142578125, "pos_frac": 0.78125, "sample": [258.88653564453125, 1262.1875, 601.4262084960938, 108.51522827148438, 92.24275970458984, 141.08192443847656, 808.74169921875, -26.319549560546875, 698.1165161132812, 298.5192565917969, 811.6017456054688, -243.877685546875, 537.5183715820312, 1153.438720703125, -119.77050018310547, -38.56108093261719, 185.48614501953125, 1316.3599853515625, 452.4587097167969, 506.9249572753906, 714.1380004882812, 72.97505950927734, 2447.2001953125, 801.8760986328125, -46.73218536376953, 392.0665588378906, -114.3331298828125, 718.8550415039062, 657.7855834960938, 411.7855224609375, 352.7260437011719, 1597.8511962890625, 299.32415771484375, 1935.9185791015625, 8.690704345703125, 1592.1480712890625, 2781.71142578125, -208.6645050048828, 1065.271484375, 2020.705810546875, 230.7003936767578, 135.99301147460938, -24.067054748535156, 163.375, 327.75506591796875, -66.95245361328125, -666.3275756835938, 262.0005798339844, 642.5249633789062, 263.7788391113281, 119.87947082519531, 1499.7908935546875, 754.987060546875, -640.5669555664062, -640.5836181640625, 447.79571533203125, 78.1209716796875, 702.005859375, -45.946197509765625, 1423.8289794921875, 410.8287658691406, 1174.385498046875, 859.496826171875, -449.3308410644531], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000675.npy"} +{"epoch": 0.9911894273127754, "step": 676, "batch_size": 64, "mean": 474.67572021484375, "std": 605.0783081054688, "min": -1026.0224609375, "p10": -142.85707092285156, "median": 414.1235656738281, "p90": 1338.5268188476562, "max": 2476.68896484375, "pos_frac": 0.75, "sample": [5.637718200683594, 1638.185791015625, 880.5338745117188, 896.5187377929688, -55.86570739746094, -50.804969787597656, 526.2752685546875, 791.149169921875, 44.367244720458984, -1026.0224609375, 526.0792236328125, 42.5784912109375, -6.5819244384765625, 1238.28076171875, 1646.011962890625, 2476.68896484375, -147.70025634765625, -217.9905242919922, -75.51268005371094, -131.55630493164062, -5.298547744750977, 397.113525390625, 677.7470703125, -40.259185791015625, -160.54190063476562, 750.300537109375, 104.325927734375, 1278.01171875, 415.8492126464844, 1406.716064453125, -22.295196533203125, 737.778076171875, 212.6107177734375, 728.5223388671875, 90.64563751220703, 741.47412109375, 1610.80712890625, 407.08721923828125, 550.4953002929688, 1365.63623046875, 682.4846801757812, -58.401573181152344, 162.3489227294922, -322.17889404296875, 1334.8780517578125, 986.4859619140625, 70.81381225585938, 992.4530639648438, 444.7064208984375, 430.06927490234375, 779.9979858398438, 452.6705322265625, 227.4144744873047, 450.171875, 60.03382110595703, 246.8643798828125, 901.41015625, -279.3592529296875, 1340.090576171875, 128.03387451171875, 412.3979187011719, 230.58935546875, -205.1936798095703, 663.4682006835938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000676.npy"} +{"epoch": 0.9926578560939795, "step": 677, "batch_size": 64, "mean": 475.6611022949219, "std": 755.9932861328125, "min": -860.8070068359375, "p10": -196.35833740234375, "median": 356.2030334472656, "p90": 1232.8178100585938, "max": 4167.265625, "pos_frac": 0.765625, "sample": [167.9200439453125, 2699.135986328125, 635.9181518554688, 62.507102966308594, 594.75048828125, -152.45355224609375, -201.19607543945312, -185.07028198242188, 481.26800537109375, -94.5201416015625, 643.739501953125, 501.5793762207031, 51.42289733886719, 1386.1295166015625, 347.3271484375, 844.2047119140625, 721.9779663085938, 15.012161254882812, 250.9943084716797, 262.68267822265625, 1188.826904296875, 336.4157409667969, 1223.26416015625, -232.2040557861328, 370.2015380859375, 1315.3963623046875, 25.159927368164062, 532.9765014648438, 377.8127136230469, 1236.9122314453125, 356.25714111328125, -100.13131713867188, -166.52589416503906, 244.14373779296875, 305.82342529296875, -663.6188354492188, 924.880859375, 356.14892578125, 4167.265625, 209.55633544921875, 168.62838745117188, 495.6492919921875, 874.1273803710938, 777.1365356445312, -81.41919708251953, 1186.6781005859375, 342.0545959472656, 241.77975463867188, 677.2672119140625, 75.47145080566406, 1372.6563720703125, -0.5891609191894531, 669.090087890625, 411.3352355957031, 496.55078125, -322.0027770996094, -860.8070068359375, 1798.369140625, 610.0193481445312, 711.5719604492188, 733.78076171875, -211.17486572265625, -727.21044921875, -38.545074462890625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000677.npy"} +{"epoch": 0.9941262848751835, "step": 678, "batch_size": 64, "mean": 535.78466796875, "std": 725.2259521484375, "min": -1746.774658203125, "p10": -139.5022193908691, "median": 416.0460662841797, "p90": 1188.9229370117187, "max": 3055.27294921875, "pos_frac": 0.8125, "sample": [264.17138671875, 815.1060791015625, 520.0238037109375, 637.053466796875, 415.66619873046875, -61.681304931640625, 611.8294067382812, -1746.774658203125, 153.5458984375, -284.7148742675781, -95.8614501953125, 1097.4742431640625, 38.68220901489258, 1188.1939697265625, -97.9466781616211, 545.8380126953125, 849.29541015625, -351.89056396484375, 843.7753295898438, 858.0164794921875, 971.381103515625, 1189.2353515625, 16.78839874267578, 2054.18310546875, 1183.8643798828125, 3055.27294921875, 82.31315612792969, 384.753662109375, -15.042659759521484, 1089.8511962890625, -401.1580505371094, 102.81636810302734, 183.34149169921875, 835.1982421875, 290.20697021484375, 964.919189453125, 249.88381958007812, 1161.4130859375, 247.89373779296875, 783.0639038085938, -173.39805603027344, 890.3399047851562, 1358.77392578125, 1587.488525390625, 329.568359375, 229.1664276123047, 2141.979736328125, 2244.91162109375, 57.320552825927734, 537.876708984375, 347.4853515625, -469.3603210449219, 386.42626953125, 35.45307922363281, 39.99504852294922, 113.28413391113281, 557.1972045898438, 1106.958251953125, 677.9124145507812, 524.312255859375, 416.4259338378906, -7.013271331787109, 884.4437255859375, -157.31173706054688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000678.npy"} +{"epoch": 0.9955947136563876, "step": 679, "batch_size": 64, "mean": 378.50738525390625, "std": 594.0196533203125, "min": -802.0025634765625, "p10": -259.9079376220703, "median": 292.6643371582031, "p90": 1279.5061157226567, "max": 1944.718994140625, "pos_frac": 0.75, "sample": [461.33905029296875, 33.85968780517578, 1197.20458984375, 1944.718994140625, 482.3880310058594, 645.20947265625, 820.0343627929688, 490.8298034667969, 1314.7781982421875, 327.8925476074219, 354.933349609375, 480.12408447265625, 440.8951110839844, 226.13487243652344, 1044.759521484375, -419.5617980957031, -186.71481323242188, 221.85934448242188, 195.9107666015625, 948.5963134765625, 655.9075927734375, 1517.3160400390625, -37.83064270019531, 93.43338012695312, 157.7248077392578, -331.6173400878906, 355.51885986328125, 1515.1558837890625, -487.2031555175781, 521.5194702148438, 757.4930419921875, 70.40338134765625, -77.45195007324219, 35.88471221923828, 932.8390502929688, 386.4781494140625, 1489.44970703125, 143.13011169433594, -206.9915771484375, 58.814369201660156, 206.70455932617188, -96.33650207519531, 412.2322082519531, 187.34927368164062, -209.73922729492188, 1518.3779296875, 369.9339904785156, -85.29812622070312, -802.0025634765625, -122.54452514648438, 964.0343017578125, -577.834228515625, 548.4669189453125, -281.4088134765625, 479.870849609375, 257.4361267089844, -181.36160278320312, 219.04644775390625, 792.1376953125, 1818.541015625, 179.3934326171875, 636.0008544921875, -717.0706787109375, 133.3787384033203], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000679.npy"} +{"epoch": 0.9970631424375918, "step": 680, "batch_size": 64, "mean": 502.96063232421875, "std": 736.11669921875, "min": -2221.13427734375, "p10": -166.68568115234373, "median": 401.8428039550781, "p90": 1591.8111450195315, "max": 2178.51708984375, "pos_frac": 0.796875, "sample": [1140.5205078125, 387.8035583496094, 464.8394775390625, 856.5093994140625, 147.5415496826172, 1051.3299560546875, 1833.7315673828125, 588.3834838867188, 230.72763061523438, 400.1661682128906, 559.862548828125, 182.7865447998047, 831.5513305664062, -398.8568115234375, 1127.9361572265625, 1507.398681640625, 666.0823974609375, -633.7611083984375, -237.62843322753906, 802.1885986328125, 1147.09619140625, 151.595703125, -124.25288391113281, 301.9436950683594, -177.80606079101562, 953.9598388671875, 842.4161376953125, 169.86766052246094, 664.4300537109375, 1668.630615234375, 179.3106689453125, 90.94776153564453, 189.64581298828125, 1731.2899169921875, 1058.209716796875, 156.20677185058594, 176.76132202148438, 501.67608642578125, 1856.50048828125, -595.0112915039062, 152.45526123046875, 403.5194396972656, 489.55169677734375, 631.6551513671875, 153.68307495117188, -2221.13427734375, 1761.2567138671875, -101.94709014892578, -602.0181274414062, 1437.8018798828125, 564.431884765625, 2178.51708984375, 21.605865478515625, -9.025405883789062, 194.77206420898438, 1484.076416015625, 691.1798095703125, 1627.9879150390625, -140.55685424804688, 140.3221435546875, -140.73812866210938, 249.09188842773438, -4.540679931640625, 775.00244140625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000680.npy"} +{"epoch": 0.9985315712187959, "step": 681, "batch_size": 64, "mean": 393.31298828125, "std": 649.4082641601562, "min": -1054.20068359375, "p10": -443.20543518066404, "median": 403.9897766113281, "p90": 1210.7851562500002, "max": 2281.58349609375, "pos_frac": 0.671875, "sample": [728.9097290039062, -339.104736328125, 526.765625, 180.82550048828125, 916.0858154296875, -1054.20068359375, 59.009185791015625, 407.1947937011719, -480.7290954589844, 154.359619140625, -294.6680908203125, 400.7847595214844, -478.9046630859375, -396.66229248046875, 1094.422119140625, 1629.9580078125, 490.1278076171875, -105.56755828857422, -252.8655548095703, 1142.77001953125, -80.35120391845703, -12.406867980957031, 907.1444091796875, 467.1734313964844, 1779.990966796875, 642.3313598632812, 224.98703002929688, 958.826904296875, 949.7354736328125, 602.3638305664062, 126.23871612548828, 504.0914306640625, 150.52423095703125, -106.36636352539062, -240.460205078125, 197.0974578857422, 177.97686767578125, -598.7027587890625, 450.6756896972656, -162.0498504638672, -481.075927734375, 2281.58349609375, -467.2596435546875, 909.07666015625, -81.3105697631836, 422.75494384765625, -463.1524963378906, 218.8629608154297, -9.770492553710938, -215.53125, 927.8402099609375, -153.41812133789062, 820.9574584960938, 584.350830078125, 1289.67578125, 470.29180908203125, 1161.3690185546875, 1231.9635009765625, 1051.9163818359375, 652.1632080078125, 233.4292449951172, 1267.101318359375, 1450.3841552734375, 802.498046875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs/step_0000681.npy"} diff --git a/margin_logs/step_0000001.npy b/margin_logs/step_0000001.npy new file mode 100644 index 0000000..248c095 --- /dev/null +++ b/margin_logs/step_0000001.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb7ed5e9b5d6de6c4e509dd17cf5d9c91337fabd0c174e116c5e60872823ad93 +size 384 diff --git a/margin_logs/step_0000002.npy b/margin_logs/step_0000002.npy new file mode 100644 index 0000000..984e4c2 --- /dev/null +++ b/margin_logs/step_0000002.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc23171824afa57340cda53f69d83aef67c7c0b95175e9ec4a3a7bc3c221bc4f +size 384 diff --git a/margin_logs/step_0000003.npy b/margin_logs/step_0000003.npy new file mode 100644 index 0000000..c03b222 --- /dev/null +++ b/margin_logs/step_0000003.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8891b813d9b17f22e69ee486d82fc7116d6e07164bacf88efbb869b7973b91a +size 384 diff --git a/margin_logs/step_0000004.npy b/margin_logs/step_0000004.npy new file mode 100644 index 0000000..086886e --- /dev/null +++ b/margin_logs/step_0000004.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a3d77476cb1e0129bca5dc9d34053f4503d22db23eda1cfad678410e2e0d779 +size 384 diff --git a/margin_logs/step_0000005.npy b/margin_logs/step_0000005.npy new file mode 100644 index 0000000..c40ba91 --- /dev/null +++ b/margin_logs/step_0000005.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9bf7241cb460f1a065d74676f3d8c2463d43fbc6cb94af06f143d7c66383b7dc +size 384 diff --git a/margin_logs/step_0000006.npy b/margin_logs/step_0000006.npy new file mode 100644 index 0000000..ea408cc --- /dev/null +++ b/margin_logs/step_0000006.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:962f11e177d2a8ca8a1a245223797d423743f17b4f6d005bdefe22b25c931c04 +size 384 diff --git a/margin_logs/step_0000007.npy b/margin_logs/step_0000007.npy new file mode 100644 index 0000000..a4e58c4 --- /dev/null +++ b/margin_logs/step_0000007.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eaa988776ab61a9bd6e9c2acd555495e9848e4a0a17556eea4e7e92c078dabd4 +size 384 diff --git a/margin_logs/step_0000008.npy b/margin_logs/step_0000008.npy new file mode 100644 index 0000000..1d65c4b --- /dev/null +++ b/margin_logs/step_0000008.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25febb19f31d5c956e19f6f7e163f98c626dc8ca28a55e172e8fa8650f1d0ab9 +size 384 diff --git a/margin_logs/step_0000009.npy b/margin_logs/step_0000009.npy new file mode 100644 index 0000000..c8f841c --- /dev/null +++ b/margin_logs/step_0000009.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3833e58a452af1d7bd603653953679a25dcc448f06a9eec48c4bb76e3216ae8a +size 384 diff --git a/margin_logs/step_0000010.npy b/margin_logs/step_0000010.npy new file mode 100644 index 0000000..7cdeccf --- /dev/null +++ b/margin_logs/step_0000010.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a96cb625792fb3d9a1dec42e4f5543b897f3b927f0d24bb271bfdd1287aa36b7 +size 384 diff --git a/margin_logs/step_0000011.npy b/margin_logs/step_0000011.npy new file mode 100644 index 0000000..54e7998 --- /dev/null +++ b/margin_logs/step_0000011.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0eb6ca79054efee878fd0013c60cb1afae930c29741dd0fa80c3adea31b24e35 +size 384 diff --git a/margin_logs/step_0000012.npy b/margin_logs/step_0000012.npy new file mode 100644 index 0000000..40da2c6 --- /dev/null +++ b/margin_logs/step_0000012.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df07ac5e5826c54cb6a473098355723b37dfe86d12ae1c91206a0b0d435ea11b +size 384 diff --git a/margin_logs/step_0000013.npy b/margin_logs/step_0000013.npy new file mode 100644 index 0000000..736502d --- /dev/null +++ b/margin_logs/step_0000013.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d80fe1366c85795b3b417ee93d60bcf31625b11fecd3dba780854ca930fe88c5 +size 384 diff --git a/margin_logs/step_0000014.npy b/margin_logs/step_0000014.npy new file mode 100644 index 0000000..fcb6801 --- /dev/null +++ b/margin_logs/step_0000014.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca055fa5ca5ecda616c21110e6181cb51cb8e47d2832a20de76aace7cc73561b +size 384 diff --git a/margin_logs/step_0000015.npy b/margin_logs/step_0000015.npy new file mode 100644 index 0000000..8606f9b --- /dev/null +++ b/margin_logs/step_0000015.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a5b3799c4e1c1a6589cf8dc38f761ebcd5dbd7539e23b2967239467f3805a69f +size 384 diff --git a/margin_logs/step_0000016.npy b/margin_logs/step_0000016.npy new file mode 100644 index 0000000..8c380f7 --- /dev/null +++ b/margin_logs/step_0000016.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3466743d8e036ceef9f84e5b0e8237428dc240cd42c55d00a1d8aa20b47a180 +size 384 diff --git a/margin_logs/step_0000017.npy b/margin_logs/step_0000017.npy new file mode 100644 index 0000000..bb3cf81 --- /dev/null +++ b/margin_logs/step_0000017.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0718f97a72963b08c476a50a5bfcdb353ca3b453525e58cf614851b26d07767a +size 384 diff --git a/margin_logs/step_0000018.npy b/margin_logs/step_0000018.npy new file mode 100644 index 0000000..4f05d58 --- /dev/null +++ b/margin_logs/step_0000018.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b325e7dc1d689c52eba17c2801e9cd506737b5ff5f44f2e93dcb8832b84065b +size 384 diff --git a/margin_logs/step_0000019.npy b/margin_logs/step_0000019.npy new file mode 100644 index 0000000..cf147b7 --- /dev/null +++ b/margin_logs/step_0000019.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3dcefbb11b048fe1e5c98313c04da005af9587f9bc9e5afaaed6c2d2a2c15003 +size 384 diff --git a/margin_logs/step_0000020.npy b/margin_logs/step_0000020.npy new file mode 100644 index 0000000..93bc635 --- /dev/null +++ b/margin_logs/step_0000020.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d98e6688e4147026db532898af8bab6ada975ffc4f3529d320013d8ccb0f1ded +size 384 diff --git a/margin_logs/step_0000021.npy b/margin_logs/step_0000021.npy new file mode 100644 index 0000000..a5dd6c3 --- /dev/null +++ b/margin_logs/step_0000021.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dede21447383556ac392dc69d3a56edf6acaf99fed27e5590e8bf36fd3b30621 +size 384 diff --git a/margin_logs/step_0000022.npy b/margin_logs/step_0000022.npy new file mode 100644 index 0000000..3a2d100 --- /dev/null +++ b/margin_logs/step_0000022.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01bbdc7ad06d75700137f2ef287450ff921e90071478a8563d1c948ef86873d4 +size 384 diff --git a/margin_logs/step_0000023.npy b/margin_logs/step_0000023.npy new file mode 100644 index 0000000..fa3a47f --- /dev/null +++ b/margin_logs/step_0000023.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a453858e59b88c0d9acdb1467de90ef1de8b2b2c9a48d74d97ba453fc31d6115 +size 384 diff --git a/margin_logs/step_0000024.npy b/margin_logs/step_0000024.npy new file mode 100644 index 0000000..476602b --- /dev/null +++ b/margin_logs/step_0000024.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27b78d861cce108225f1f80955a346c551917793c5d09fdf13bc2c3a34be8323 +size 384 diff --git a/margin_logs/step_0000025.npy b/margin_logs/step_0000025.npy new file mode 100644 index 0000000..42798cb --- /dev/null +++ b/margin_logs/step_0000025.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4792dc4fd124e0e3fc8ccbb764c79d886bc0279de0f9ae21cafb23ba5849dd8 +size 384 diff --git a/margin_logs/step_0000026.npy b/margin_logs/step_0000026.npy new file mode 100644 index 0000000..12050c4 --- /dev/null +++ b/margin_logs/step_0000026.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc7aeb41887a53c00ff86478d4f5da1100bf209f89e62b435cbb38ffe3a36d07 +size 384 diff --git a/margin_logs/step_0000027.npy b/margin_logs/step_0000027.npy new file mode 100644 index 0000000..24e8c39 --- /dev/null +++ b/margin_logs/step_0000027.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e1ce75c3987f3a9cd983c5c28735e15db6500b62c52c5038fc454d14aa2f2828 +size 384 diff --git a/margin_logs/step_0000028.npy b/margin_logs/step_0000028.npy new file mode 100644 index 0000000..0155896 --- /dev/null +++ b/margin_logs/step_0000028.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a841c2ddec36c1222ffa2de8784f04680ee32b5b7db838282d45e31c21cffdc5 +size 384 diff --git a/margin_logs/step_0000029.npy b/margin_logs/step_0000029.npy new file mode 100644 index 0000000..8995cb0 --- /dev/null +++ b/margin_logs/step_0000029.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32b4ccea195e441222646b6a72f2115f6c0bd172ae0eec954a3218bf892ac962 +size 384 diff --git a/margin_logs/step_0000030.npy b/margin_logs/step_0000030.npy new file mode 100644 index 0000000..3e776f5 --- /dev/null +++ b/margin_logs/step_0000030.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa65fd56c66536675a88a9e40aef4092930f4369a31877926a76130503ad8fe4 +size 384 diff --git a/margin_logs/step_0000031.npy b/margin_logs/step_0000031.npy new file mode 100644 index 0000000..658202c --- /dev/null +++ b/margin_logs/step_0000031.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01a900b088f73426f8afcefe09be6d56c67d948f9bd61ed16441f1b131a32f3e +size 384 diff --git a/margin_logs/step_0000032.npy b/margin_logs/step_0000032.npy new file mode 100644 index 0000000..a6bce52 --- /dev/null +++ b/margin_logs/step_0000032.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5129963c73d64a905a234a01565a8641d4e79a3416654ce14999f38a667383ba +size 384 diff --git a/margin_logs/step_0000033.npy b/margin_logs/step_0000033.npy new file mode 100644 index 0000000..a65d1a2 --- /dev/null +++ b/margin_logs/step_0000033.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a27b1ba351b7fcde6c8b059ccfb601c64b8d665ef225396b3cdd90182c3a8c5b +size 384 diff --git a/margin_logs/step_0000034.npy b/margin_logs/step_0000034.npy new file mode 100644 index 0000000..732b56d --- /dev/null +++ b/margin_logs/step_0000034.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b12184cd9332b97a11ec70b7a98387156c1d51cb0231c62f5e5b9a7d8fdaa07 +size 384 diff --git a/margin_logs/step_0000035.npy b/margin_logs/step_0000035.npy new file mode 100644 index 0000000..03f9839 --- /dev/null +++ b/margin_logs/step_0000035.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee60a5969125f164fc02d4d6e053040b519efdf2ebe2bdfd7decd0ed05c86be0 +size 384 diff --git a/margin_logs/step_0000036.npy b/margin_logs/step_0000036.npy new file mode 100644 index 0000000..3f79f72 --- /dev/null +++ b/margin_logs/step_0000036.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bff379b40fa29dc716ecb911b01d910170e03532b5758212d5344ccda4c78afd +size 384 diff --git a/margin_logs/step_0000037.npy b/margin_logs/step_0000037.npy new file mode 100644 index 0000000..e2ced5a --- /dev/null +++ b/margin_logs/step_0000037.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c416f92320581035e3adb03e7c515e0590773fa2a121bef3aa2aae82ff4911f +size 384 diff --git a/margin_logs/step_0000038.npy b/margin_logs/step_0000038.npy new file mode 100644 index 0000000..e75d8f9 --- /dev/null +++ b/margin_logs/step_0000038.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a893ce0f17c8405f374ae7d2d08d9cd820a0958dc895b2e79226a3201293f46 +size 384 diff --git a/margin_logs/step_0000039.npy b/margin_logs/step_0000039.npy new file mode 100644 index 0000000..521d78f --- /dev/null +++ b/margin_logs/step_0000039.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da2afcb9f2171d9479de12eb1d9cf8fa32c3eab9f8ef26412d3dd1766b4fc6a0 +size 384 diff --git a/margin_logs/step_0000040.npy b/margin_logs/step_0000040.npy new file mode 100644 index 0000000..939a6c9 --- /dev/null +++ b/margin_logs/step_0000040.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1bcc37cf4756461030f65ae17ad3536d15ea69df30c87ebf194a17500f76d840 +size 384 diff --git a/margin_logs/step_0000041.npy b/margin_logs/step_0000041.npy new file mode 100644 index 0000000..014f2e9 --- /dev/null +++ b/margin_logs/step_0000041.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4b28ba81c27d95f6faf8f0f27342bd35a648e70eb275d6ecceb077cd9749e15 +size 384 diff --git a/margin_logs/step_0000042.npy b/margin_logs/step_0000042.npy new file mode 100644 index 0000000..7e4a461 --- /dev/null +++ b/margin_logs/step_0000042.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:63dfc296369cff6a83e0a695fb6d1f30fb431b4796a140a21e12bea508b79d73 +size 384 diff --git a/margin_logs/step_0000043.npy b/margin_logs/step_0000043.npy new file mode 100644 index 0000000..1acc456 --- /dev/null +++ b/margin_logs/step_0000043.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67d81b8c54700ef1a76ad217e57ecd0b690fe71fc3b9436b28f217895bf7e4f4 +size 384 diff --git a/margin_logs/step_0000044.npy b/margin_logs/step_0000044.npy new file mode 100644 index 0000000..d1bca97 --- /dev/null +++ b/margin_logs/step_0000044.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2c5af37873924108cb25b2cfbbf6c639f3d0fb6073c404a4c8d8da4823aed53 +size 384 diff --git a/margin_logs/step_0000045.npy b/margin_logs/step_0000045.npy new file mode 100644 index 0000000..1492955 --- /dev/null +++ b/margin_logs/step_0000045.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d9b01b2819b38e7c7e3ff62e8b8298d2fb89487d26ec1fe5abd1fc75c776a4ab +size 384 diff --git a/margin_logs/step_0000046.npy b/margin_logs/step_0000046.npy new file mode 100644 index 0000000..d07d752 --- /dev/null +++ b/margin_logs/step_0000046.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a47c58209c5f0de46fdb6e1fcf0a194d302b398cb155bc13a9b24b3de2554829 +size 384 diff --git a/margin_logs/step_0000047.npy b/margin_logs/step_0000047.npy new file mode 100644 index 0000000..c43cf63 --- /dev/null +++ b/margin_logs/step_0000047.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ca3b4e05813725fc0e73bbff4cd3f19e74f8b0280aa7a9d6ca037491eb143dc +size 384 diff --git a/margin_logs/step_0000048.npy b/margin_logs/step_0000048.npy new file mode 100644 index 0000000..f69db35 --- /dev/null +++ b/margin_logs/step_0000048.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3880c5d32eea11ee8a3089ed0f1e6a8a6617f29a49da1d083bf1d682f816bad +size 384 diff --git a/margin_logs/step_0000049.npy b/margin_logs/step_0000049.npy new file mode 100644 index 0000000..1ff949c --- /dev/null +++ b/margin_logs/step_0000049.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d15adba5d5bcf02b65efddd3813d402902d2be05f0b8ce427080d0f48728c5a4 +size 384 diff --git a/margin_logs/step_0000050.npy b/margin_logs/step_0000050.npy new file mode 100644 index 0000000..f61fb35 --- /dev/null +++ b/margin_logs/step_0000050.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:518eab308ce4862837fb073773373499b4f2b9d638c07a6be9f1dff934972da0 +size 384 diff --git a/margin_logs/step_0000051.npy b/margin_logs/step_0000051.npy new file mode 100644 index 0000000..a1f1d66 --- /dev/null +++ b/margin_logs/step_0000051.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:208b4353807d1bd4292aa1721d6aa16cfb8cf61b1d189753950168fdd169129e +size 384 diff --git a/margin_logs/step_0000052.npy b/margin_logs/step_0000052.npy new file mode 100644 index 0000000..4c5f510 --- /dev/null +++ b/margin_logs/step_0000052.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd21bbebe5ccc03afe56a2a626c76f898e65d744e8c45dc2376b3b65dffa6343 +size 384 diff --git a/margin_logs/step_0000053.npy b/margin_logs/step_0000053.npy new file mode 100644 index 0000000..dce9045 --- /dev/null +++ b/margin_logs/step_0000053.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fcb3e8f34a798dfbfd931d60a49e05375681582d889118ea32fd4768d65bb17e +size 384 diff --git a/margin_logs/step_0000054.npy b/margin_logs/step_0000054.npy new file mode 100644 index 0000000..00bc05e --- /dev/null +++ b/margin_logs/step_0000054.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:30a219ef4c9e04fbc972c3e746108707f27b67d56ebdd7fa98d70f25ad2006f8 +size 384 diff --git a/margin_logs/step_0000055.npy b/margin_logs/step_0000055.npy new file mode 100644 index 0000000..a603056 --- /dev/null +++ b/margin_logs/step_0000055.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d6f89cd1b48daf2898241ef0ffc4d32980efac5f08cdf640a68b14cd3faf2f3f +size 384 diff --git a/margin_logs/step_0000056.npy b/margin_logs/step_0000056.npy new file mode 100644 index 0000000..c3f27d7 --- /dev/null +++ b/margin_logs/step_0000056.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a82427413b9f948d78d3fbd99c702a4c7c2cc2437250bb8dcff0d2ab238b94d +size 384 diff --git a/margin_logs/step_0000057.npy b/margin_logs/step_0000057.npy new file mode 100644 index 0000000..3c0fc73 --- /dev/null +++ b/margin_logs/step_0000057.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb343c9a9c38c042d1912d5261a947a761b226e182ba1666e02f25bf0cd6d036 +size 384 diff --git a/margin_logs/step_0000058.npy b/margin_logs/step_0000058.npy new file mode 100644 index 0000000..672f1b9 --- /dev/null +++ b/margin_logs/step_0000058.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2170a33e6d42d5774b8e6ad43b80df7e6b78cd9e8e732458fd55898f212ad421 +size 384 diff --git a/margin_logs/step_0000059.npy b/margin_logs/step_0000059.npy new file mode 100644 index 0000000..5c93413 --- /dev/null +++ b/margin_logs/step_0000059.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4ee6f45e608321d22a3c23ed4f72a507bff764673c5722a94198950d297ec5d +size 384 diff --git a/margin_logs/step_0000060.npy b/margin_logs/step_0000060.npy new file mode 100644 index 0000000..e45fbbd --- /dev/null +++ b/margin_logs/step_0000060.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b62646dff26627916dab5c14192d06618550eacfb647d139bf5e857d46e347d0 +size 384 diff --git a/margin_logs/step_0000061.npy b/margin_logs/step_0000061.npy new file mode 100644 index 0000000..1a1092c --- /dev/null +++ b/margin_logs/step_0000061.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4f635ce1d9c3eb09a0a06dd20dab54f40aebc2bb488b58651ae473f04c207f0 +size 384 diff --git a/margin_logs/step_0000062.npy b/margin_logs/step_0000062.npy new file mode 100644 index 0000000..8123b17 --- /dev/null +++ b/margin_logs/step_0000062.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ffc8b9cbedaee07844de2f40f1c9fb681812e57a6ba846038fde5afaf60b0c5b +size 384 diff --git a/margin_logs/step_0000063.npy b/margin_logs/step_0000063.npy new file mode 100644 index 0000000..3c35ea4 --- /dev/null +++ b/margin_logs/step_0000063.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f915acaf4cd53d2be29e31796b0edbb3ea6522c4ceba5ffa7caf8afc719d61e +size 384 diff --git a/margin_logs/step_0000064.npy b/margin_logs/step_0000064.npy new file mode 100644 index 0000000..2099fda --- /dev/null +++ b/margin_logs/step_0000064.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:00e04e0da23aa7db463e92720e5a88a4c3afe1ae5502dc538a7a5fe27267c99b +size 384 diff --git a/margin_logs/step_0000065.npy b/margin_logs/step_0000065.npy new file mode 100644 index 0000000..50c4011 --- /dev/null +++ b/margin_logs/step_0000065.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b579272018d343b1deb94c349c54fc75d5f9947c9b7712900263dcbf531a9c1 +size 384 diff --git a/margin_logs/step_0000066.npy b/margin_logs/step_0000066.npy new file mode 100644 index 0000000..03b9fdd --- /dev/null +++ b/margin_logs/step_0000066.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:575a8d29b237ae2e6ac02a5c698582ed0f43e3c770cde9fcb33baa1275ee46f6 +size 384 diff --git a/margin_logs/step_0000067.npy b/margin_logs/step_0000067.npy new file mode 100644 index 0000000..a3b0c7f --- /dev/null +++ b/margin_logs/step_0000067.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a79c69156df1d5666b64b80510c9415a478866b25719a7a81953520c1b0777f7 +size 384 diff --git a/margin_logs/step_0000068.npy b/margin_logs/step_0000068.npy new file mode 100644 index 0000000..085d1f6 --- /dev/null +++ b/margin_logs/step_0000068.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6259050515264dc9775ff514ea77502000e70e821c8daca2c99e508db8eaf661 +size 384 diff --git a/margin_logs/step_0000069.npy b/margin_logs/step_0000069.npy new file mode 100644 index 0000000..070549f --- /dev/null +++ b/margin_logs/step_0000069.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b92c56f7c97bef5e7f1cee31bfe9516b3a6149d3f2213e74a7d00eb18fc0d8b3 +size 384 diff --git a/margin_logs/step_0000070.npy b/margin_logs/step_0000070.npy new file mode 100644 index 0000000..c449985 --- /dev/null +++ b/margin_logs/step_0000070.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d9fe5e1018f0ef12bbc3eddee056fef1679a29bdee4705b4f58fc38bcbf83e5 +size 384 diff --git a/margin_logs/step_0000071.npy b/margin_logs/step_0000071.npy new file mode 100644 index 0000000..6f06db0 --- /dev/null +++ b/margin_logs/step_0000071.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:046223e3720149b3222929cb859fca91bf07f4c99aea0e617dd6ca791f5629e8 +size 384 diff --git a/margin_logs/step_0000072.npy b/margin_logs/step_0000072.npy new file mode 100644 index 0000000..7f5fb51 --- /dev/null +++ b/margin_logs/step_0000072.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7795154d01dcdea592c423c39a0984d4fdb2b0da65252bea55d5d4565305eb3b +size 384 diff --git a/margin_logs/step_0000073.npy b/margin_logs/step_0000073.npy new file mode 100644 index 0000000..147f9ae --- /dev/null +++ b/margin_logs/step_0000073.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d95ade2378653caf8b23f77f10ce023eb5a83f89b0c391a593c76b7e73587b90 +size 384 diff --git a/margin_logs/step_0000074.npy b/margin_logs/step_0000074.npy new file mode 100644 index 0000000..a6ba50f --- /dev/null +++ b/margin_logs/step_0000074.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4a9724c80ccb6f94ad73cb5c53d2425fd00ffb7ac6a67396963e44863608c81 +size 384 diff --git a/margin_logs/step_0000075.npy b/margin_logs/step_0000075.npy new file mode 100644 index 0000000..2916393 --- /dev/null +++ b/margin_logs/step_0000075.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:985f52b432509bcd1d2dcc22a14b523dc1c26f2604bc498b1e2433df5c3d5a04 +size 384 diff --git a/margin_logs/step_0000076.npy b/margin_logs/step_0000076.npy new file mode 100644 index 0000000..fdd59a8 --- /dev/null +++ b/margin_logs/step_0000076.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f25bbe255aeb6a85b44422831f80d02c64fbcddaff57cb6cb6b3ffe012d2253 +size 384 diff --git a/margin_logs/step_0000077.npy b/margin_logs/step_0000077.npy new file mode 100644 index 0000000..9b8e402 --- /dev/null +++ b/margin_logs/step_0000077.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d6ab131fb47ccb6e7b125153236899a125b0385cbd52a49377af39945d72be4 +size 384 diff --git a/margin_logs/step_0000078.npy b/margin_logs/step_0000078.npy new file mode 100644 index 0000000..dab32ca --- /dev/null +++ b/margin_logs/step_0000078.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a41eb371144759a5a592ae0c1b0907f228344d8e27104ac1a156db0f5dbf9fa +size 384 diff --git a/margin_logs/step_0000079.npy b/margin_logs/step_0000079.npy new file mode 100644 index 0000000..5c9a82e --- /dev/null +++ b/margin_logs/step_0000079.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88f7412368b651019f00bb0c152bd92cc5df63af1ea8c97328775af1e95b1be5 +size 384 diff --git a/margin_logs/step_0000080.npy b/margin_logs/step_0000080.npy new file mode 100644 index 0000000..58aacda --- /dev/null +++ b/margin_logs/step_0000080.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7358ff0562011460b191e6098d59bc81d08a7f159b57efffe9d2f20c8390cf76 +size 384 diff --git a/margin_logs/step_0000081.npy b/margin_logs/step_0000081.npy new file mode 100644 index 0000000..219967d --- /dev/null +++ b/margin_logs/step_0000081.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c0c86e1a60a4fe33b70c81a9724d4cbb8d88d9238f5b260c4da3bc9a33177c09 +size 384 diff --git a/margin_logs/step_0000082.npy b/margin_logs/step_0000082.npy new file mode 100644 index 0000000..8b8ff88 --- /dev/null +++ b/margin_logs/step_0000082.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:abd943b047b876ef73d32b938bfc7a8624d1209533f50536350d1780a0a385b5 +size 384 diff --git a/margin_logs/step_0000083.npy b/margin_logs/step_0000083.npy new file mode 100644 index 0000000..5085904 --- /dev/null +++ b/margin_logs/step_0000083.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd079214c7b476e50a3ea39bd0e4dc165db7b0991b93169678b804a9586a3211 +size 384 diff --git a/margin_logs/step_0000084.npy b/margin_logs/step_0000084.npy new file mode 100644 index 0000000..2d9560e --- /dev/null +++ b/margin_logs/step_0000084.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cacc6fda387a17c858a60777ee0bb4681fc116072052366c4c169066273cc862 +size 384 diff --git a/margin_logs/step_0000085.npy b/margin_logs/step_0000085.npy new file mode 100644 index 0000000..ad5d8ea --- /dev/null +++ b/margin_logs/step_0000085.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:029a9978931d99f751cbed4fa5aa02634b6223f6f6dc9e1d7293aebe4785dc20 +size 384 diff --git a/margin_logs/step_0000086.npy b/margin_logs/step_0000086.npy new file mode 100644 index 0000000..51331e9 --- /dev/null +++ b/margin_logs/step_0000086.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88279a9b8bebf789dd79b78c40d87347a91c55e0558dba56758282213c8c373b +size 384 diff --git a/margin_logs/step_0000087.npy b/margin_logs/step_0000087.npy new file mode 100644 index 0000000..1efd4e2 --- /dev/null +++ b/margin_logs/step_0000087.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6b604597708fbab324a3e43f0e802c64058dc822853e1f234e07b6de29d7583 +size 384 diff --git a/margin_logs/step_0000088.npy b/margin_logs/step_0000088.npy new file mode 100644 index 0000000..2112401 --- /dev/null +++ b/margin_logs/step_0000088.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4cb4b52edc59e2a2560e68c5a9c4958353e13a9d4261fcb13f71cc7df8f084ae +size 384 diff --git a/margin_logs/step_0000089.npy b/margin_logs/step_0000089.npy new file mode 100644 index 0000000..95a3769 --- /dev/null +++ b/margin_logs/step_0000089.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a344a6ec8574a6cd9c35417188eff82dea8289a75a3dc5a2b8a7727951ef2fd +size 384 diff --git a/margin_logs/step_0000090.npy b/margin_logs/step_0000090.npy new file mode 100644 index 0000000..2fbd7f9 --- /dev/null +++ b/margin_logs/step_0000090.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9dbf33d53b46e0a6f84b6e46de88b3181db3705759315d1b85ca358c0926c4b +size 384 diff --git a/margin_logs/step_0000091.npy b/margin_logs/step_0000091.npy new file mode 100644 index 0000000..b725602 --- /dev/null +++ b/margin_logs/step_0000091.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b190633ca5a5070a272f5f714e3f55ee19f7aeaa2c26f6bf3875b6ebcdf4f31c +size 384 diff --git a/margin_logs/step_0000092.npy b/margin_logs/step_0000092.npy new file mode 100644 index 0000000..52e8531 --- /dev/null +++ b/margin_logs/step_0000092.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c0599b6f3f484e5df1654c70ae4928b696bc56797c020f17fd0d6edd6e76d8a4 +size 384 diff --git a/margin_logs/step_0000093.npy b/margin_logs/step_0000093.npy new file mode 100644 index 0000000..48e8fb7 --- /dev/null +++ b/margin_logs/step_0000093.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:473e05514775733d678cf16bc7b63e90edfb29070578f97c2b32bfd36414648e +size 384 diff --git a/margin_logs/step_0000094.npy b/margin_logs/step_0000094.npy new file mode 100644 index 0000000..f5c5cfa --- /dev/null +++ b/margin_logs/step_0000094.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1696011df8022b6aef386218de9e07c54634b16c40ff580f037518b09f7aead0 +size 384 diff --git a/margin_logs/step_0000095.npy b/margin_logs/step_0000095.npy new file mode 100644 index 0000000..f125a87 --- /dev/null +++ b/margin_logs/step_0000095.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e7399039ef216902f478706157c6d79515fda94455b0f006dfdc01e2e0130c2 +size 384 diff --git a/margin_logs/step_0000096.npy b/margin_logs/step_0000096.npy new file mode 100644 index 0000000..7ce3349 --- /dev/null +++ b/margin_logs/step_0000096.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:204d40bf8061d1ed62ebf96a55223fd9c70bf4e59645b4e0743dbb4ccc784c24 +size 384 diff --git a/margin_logs/step_0000097.npy b/margin_logs/step_0000097.npy new file mode 100644 index 0000000..6eb03bc --- /dev/null +++ b/margin_logs/step_0000097.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f1df762a93bde3b09daeb96915aebc836fed0b5fe72ae836a509fe8cf3a5591 +size 384 diff --git a/margin_logs/step_0000098.npy b/margin_logs/step_0000098.npy new file mode 100644 index 0000000..93be8bd --- /dev/null +++ b/margin_logs/step_0000098.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4dd68d424833a611e672ef99456ff9be9e7e251cd8ad4620a904d58de5b687ee +size 384 diff --git a/margin_logs/step_0000099.npy b/margin_logs/step_0000099.npy new file mode 100644 index 0000000..55e45b8 --- /dev/null +++ b/margin_logs/step_0000099.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad29b05e3507a23a940fbc70bb7b80de581f8af6c3acb30044a20a6bc067ec84 +size 384 diff --git a/margin_logs/step_0000100.npy b/margin_logs/step_0000100.npy new file mode 100644 index 0000000..219a653 --- /dev/null +++ b/margin_logs/step_0000100.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:acee5fbd9f8bb09a25dcb162ddcd1f00da44641a3d6323d8579dd67ecf2382aa +size 384 diff --git a/margin_logs/step_0000101.npy b/margin_logs/step_0000101.npy new file mode 100644 index 0000000..2581fbf --- /dev/null +++ b/margin_logs/step_0000101.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c003f2d83370006ab9356d574a9e4096f71b7f5df5c6a5b2fa160874cfe21f7 +size 384 diff --git a/margin_logs/step_0000102.npy b/margin_logs/step_0000102.npy new file mode 100644 index 0000000..6f4e977 --- /dev/null +++ b/margin_logs/step_0000102.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2e9ff293280ecbc2a2d9c6b802a8df521c73efd597a7be55cc883bc94b399d1 +size 384 diff --git a/margin_logs/step_0000103.npy b/margin_logs/step_0000103.npy new file mode 100644 index 0000000..b015691 --- /dev/null +++ b/margin_logs/step_0000103.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc470dca470b8c4dfb85d88f46e8db2ce47e20e50596f7d83f6f4db8298f3c1f +size 384 diff --git a/margin_logs/step_0000104.npy b/margin_logs/step_0000104.npy new file mode 100644 index 0000000..254cf29 --- /dev/null +++ b/margin_logs/step_0000104.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a41c15e03e17a28d0e39e12ba8d37fdfdb59a34eb804c2e5aaf4df7dd83a7e32 +size 384 diff --git a/margin_logs/step_0000105.npy b/margin_logs/step_0000105.npy new file mode 100644 index 0000000..423a399 --- /dev/null +++ b/margin_logs/step_0000105.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b98fcb971f47f3b58d49610e37a3672f956cc879079ee6cf455c288a04394b4e +size 384 diff --git a/margin_logs/step_0000106.npy b/margin_logs/step_0000106.npy new file mode 100644 index 0000000..c4cfc09 --- /dev/null +++ b/margin_logs/step_0000106.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62b8c74a8e0e18efbea0649f02aaad33ce26388a197ef94ff717682f070fdc9a +size 384 diff --git a/margin_logs/step_0000107.npy b/margin_logs/step_0000107.npy new file mode 100644 index 0000000..5812d0d --- /dev/null +++ b/margin_logs/step_0000107.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa6d991f98a479dc4fa0e7814c93a0e2362878916d779885767e190eab99ea77 +size 384 diff --git a/margin_logs/step_0000108.npy b/margin_logs/step_0000108.npy new file mode 100644 index 0000000..f984a4c --- /dev/null +++ b/margin_logs/step_0000108.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2514fcaee026d2e9b5e9472f5abfe344ac2ce0a2b56cee9045ad1617550d6b9c +size 384 diff --git a/margin_logs/step_0000109.npy b/margin_logs/step_0000109.npy new file mode 100644 index 0000000..77ebb98 --- /dev/null +++ b/margin_logs/step_0000109.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94c5ba3dc39989fae189ebd79e2e1637c96d2cf178a4ffc46a8d11c7680955cc +size 384 diff --git a/margin_logs/step_0000110.npy b/margin_logs/step_0000110.npy new file mode 100644 index 0000000..295b452 --- /dev/null +++ b/margin_logs/step_0000110.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f589954217c5d975219404d95e77bcdf911f905ca29a891d8a1b02d612ece4eb +size 384 diff --git a/margin_logs/step_0000111.npy b/margin_logs/step_0000111.npy new file mode 100644 index 0000000..0f2dad3 --- /dev/null +++ b/margin_logs/step_0000111.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eac0f018f8da95605fb7126979b2c53ead7a38c12015bf9b175ad4c5e114a28f +size 384 diff --git a/margin_logs/step_0000112.npy b/margin_logs/step_0000112.npy new file mode 100644 index 0000000..611cfe9 --- /dev/null +++ b/margin_logs/step_0000112.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f72a12eda4eed64fc124aeedbe7c22ba46b5c604fabc910a184ec45d5bf96dd +size 384 diff --git a/margin_logs/step_0000113.npy b/margin_logs/step_0000113.npy new file mode 100644 index 0000000..9a00253 --- /dev/null +++ b/margin_logs/step_0000113.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:abfbc6773ad8517adf62449d3f98873775fd65e65f75690891f7e36993ac4198 +size 384 diff --git a/margin_logs/step_0000114.npy b/margin_logs/step_0000114.npy new file mode 100644 index 0000000..ce0206f --- /dev/null +++ b/margin_logs/step_0000114.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dbb499a52fbcc200f31af92c6524053f56905d17926698806f95da069d1f1919 +size 384 diff --git a/margin_logs/step_0000115.npy b/margin_logs/step_0000115.npy new file mode 100644 index 0000000..99b25f5 --- /dev/null +++ b/margin_logs/step_0000115.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0398f429644c36aba1dd5d2696ddda5e2f25406a23f00907d2cc26b5ca9c7f83 +size 384 diff --git a/margin_logs/step_0000116.npy b/margin_logs/step_0000116.npy new file mode 100644 index 0000000..8d6dd68 --- /dev/null +++ b/margin_logs/step_0000116.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:71cdfac2c0410998f4dbfdd342785e0c931ba618b225e151793ebfa6c319beea +size 384 diff --git a/margin_logs/step_0000117.npy b/margin_logs/step_0000117.npy new file mode 100644 index 0000000..bb8775f --- /dev/null +++ b/margin_logs/step_0000117.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:503a614253ec1b9c1fafbc4791490355a554914605adabf8cfaac47d88f0efb4 +size 384 diff --git a/margin_logs/step_0000118.npy b/margin_logs/step_0000118.npy new file mode 100644 index 0000000..f208429 --- /dev/null +++ b/margin_logs/step_0000118.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd9496927f0904ec9092e5a99e93cd42f82b2f8f94ce4d1bd2c9c6e8d9e58e06 +size 384 diff --git a/margin_logs/step_0000119.npy b/margin_logs/step_0000119.npy new file mode 100644 index 0000000..cf13733 --- /dev/null +++ b/margin_logs/step_0000119.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e2a0b75424224bac146479e40a9f013f1a911392b02b9aededc80bc5dd27e5c +size 384 diff --git a/margin_logs/step_0000120.npy b/margin_logs/step_0000120.npy new file mode 100644 index 0000000..05cc6ae --- /dev/null +++ b/margin_logs/step_0000120.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3fcbe61a4c2e6c7e69c4d6d7eebe5b969a6a412517404079fffaa0f6b3fc9f2 +size 384 diff --git a/margin_logs/step_0000121.npy b/margin_logs/step_0000121.npy new file mode 100644 index 0000000..877ff36 --- /dev/null +++ b/margin_logs/step_0000121.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9885bcac4e6ec4f03740346f8e44285e1e7f0fa403ff3c46fd9cd3ecf441920c +size 384 diff --git a/margin_logs/step_0000122.npy b/margin_logs/step_0000122.npy new file mode 100644 index 0000000..aee92c7 --- /dev/null +++ b/margin_logs/step_0000122.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba54e322b97c8eca04c70cd8f54037eb141218fb45538cc46513c91e46cb8842 +size 384 diff --git a/margin_logs/step_0000123.npy b/margin_logs/step_0000123.npy new file mode 100644 index 0000000..c4da072 --- /dev/null +++ b/margin_logs/step_0000123.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:335bed3cdb9cfd2871670c07be35b33e338c979d08411b7fc64a78266d39f1db +size 384 diff --git a/margin_logs/step_0000124.npy b/margin_logs/step_0000124.npy new file mode 100644 index 0000000..98d8547 --- /dev/null +++ b/margin_logs/step_0000124.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:20b4d6c48fd9adc40138097fe2935fc269823e90e872ffab118b6b8a8640bdd8 +size 384 diff --git a/margin_logs/step_0000125.npy b/margin_logs/step_0000125.npy new file mode 100644 index 0000000..76c9b69 --- /dev/null +++ b/margin_logs/step_0000125.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c9ed1aa7ee7e4701421aa073ab27985dec0346dfff76e518cf06f6360f9681ea +size 384 diff --git a/margin_logs/step_0000126.npy b/margin_logs/step_0000126.npy new file mode 100644 index 0000000..88143cc --- /dev/null +++ b/margin_logs/step_0000126.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8facc9e0bdaa72801c6b57f84deb5bf539de2b5e597a99c9305f3dae790489e4 +size 384 diff --git a/margin_logs/step_0000127.npy b/margin_logs/step_0000127.npy new file mode 100644 index 0000000..7b18434 --- /dev/null +++ b/margin_logs/step_0000127.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fae98c79aa5409f71eae367f4786abb7b37a48979bd169a938c369646e8e40f5 +size 384 diff --git a/margin_logs/step_0000128.npy b/margin_logs/step_0000128.npy new file mode 100644 index 0000000..377ef6c --- /dev/null +++ b/margin_logs/step_0000128.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a6edab41c3a2516922b921aa370b3439a3feb9de72b79e769761e7661445637 +size 384 diff --git a/margin_logs/step_0000129.npy b/margin_logs/step_0000129.npy new file mode 100644 index 0000000..d322dd7 --- /dev/null +++ b/margin_logs/step_0000129.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f5009c0307ba8d1bea99eeb511154a8b120afd18804c203edeca4681de90777 +size 384 diff --git a/margin_logs/step_0000130.npy b/margin_logs/step_0000130.npy new file mode 100644 index 0000000..dcd86cc --- /dev/null +++ b/margin_logs/step_0000130.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67ffe5789617c7e4e8f5d6bf5b785d6a6a5edba46938af118b547f4f459133e1 +size 384 diff --git a/margin_logs/step_0000131.npy b/margin_logs/step_0000131.npy new file mode 100644 index 0000000..01d3bdd --- /dev/null +++ b/margin_logs/step_0000131.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eeb1d0315196ed9b2b1af7b59c7a6c862967e9b50ec8b27f86886859b0573c1a +size 384 diff --git a/margin_logs/step_0000132.npy b/margin_logs/step_0000132.npy new file mode 100644 index 0000000..4f938d0 --- /dev/null +++ b/margin_logs/step_0000132.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e8f9a514275542cc74c539548d84cb27e4f301a7f796cf915646f618abe5b98 +size 384 diff --git a/margin_logs/step_0000133.npy b/margin_logs/step_0000133.npy new file mode 100644 index 0000000..36e7df3 --- /dev/null +++ b/margin_logs/step_0000133.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:00eee86a853bdba585cd88e04ee30ddd6115c3e5eba080afd01bb284ff725c27 +size 384 diff --git a/margin_logs/step_0000134.npy b/margin_logs/step_0000134.npy new file mode 100644 index 0000000..a22bbb4 --- /dev/null +++ b/margin_logs/step_0000134.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba05ccdbe72d411f69e784a0df9c6ae01066dfb8d3d58876f9473eef5e778e37 +size 384 diff --git a/margin_logs/step_0000135.npy b/margin_logs/step_0000135.npy new file mode 100644 index 0000000..d4e6a4d --- /dev/null +++ b/margin_logs/step_0000135.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7227b3b50f882ea7ae822131a18874d3e5a0876a29f1e8201c0545f49c72e34f +size 384 diff --git a/margin_logs/step_0000136.npy b/margin_logs/step_0000136.npy new file mode 100644 index 0000000..fff755e --- /dev/null +++ b/margin_logs/step_0000136.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e922c56fc9d6dcd64af710a237dea3d63f011485501feff86dc0af55f9583cb +size 384 diff --git a/margin_logs/step_0000137.npy b/margin_logs/step_0000137.npy new file mode 100644 index 0000000..d20fdaf --- /dev/null +++ b/margin_logs/step_0000137.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e1018c06364328eb61ad51edc376a932eed293c0944b604038e4286f32a0380 +size 384 diff --git a/margin_logs/step_0000138.npy b/margin_logs/step_0000138.npy new file mode 100644 index 0000000..699823e --- /dev/null +++ b/margin_logs/step_0000138.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec0f21b3416292164972ebf118bc8443b139dd53bb9f252dbf187fdb5c97fc22 +size 384 diff --git a/margin_logs/step_0000139.npy b/margin_logs/step_0000139.npy new file mode 100644 index 0000000..74e9795 --- /dev/null +++ b/margin_logs/step_0000139.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e115a6876f6fb0b066dd2047edf2afac4cfcdfb76ec2cf8f3a7b85939abf5aca +size 384 diff --git a/margin_logs/step_0000140.npy b/margin_logs/step_0000140.npy new file mode 100644 index 0000000..0f3ed4e --- /dev/null +++ b/margin_logs/step_0000140.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:922ef6e2ed601813ce5f3017637c4bdaf6d052e365842e2460e0ea613091213e +size 384 diff --git a/margin_logs/step_0000141.npy b/margin_logs/step_0000141.npy new file mode 100644 index 0000000..099c7c9 --- /dev/null +++ b/margin_logs/step_0000141.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c7b7cb474b433437dabc9f55f0d2754e7517c3d2dfc5210dcc85fb1c2860745 +size 384 diff --git a/margin_logs/step_0000142.npy b/margin_logs/step_0000142.npy new file mode 100644 index 0000000..22cc6c7 --- /dev/null +++ b/margin_logs/step_0000142.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2bdbd3c1305f0b0fdff4f77dfeb2f224e4cef20539ffe10955393d4638a8c3c5 +size 384 diff --git a/margin_logs/step_0000143.npy b/margin_logs/step_0000143.npy new file mode 100644 index 0000000..aa7f7e7 --- /dev/null +++ b/margin_logs/step_0000143.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28846e27f6bf2de0e093b78df920a2a9a36b3a3f44512cd6f402efbd921a6fe4 +size 384 diff --git a/margin_logs/step_0000144.npy b/margin_logs/step_0000144.npy new file mode 100644 index 0000000..216834f --- /dev/null +++ b/margin_logs/step_0000144.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49ba2b0b74a09e3242990ce053dc5951f33b6461a5dfa90453fa55755bf3600b +size 384 diff --git a/margin_logs/step_0000145.npy b/margin_logs/step_0000145.npy new file mode 100644 index 0000000..3b51832 --- /dev/null +++ b/margin_logs/step_0000145.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f9acc5904d1d05a047ed5d48e055931f6248dccdfe2f1d2539becb77f60ae860 +size 384 diff --git a/margin_logs/step_0000146.npy b/margin_logs/step_0000146.npy new file mode 100644 index 0000000..71e3967 --- /dev/null +++ b/margin_logs/step_0000146.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0770c6c3764c2cdeb5cfcf728358a11dc9c9ff20243285ed16dadab548a9e0c +size 384 diff --git a/margin_logs/step_0000147.npy b/margin_logs/step_0000147.npy new file mode 100644 index 0000000..5c071d5 --- /dev/null +++ b/margin_logs/step_0000147.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4881b098f8f75e11b5482c27a6b5c8b5e669a16ce05982b06b43c93e86b16b9b +size 384 diff --git a/margin_logs/step_0000148.npy b/margin_logs/step_0000148.npy new file mode 100644 index 0000000..752641b --- /dev/null +++ b/margin_logs/step_0000148.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:30f9def4092792d3290032ead5be5ea443665840ee0999af3c945196ebc9d03e +size 384 diff --git a/margin_logs/step_0000149.npy b/margin_logs/step_0000149.npy new file mode 100644 index 0000000..8e63e90 --- /dev/null +++ b/margin_logs/step_0000149.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ec1805dd7c1bc7f5bb021d9a94df4382c67cb825858cd2a8c603402944ab063 +size 384 diff --git a/margin_logs/step_0000150.npy b/margin_logs/step_0000150.npy new file mode 100644 index 0000000..e2aaa35 --- /dev/null +++ b/margin_logs/step_0000150.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2300f72e1bd9bd84c5b65d0c47972b4931582c43ea17969024e4357f22245306 +size 384 diff --git a/margin_logs/step_0000151.npy b/margin_logs/step_0000151.npy new file mode 100644 index 0000000..ca17e41 --- /dev/null +++ b/margin_logs/step_0000151.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:98b7ca27a5b2a23dd4cac524169f59845035aa11e6055c14ef62f6b2c669faf2 +size 384 diff --git a/margin_logs/step_0000152.npy b/margin_logs/step_0000152.npy new file mode 100644 index 0000000..9cc0ac7 --- /dev/null +++ b/margin_logs/step_0000152.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13b0eeee783ebf5cf3a6188b568d55c461a7d5066577ddaca7652614429dff42 +size 384 diff --git a/margin_logs/step_0000153.npy b/margin_logs/step_0000153.npy new file mode 100644 index 0000000..7ed743b --- /dev/null +++ b/margin_logs/step_0000153.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a6eb25c5cccf9abcac49791e2832d1e5273aa6f0aeaa4dccfe97fa76f33eabfc +size 384 diff --git a/margin_logs/step_0000154.npy b/margin_logs/step_0000154.npy new file mode 100644 index 0000000..42148f0 --- /dev/null +++ b/margin_logs/step_0000154.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a720d97642742f830984b25eedf283d9a2fa8d1473b19e790d121d51c62fceb7 +size 384 diff --git a/margin_logs/step_0000155.npy b/margin_logs/step_0000155.npy new file mode 100644 index 0000000..eacc122 --- /dev/null +++ b/margin_logs/step_0000155.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:242baa5b8b1c1f1db56b749fee360391732ddb01a90aac2d1a2a3c65ab8e5516 +size 384 diff --git a/margin_logs/step_0000156.npy b/margin_logs/step_0000156.npy new file mode 100644 index 0000000..ec2d78b --- /dev/null +++ b/margin_logs/step_0000156.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09ad32073619942fd46b60af6497860aba98cd2ef64e5292e5fb6dccb79c827e +size 384 diff --git a/margin_logs/step_0000157.npy b/margin_logs/step_0000157.npy new file mode 100644 index 0000000..fde740d --- /dev/null +++ b/margin_logs/step_0000157.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:437c8ded4ae3a3a08498a5317328f01170db6578f168b870c1b91f555f55fb02 +size 384 diff --git a/margin_logs/step_0000158.npy b/margin_logs/step_0000158.npy new file mode 100644 index 0000000..9723fc3 --- /dev/null +++ b/margin_logs/step_0000158.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c6f3944e89bba9661d33d94cc9d22eeca1bf4ac8d44c5895fe459e607feefef +size 384 diff --git a/margin_logs/step_0000159.npy b/margin_logs/step_0000159.npy new file mode 100644 index 0000000..5aa0723 --- /dev/null +++ b/margin_logs/step_0000159.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dba9425b66868ac868ec39a5b5c93a1e457f67763b84f8e583d01a35f85dfa41 +size 384 diff --git a/margin_logs/step_0000160.npy b/margin_logs/step_0000160.npy new file mode 100644 index 0000000..0604259 --- /dev/null +++ b/margin_logs/step_0000160.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a7a7da91ee19cc0490014e1932e8adfde2cfd44c487b9e40ca4d8e3de18fb739 +size 384 diff --git a/margin_logs/step_0000161.npy b/margin_logs/step_0000161.npy new file mode 100644 index 0000000..365c3c9 --- /dev/null +++ b/margin_logs/step_0000161.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4521224da16ef7a126e1b22a4dfde6e95255493687f0c4c4aebf109caa2431ec +size 384 diff --git a/margin_logs/step_0000162.npy b/margin_logs/step_0000162.npy new file mode 100644 index 0000000..f7bb8f7 --- /dev/null +++ b/margin_logs/step_0000162.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14ac244df752b304646f7770946160ae0b1bcb648e7e7d6f28c7081a8b9a69fb +size 384 diff --git a/margin_logs/step_0000163.npy b/margin_logs/step_0000163.npy new file mode 100644 index 0000000..d4fecd1 --- /dev/null +++ b/margin_logs/step_0000163.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:821797282c171f9e53bc06d8d56f7601b1bf8b61f0ec35fb9936d43ab0823c35 +size 384 diff --git a/margin_logs/step_0000164.npy b/margin_logs/step_0000164.npy new file mode 100644 index 0000000..12aa229 --- /dev/null +++ b/margin_logs/step_0000164.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b2b2cd5c4fdc5f8bb111c2cc5b6e266e5ca082fdd104bf31de3efb7144a1d83 +size 384 diff --git a/margin_logs/step_0000165.npy b/margin_logs/step_0000165.npy new file mode 100644 index 0000000..a1530cb --- /dev/null +++ b/margin_logs/step_0000165.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a4a6788d56ebe46aa4612aa853015cee3ed091a1c0e77ce815e2aea86f76e2d0 +size 384 diff --git a/margin_logs/step_0000166.npy b/margin_logs/step_0000166.npy new file mode 100644 index 0000000..d1c2d23 --- /dev/null +++ b/margin_logs/step_0000166.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35a845b395708b60a851b6294c487ac6e4525a21b4a2ab0abf2c1bf157cca641 +size 384 diff --git a/margin_logs/step_0000167.npy b/margin_logs/step_0000167.npy new file mode 100644 index 0000000..61b7f8f --- /dev/null +++ b/margin_logs/step_0000167.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:106916ceaf8e77c79e1887f576324fb06814d096edd9d4eab63265243b77eabc +size 384 diff --git a/margin_logs/step_0000168.npy b/margin_logs/step_0000168.npy new file mode 100644 index 0000000..1272aeb --- /dev/null +++ b/margin_logs/step_0000168.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd9fbf61d86455f406022b01ac1e26f9b479ad9e836e817cf20d801dd4fba831 +size 384 diff --git a/margin_logs/step_0000169.npy b/margin_logs/step_0000169.npy new file mode 100644 index 0000000..0f2a885 --- /dev/null +++ b/margin_logs/step_0000169.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:537aed22a2285068ef6fb7ac87de93858c6820705bbb37d945055a2f20c4d055 +size 384 diff --git a/margin_logs/step_0000170.npy b/margin_logs/step_0000170.npy new file mode 100644 index 0000000..38a810e --- /dev/null +++ b/margin_logs/step_0000170.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08e73b0d3ee58f10658639734a80149d7ace11344f647e6520b49641d260240a +size 384 diff --git a/margin_logs/step_0000171.npy b/margin_logs/step_0000171.npy new file mode 100644 index 0000000..d1ca322 --- /dev/null +++ b/margin_logs/step_0000171.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f991b515bf7524bb62daffd154687625a6ccf8465738bc2ac25e70b8b764c75 +size 384 diff --git a/margin_logs/step_0000172.npy b/margin_logs/step_0000172.npy new file mode 100644 index 0000000..14a608f --- /dev/null +++ b/margin_logs/step_0000172.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a6bf4e0e58271e1392c186c2926fc337e3eb133a06274bc858dc89416bcc38de +size 384 diff --git a/margin_logs/step_0000173.npy b/margin_logs/step_0000173.npy new file mode 100644 index 0000000..5970300 --- /dev/null +++ b/margin_logs/step_0000173.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:357b852e768e23fefee7cf6dfbe9af6f7b82c40d985c14f6586ece30d3785c0f +size 384 diff --git a/margin_logs/step_0000174.npy b/margin_logs/step_0000174.npy new file mode 100644 index 0000000..976145b --- /dev/null +++ b/margin_logs/step_0000174.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36736d4f5ac1f1b335c96dad21fcb24a93769060c6d2b6d4c10d7d7c77922bce +size 384 diff --git a/margin_logs/step_0000175.npy b/margin_logs/step_0000175.npy new file mode 100644 index 0000000..78a58bb --- /dev/null +++ b/margin_logs/step_0000175.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a6e9a0969d1509d3ad254e333e93c1b43561ecc0cc1e366a11dcc171807325b7 +size 384 diff --git a/margin_logs/step_0000176.npy b/margin_logs/step_0000176.npy new file mode 100644 index 0000000..cc1d6bd --- /dev/null +++ b/margin_logs/step_0000176.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f125e013aeea48f0e12703dbc14fedaf90d2bb0334550a314178a5166823fd0 +size 384 diff --git a/margin_logs/step_0000177.npy b/margin_logs/step_0000177.npy new file mode 100644 index 0000000..24b8eb1 --- /dev/null +++ b/margin_logs/step_0000177.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93d1a4a5c826cceed581f8166cf595a7eda6f804d39bdf338484ffb855ccf7ff +size 384 diff --git a/margin_logs/step_0000178.npy b/margin_logs/step_0000178.npy new file mode 100644 index 0000000..7aa17ae --- /dev/null +++ b/margin_logs/step_0000178.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1038cd3caa7cafbacdbbd9d9cf2ea8dd73fd3dd2bf7897bae88c23b1fb5a5a7d +size 384 diff --git a/margin_logs/step_0000179.npy b/margin_logs/step_0000179.npy new file mode 100644 index 0000000..2844893 --- /dev/null +++ b/margin_logs/step_0000179.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a3314824b19da09a9852168f61a09139c5db403dfa0e44c23baf3066bfc0203 +size 384 diff --git a/margin_logs/step_0000180.npy b/margin_logs/step_0000180.npy new file mode 100644 index 0000000..70772f8 --- /dev/null +++ b/margin_logs/step_0000180.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8e51a3904b92afcb9fdd61d4cce2ef58fe27b1fde476c1fb4b9548773c9c9f4 +size 384 diff --git a/margin_logs/step_0000181.npy b/margin_logs/step_0000181.npy new file mode 100644 index 0000000..e1fcb31 --- /dev/null +++ b/margin_logs/step_0000181.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3438f0d29025b79f419cb82619e4b85a26b93952ba4e1e96c5d11e8078eb93ba +size 384 diff --git a/margin_logs/step_0000182.npy b/margin_logs/step_0000182.npy new file mode 100644 index 0000000..f0ccfd0 --- /dev/null +++ b/margin_logs/step_0000182.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:942efdd5cbe5ed271acaf6c3720c5ab159cc1afafa39750c361a61871f8c1576 +size 384 diff --git a/margin_logs/step_0000183.npy b/margin_logs/step_0000183.npy new file mode 100644 index 0000000..bf6a9c6 --- /dev/null +++ b/margin_logs/step_0000183.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba08d980bd7c3fcd685e1580a3174d2960a456576cbc7f2f082bbb9091e384b6 +size 384 diff --git a/margin_logs/step_0000184.npy b/margin_logs/step_0000184.npy new file mode 100644 index 0000000..817076b --- /dev/null +++ b/margin_logs/step_0000184.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97df332fa50c578dbebb00f0580a82458402782598fffdad521563ece3e983a1 +size 384 diff --git a/margin_logs/step_0000185.npy b/margin_logs/step_0000185.npy new file mode 100644 index 0000000..bbb89d1 --- /dev/null +++ b/margin_logs/step_0000185.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e136360b77c2ab6673ad6f9f3fd9293f4f1f403a8919252d61df5dd45ba40ab3 +size 384 diff --git a/margin_logs/step_0000186.npy b/margin_logs/step_0000186.npy new file mode 100644 index 0000000..fefcd30 --- /dev/null +++ b/margin_logs/step_0000186.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1d440163d38b90efb66f59bb5fda0a04e4f1aa3617a616c09ea0fab08fc5a35 +size 384 diff --git a/margin_logs/step_0000187.npy b/margin_logs/step_0000187.npy new file mode 100644 index 0000000..5fe4358 --- /dev/null +++ b/margin_logs/step_0000187.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e4ff13d0b510b20f75883f646c5264f5e5beb91046338a6a83c200467ed2c35 +size 384 diff --git a/margin_logs/step_0000188.npy b/margin_logs/step_0000188.npy new file mode 100644 index 0000000..459be0e --- /dev/null +++ b/margin_logs/step_0000188.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a55c3c33d51abdd1384eaa90677370d0bd87bdc73de6fe161b83d907cc359121 +size 384 diff --git a/margin_logs/step_0000189.npy b/margin_logs/step_0000189.npy new file mode 100644 index 0000000..1d130cb --- /dev/null +++ b/margin_logs/step_0000189.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6eb23bc480dfe5c169e4b202d42153091b0c77369346197962b58a6b5d23591 +size 384 diff --git a/margin_logs/step_0000190.npy b/margin_logs/step_0000190.npy new file mode 100644 index 0000000..f0ec426 --- /dev/null +++ b/margin_logs/step_0000190.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b1390853a07c719d4d878e53be85b3a1ec29f380af30915e380cfd92196c7693 +size 384 diff --git a/margin_logs/step_0000191.npy b/margin_logs/step_0000191.npy new file mode 100644 index 0000000..35b28cb --- /dev/null +++ b/margin_logs/step_0000191.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44d7b01708eda5fcc30f0c2c11cf621d0ae4f922e6f3f66edf288be225f0d1c4 +size 384 diff --git a/margin_logs/step_0000192.npy b/margin_logs/step_0000192.npy new file mode 100644 index 0000000..742a575 --- /dev/null +++ b/margin_logs/step_0000192.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02018efc9728261ce5f93a0b4166ecfa4b5539ff1b3dee92a29b8f309ea91d14 +size 384 diff --git a/margin_logs/step_0000193.npy b/margin_logs/step_0000193.npy new file mode 100644 index 0000000..b4121a5 --- /dev/null +++ b/margin_logs/step_0000193.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a3dcd76c47457a7990b9171087564637c14b8849d6acd137fa401490ae4ece1 +size 384 diff --git a/margin_logs/step_0000194.npy b/margin_logs/step_0000194.npy new file mode 100644 index 0000000..fcccbd5 --- /dev/null +++ b/margin_logs/step_0000194.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a768db64f5ad0b532890844ec5d55c3664bf76806023a6670972f8893566c69e +size 384 diff --git a/margin_logs/step_0000195.npy b/margin_logs/step_0000195.npy new file mode 100644 index 0000000..11d959f --- /dev/null +++ b/margin_logs/step_0000195.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:868443977adf5431ef9fe6208ff4b0cf15508afa3d41728391220adb2ca8fc14 +size 384 diff --git a/margin_logs/step_0000196.npy b/margin_logs/step_0000196.npy new file mode 100644 index 0000000..b0cd0fb --- /dev/null +++ b/margin_logs/step_0000196.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f449f38f5bb2438cfe39785dfe347dceef24b93ef867aaeccc596c919f8f2d95 +size 384 diff --git a/margin_logs/step_0000197.npy b/margin_logs/step_0000197.npy new file mode 100644 index 0000000..90ab87f --- /dev/null +++ b/margin_logs/step_0000197.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:469427b8f74ab17c350355e6d172f48fb59627dabbb9552b14e94ee9fc6e568c +size 384 diff --git a/margin_logs/step_0000198.npy b/margin_logs/step_0000198.npy new file mode 100644 index 0000000..415b4b3 --- /dev/null +++ b/margin_logs/step_0000198.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eae8e5f6cb20f3a723b7f945b9db35ab20ff4a16edbd01eb29b75479c8549617 +size 384 diff --git a/margin_logs/step_0000199.npy b/margin_logs/step_0000199.npy new file mode 100644 index 0000000..98a3609 --- /dev/null +++ b/margin_logs/step_0000199.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1ebf33043eab0730d6889591d3b3af26bcc6cc4e94e9ade3cd637559995d341 +size 384 diff --git a/margin_logs/step_0000200.npy b/margin_logs/step_0000200.npy new file mode 100644 index 0000000..199feba --- /dev/null +++ b/margin_logs/step_0000200.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea1b7b3a80c33c3233e8bfd86ef968b7baeb95cbf9b25daf27cccc6d86bb1227 +size 384 diff --git a/margin_logs/step_0000201.npy b/margin_logs/step_0000201.npy new file mode 100644 index 0000000..84e289a --- /dev/null +++ b/margin_logs/step_0000201.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e12a3e5978db9235e48c8c485c7582ad877e357cb6a5b38641bfbab76f3b50d4 +size 384 diff --git a/margin_logs/step_0000202.npy b/margin_logs/step_0000202.npy new file mode 100644 index 0000000..8415626 --- /dev/null +++ b/margin_logs/step_0000202.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5401145a27164e814d2b631f32a1b02a57841dec207bf6c6a9deb2f3496eb2c4 +size 384 diff --git a/margin_logs/step_0000203.npy b/margin_logs/step_0000203.npy new file mode 100644 index 0000000..19bbe3b --- /dev/null +++ b/margin_logs/step_0000203.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0c3828d1a90b1d07212e79078edb994255fd844353f3baf1ff445f4549abe95 +size 384 diff --git a/margin_logs/step_0000204.npy b/margin_logs/step_0000204.npy new file mode 100644 index 0000000..aeb1df5 --- /dev/null +++ b/margin_logs/step_0000204.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd14af66f0a3c1641c8cec417261b31767d64d42ef90bc0e76ae0d76515562bb +size 384 diff --git a/margin_logs/step_0000205.npy b/margin_logs/step_0000205.npy new file mode 100644 index 0000000..50e7639 --- /dev/null +++ b/margin_logs/step_0000205.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ccd772f200a9a8d2d9ff70d7b2e661c74745f720203f6a3e1d5c87627a6c1d5b +size 384 diff --git a/margin_logs/step_0000206.npy b/margin_logs/step_0000206.npy new file mode 100644 index 0000000..44c6524 --- /dev/null +++ b/margin_logs/step_0000206.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d1b2b25843bcb1e0bfa697a1ca7f586896dd434d39a2bfe8e99368e23701207 +size 384 diff --git a/margin_logs/step_0000207.npy b/margin_logs/step_0000207.npy new file mode 100644 index 0000000..2a7ffdd --- /dev/null +++ b/margin_logs/step_0000207.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55f6d17c5967cc04ce616cf7aaefa57c703eacc022cd24022309b281905e7dc1 +size 384 diff --git a/margin_logs/step_0000208.npy b/margin_logs/step_0000208.npy new file mode 100644 index 0000000..19776af --- /dev/null +++ b/margin_logs/step_0000208.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3560b2163a244be09fb456bdbedd8b7b544383fd52770ab0f5227c7966f5736 +size 384 diff --git a/margin_logs/step_0000209.npy b/margin_logs/step_0000209.npy new file mode 100644 index 0000000..84332fa --- /dev/null +++ b/margin_logs/step_0000209.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15df5ef4baabed8e208dd6bd93d6ccc63055ad6725b9231a48fd974d5ef3fc19 +size 384 diff --git a/margin_logs/step_0000210.npy b/margin_logs/step_0000210.npy new file mode 100644 index 0000000..e522f66 --- /dev/null +++ b/margin_logs/step_0000210.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c9331b6dbb0f5215ad732acfeeeb56cc86fe4ab061a16ee55d95699a3f2adc38 +size 384 diff --git a/margin_logs/step_0000211.npy b/margin_logs/step_0000211.npy new file mode 100644 index 0000000..66b9096 --- /dev/null +++ b/margin_logs/step_0000211.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7cfbcab3c59d209d2a7ccac1bd2af2c92a65883f53569459c0602e85b75d7911 +size 384 diff --git a/margin_logs/step_0000212.npy b/margin_logs/step_0000212.npy new file mode 100644 index 0000000..de5857a --- /dev/null +++ b/margin_logs/step_0000212.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e10febf208b1a685ccce630def3445ecb7acfb31a07113108cc7358401f1afb +size 384 diff --git a/margin_logs/step_0000213.npy b/margin_logs/step_0000213.npy new file mode 100644 index 0000000..704ca35 --- /dev/null +++ b/margin_logs/step_0000213.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4b7aae039d50c4307829de7ee1be436830cdb0fa1ccd02d2e2718f8b0dc027f +size 384 diff --git a/margin_logs/step_0000214.npy b/margin_logs/step_0000214.npy new file mode 100644 index 0000000..7eec76c --- /dev/null +++ b/margin_logs/step_0000214.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c1d447943665919f2fc0ac1ddfdb034e3ab3a414aaf7fbc2c35e2baabb568c1 +size 384 diff --git a/margin_logs/step_0000215.npy b/margin_logs/step_0000215.npy new file mode 100644 index 0000000..d1d675e --- /dev/null +++ b/margin_logs/step_0000215.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:890e7d5a2d40f46a113d85eb6f0f3f38e58da6491d2455b0c83f3ac7d8897c96 +size 384 diff --git a/margin_logs/step_0000216.npy b/margin_logs/step_0000216.npy new file mode 100644 index 0000000..91f278a --- /dev/null +++ b/margin_logs/step_0000216.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0feac7ba372d17e79fc13a771f07232379eeec87af0330c3387ed27bbe87d4a8 +size 384 diff --git a/margin_logs/step_0000217.npy b/margin_logs/step_0000217.npy new file mode 100644 index 0000000..78bb99d --- /dev/null +++ b/margin_logs/step_0000217.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:622dd42f6ccf98098c694460762d314cd46ebc3108363879ce5b804d7f920bb2 +size 384 diff --git a/margin_logs/step_0000218.npy b/margin_logs/step_0000218.npy new file mode 100644 index 0000000..d5a014e --- /dev/null +++ b/margin_logs/step_0000218.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee979785da0652876952c08e09e83fafbadeea71e027b8369073e6195f0bd7bd +size 384 diff --git a/margin_logs/step_0000219.npy b/margin_logs/step_0000219.npy new file mode 100644 index 0000000..b8b53d5 --- /dev/null +++ b/margin_logs/step_0000219.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:197393922bef4c0f6c5188063a06aba85387d2a8a2d4436cac38cb46b50da504 +size 384 diff --git a/margin_logs/step_0000220.npy b/margin_logs/step_0000220.npy new file mode 100644 index 0000000..9d9b1ae --- /dev/null +++ b/margin_logs/step_0000220.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba994d95afee2be5a5f59f4a3c9c7c768b6ed366b5d6ab27f7e50abe1109ccd8 +size 384 diff --git a/margin_logs/step_0000221.npy b/margin_logs/step_0000221.npy new file mode 100644 index 0000000..c240259 --- /dev/null +++ b/margin_logs/step_0000221.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b7c25f8ed34adcac4ac517701c2089ed84bdfd14bb1f47727de3cfaee066ff5 +size 384 diff --git a/margin_logs/step_0000222.npy b/margin_logs/step_0000222.npy new file mode 100644 index 0000000..8acc8ef --- /dev/null +++ b/margin_logs/step_0000222.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04c18094979c1b97927b16dd6e943f6b5affe944fe433f80f921c1648ae77e6e +size 384 diff --git a/margin_logs/step_0000223.npy b/margin_logs/step_0000223.npy new file mode 100644 index 0000000..9430bc3 --- /dev/null +++ b/margin_logs/step_0000223.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d6f022a55e251b8ce2dc0853552b70bbc30604a0415ece4b15b53174fafabbee +size 384 diff --git a/margin_logs/step_0000224.npy b/margin_logs/step_0000224.npy new file mode 100644 index 0000000..6923c67 --- /dev/null +++ b/margin_logs/step_0000224.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:761b6cd70d4509ccc15d39c5ab6ce6847094f85a9d501ea344bb521ce24ea6a3 +size 384 diff --git a/margin_logs/step_0000225.npy b/margin_logs/step_0000225.npy new file mode 100644 index 0000000..fa5dff0 --- /dev/null +++ b/margin_logs/step_0000225.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e08c0955a5390701357053b5270b56d4935c13333ae68d659444a84d12c21753 +size 384 diff --git a/margin_logs/step_0000226.npy b/margin_logs/step_0000226.npy new file mode 100644 index 0000000..5ad14a1 --- /dev/null +++ b/margin_logs/step_0000226.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c67968257377d19139ff69f568aa46c9d2833e1f155151e3ba8e5e65028c838 +size 384 diff --git a/margin_logs/step_0000227.npy b/margin_logs/step_0000227.npy new file mode 100644 index 0000000..e469936 --- /dev/null +++ b/margin_logs/step_0000227.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72602c083e2c0878b7a4e04dd3c19d2a90ad2a4092a61c0460e046fe2320986e +size 384 diff --git a/margin_logs/step_0000228.npy b/margin_logs/step_0000228.npy new file mode 100644 index 0000000..880aae0 --- /dev/null +++ b/margin_logs/step_0000228.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1486a1e1b9b87763a433a7c36f20d57b252f65fee185c409022d92ea39f06fc +size 384 diff --git a/margin_logs/step_0000229.npy b/margin_logs/step_0000229.npy new file mode 100644 index 0000000..dc66ca4 --- /dev/null +++ b/margin_logs/step_0000229.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:501e9ae0f89c6be713be75cc1b29efcc2655ea3c3978dc9245eb6b0dccbb155d +size 384 diff --git a/margin_logs/step_0000230.npy b/margin_logs/step_0000230.npy new file mode 100644 index 0000000..8b35f92 --- /dev/null +++ b/margin_logs/step_0000230.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d57bc1d4e044e08737da746c4fea12c592d82600737a17c53a6b3ff92423a55 +size 384 diff --git a/margin_logs/step_0000231.npy b/margin_logs/step_0000231.npy new file mode 100644 index 0000000..6999352 --- /dev/null +++ b/margin_logs/step_0000231.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c4f3e82a6dca6309f8d1648c6f71e3d3a445d904d897f663e52927976a95a63 +size 384 diff --git a/margin_logs/step_0000232.npy b/margin_logs/step_0000232.npy new file mode 100644 index 0000000..8858507 --- /dev/null +++ b/margin_logs/step_0000232.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3ac26e2021df507a226c43572fb48acd8ed56fa77778b7e17b145292dac9e9e +size 384 diff --git a/margin_logs/step_0000233.npy b/margin_logs/step_0000233.npy new file mode 100644 index 0000000..cd0f5d3 --- /dev/null +++ b/margin_logs/step_0000233.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d7b9d21f4c6fd730e77f86a694932be244e99927adf71f3fb905fd6a2639fd21 +size 384 diff --git a/margin_logs/step_0000234.npy b/margin_logs/step_0000234.npy new file mode 100644 index 0000000..9a843a4 --- /dev/null +++ b/margin_logs/step_0000234.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fee83a4497a5bc8c006265ae1d8e882b2f820ab6622a48273f35871cfc638c44 +size 384 diff --git a/margin_logs/step_0000235.npy b/margin_logs/step_0000235.npy new file mode 100644 index 0000000..2ac9658 --- /dev/null +++ b/margin_logs/step_0000235.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0539776a392c89c38b601c3eb9242048bb7f195fab41d9b2c88d0510dad869e +size 384 diff --git a/margin_logs/step_0000236.npy b/margin_logs/step_0000236.npy new file mode 100644 index 0000000..f315c13 --- /dev/null +++ b/margin_logs/step_0000236.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ca0958ae4d632c577147567258d2f62894a87f36f00d18a9664204547b8e1da +size 384 diff --git a/margin_logs/step_0000237.npy b/margin_logs/step_0000237.npy new file mode 100644 index 0000000..736900e --- /dev/null +++ b/margin_logs/step_0000237.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8bafb451cdcdac4b7408881aa56539e6d28f71c4526931cee86b333d70e0e5d +size 384 diff --git a/margin_logs/step_0000238.npy b/margin_logs/step_0000238.npy new file mode 100644 index 0000000..e8748cf --- /dev/null +++ b/margin_logs/step_0000238.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:322ea150cab02f497916473160fc6ae720d8bed8bf21823985722a65c3e738de +size 384 diff --git a/margin_logs/step_0000239.npy b/margin_logs/step_0000239.npy new file mode 100644 index 0000000..da46df3 --- /dev/null +++ b/margin_logs/step_0000239.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b5a4342402e70db008a874ec6957e1468e0ed1cb11ef26b24042bf4ef3c5b7b +size 384 diff --git a/margin_logs/step_0000240.npy b/margin_logs/step_0000240.npy new file mode 100644 index 0000000..240da82 --- /dev/null +++ b/margin_logs/step_0000240.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b67acd01e8aa4c161c7062cd11ac4f32d89548f8c6e670928513879adc8e0538 +size 384 diff --git a/margin_logs/step_0000241.npy b/margin_logs/step_0000241.npy new file mode 100644 index 0000000..75f8c80 --- /dev/null +++ b/margin_logs/step_0000241.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0274bd5df95ea135f89fbc12f203a479bc8e602eb321d53419cb0da0cb31838e +size 384 diff --git a/margin_logs/step_0000242.npy b/margin_logs/step_0000242.npy new file mode 100644 index 0000000..b21bf27 --- /dev/null +++ b/margin_logs/step_0000242.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5d8a77dff65bd684b5e04ecf20885977ef47f21e8a508ac8692519ab7c6ab2e +size 384 diff --git a/margin_logs/step_0000243.npy b/margin_logs/step_0000243.npy new file mode 100644 index 0000000..e9dc498 --- /dev/null +++ b/margin_logs/step_0000243.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4617a16992359239e8332619cf9b496ae1ca64e369009987f3ffda244a4ce345 +size 384 diff --git a/margin_logs/step_0000244.npy b/margin_logs/step_0000244.npy new file mode 100644 index 0000000..564b46d --- /dev/null +++ b/margin_logs/step_0000244.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29302450189383c32678b1ca89ce6b443e64fd988a3f6ad93d47fa6207f873d5 +size 384 diff --git a/margin_logs/step_0000245.npy b/margin_logs/step_0000245.npy new file mode 100644 index 0000000..7e4eb79 --- /dev/null +++ b/margin_logs/step_0000245.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3b40d6ccde5fef8a14181a3013a9631f8c98a2bde2e5bd09365c4c8179cf1a6 +size 384 diff --git a/margin_logs/step_0000246.npy b/margin_logs/step_0000246.npy new file mode 100644 index 0000000..a87175e --- /dev/null +++ b/margin_logs/step_0000246.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e45108327d80693fb021b25b166176d1a743d216d293d8c76cda08b4c230a9af +size 384 diff --git a/margin_logs/step_0000247.npy b/margin_logs/step_0000247.npy new file mode 100644 index 0000000..2616a74 --- /dev/null +++ b/margin_logs/step_0000247.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d703a928903b82eaa9d9b707c744cf7e01f27490f63eaf538539f638abedfe0a +size 384 diff --git a/margin_logs/step_0000248.npy b/margin_logs/step_0000248.npy new file mode 100644 index 0000000..f8ec289 --- /dev/null +++ b/margin_logs/step_0000248.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:429997122a9b1f5f5e3c6a33f572e206b1ab92c9c39eff62ce4950f7a159ad88 +size 384 diff --git a/margin_logs/step_0000249.npy b/margin_logs/step_0000249.npy new file mode 100644 index 0000000..57720a4 --- /dev/null +++ b/margin_logs/step_0000249.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3df8527c291131b7a967251921a1dc1369cacdc041760157cf12265f7eb6e9fd +size 384 diff --git a/margin_logs/step_0000250.npy b/margin_logs/step_0000250.npy new file mode 100644 index 0000000..5ba00e2 --- /dev/null +++ b/margin_logs/step_0000250.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:539e8a43c64d98c71360487a697cd20c5674aa6c0e493d784f5cb0fff7a1eb31 +size 384 diff --git a/margin_logs/step_0000251.npy b/margin_logs/step_0000251.npy new file mode 100644 index 0000000..f24f0f5 --- /dev/null +++ b/margin_logs/step_0000251.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d9c3de9e02b47a7a2fcaec0d9be2a14a2196310e206ec669982b215c44977cd +size 384 diff --git a/margin_logs/step_0000252.npy b/margin_logs/step_0000252.npy new file mode 100644 index 0000000..bf35b60 --- /dev/null +++ b/margin_logs/step_0000252.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e2c2058440d8f4f24053590b5491535655dccdd880256e53a32039bd6195237 +size 384 diff --git a/margin_logs/step_0000253.npy b/margin_logs/step_0000253.npy new file mode 100644 index 0000000..b25e467 --- /dev/null +++ b/margin_logs/step_0000253.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7fb5b2e8c7236e7bc1cb8dbeaedb6f3903edee9370e714ded9da225eb0efd90 +size 384 diff --git a/margin_logs/step_0000254.npy b/margin_logs/step_0000254.npy new file mode 100644 index 0000000..f6e17ed --- /dev/null +++ b/margin_logs/step_0000254.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f4fb4cd58d03e7c8a24920622f3e849a08cc6bc61c6ce5aeb00c1920868baa8 +size 384 diff --git a/margin_logs/step_0000255.npy b/margin_logs/step_0000255.npy new file mode 100644 index 0000000..3b04001 --- /dev/null +++ b/margin_logs/step_0000255.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49a1ef920db33400399ba2d03c7216f2ab0047e49e211fa2339a5ec513f22273 +size 384 diff --git a/margin_logs/step_0000256.npy b/margin_logs/step_0000256.npy new file mode 100644 index 0000000..c536962 --- /dev/null +++ b/margin_logs/step_0000256.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08fd5c32d530e99bd3333d9e394bef5335015aade9e5b5693f36352f1c984641 +size 384 diff --git a/margin_logs/step_0000257.npy b/margin_logs/step_0000257.npy new file mode 100644 index 0000000..9d7e2cc --- /dev/null +++ b/margin_logs/step_0000257.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15f7da4c6df64aa4f373bf7cccfb7339a85d17ec79a826f0250930e225a70569 +size 384 diff --git a/margin_logs/step_0000258.npy b/margin_logs/step_0000258.npy new file mode 100644 index 0000000..19fdb09 --- /dev/null +++ b/margin_logs/step_0000258.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e426c5b7fa714f4cb6912dafff8910f53d4417dbdc19f11c36958f299dfbf18 +size 384 diff --git a/margin_logs/step_0000259.npy b/margin_logs/step_0000259.npy new file mode 100644 index 0000000..86677cb --- /dev/null +++ b/margin_logs/step_0000259.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3fababb9a78cb4d5ba275624993256a791a779c0e75d6100974f722671c31fef +size 384 diff --git a/margin_logs/step_0000260.npy b/margin_logs/step_0000260.npy new file mode 100644 index 0000000..9d3a622 --- /dev/null +++ b/margin_logs/step_0000260.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6d3f95e669b2e29b9d62675571e068533cf575fc184170666fd6d8d91dd57e0 +size 384 diff --git a/margin_logs/step_0000261.npy b/margin_logs/step_0000261.npy new file mode 100644 index 0000000..6e568ac --- /dev/null +++ b/margin_logs/step_0000261.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a5bb70f198e0e60836e6c0e26e5758ce81bb4b842e43c3bdad42dd39564b156c +size 384 diff --git a/margin_logs/step_0000262.npy b/margin_logs/step_0000262.npy new file mode 100644 index 0000000..b8ecd70 --- /dev/null +++ b/margin_logs/step_0000262.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:63cb71f106d3f2be551d5f75f90daf319c15ca6848dfee5afe1849cf6fbf288f +size 384 diff --git a/margin_logs/step_0000263.npy b/margin_logs/step_0000263.npy new file mode 100644 index 0000000..c121363 --- /dev/null +++ b/margin_logs/step_0000263.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9edf579eef1363055ce7013529cff1ce8d011b94715bca04634b937064ad2aab +size 384 diff --git a/margin_logs/step_0000264.npy b/margin_logs/step_0000264.npy new file mode 100644 index 0000000..8cdd8c2 --- /dev/null +++ b/margin_logs/step_0000264.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44a8c980ee7766d312bf3375ec5065f33ca4fd1c425f3ebfe6e7b15ffec0f3c3 +size 384 diff --git a/margin_logs/step_0000265.npy b/margin_logs/step_0000265.npy new file mode 100644 index 0000000..df43dc2 --- /dev/null +++ b/margin_logs/step_0000265.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8dd30c455c30db30a9e84f1c657aaefbcbc4ac615015f03220e883454a11d3e +size 384 diff --git a/margin_logs/step_0000266.npy b/margin_logs/step_0000266.npy new file mode 100644 index 0000000..cd3b01a --- /dev/null +++ b/margin_logs/step_0000266.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e36ccbe3bb2cb3bcd59fec2c00ebe2d14d3e0f726ec3caf68adf2c9f0d3ff34d +size 384 diff --git a/margin_logs/step_0000267.npy b/margin_logs/step_0000267.npy new file mode 100644 index 0000000..4b62553 --- /dev/null +++ b/margin_logs/step_0000267.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d6571d637ad34e8d2d9693c955e8105e003fac61f71299784b0a97f001d569d +size 384 diff --git a/margin_logs/step_0000268.npy b/margin_logs/step_0000268.npy new file mode 100644 index 0000000..9d30890 --- /dev/null +++ b/margin_logs/step_0000268.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15e46248c3fa12b4b050ceb39f4f58a174906893a8ba5854a18362d5f82fa849 +size 384 diff --git a/margin_logs/step_0000269.npy b/margin_logs/step_0000269.npy new file mode 100644 index 0000000..bf5d9b6 --- /dev/null +++ b/margin_logs/step_0000269.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b1eca173a817f1a0aa79d875b0eef3de70ab70f6ed6934600f40aa4e07ad4241 +size 384 diff --git a/margin_logs/step_0000270.npy b/margin_logs/step_0000270.npy new file mode 100644 index 0000000..ceebe1f --- /dev/null +++ b/margin_logs/step_0000270.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3357df2a13989a09a7cd0ab96b70a2fac35e47c3c461da0d20eddb6621d8129d +size 384 diff --git a/margin_logs/step_0000271.npy b/margin_logs/step_0000271.npy new file mode 100644 index 0000000..c400d90 --- /dev/null +++ b/margin_logs/step_0000271.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:57f6484b31bec65d0f98fc39711c74f57917d43647f28c6bd7c2e7a4dc9ee075 +size 384 diff --git a/margin_logs/step_0000272.npy b/margin_logs/step_0000272.npy new file mode 100644 index 0000000..e656818 --- /dev/null +++ b/margin_logs/step_0000272.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0cae154abb0c4d86cbadf974fb884489717b2ed60810dda21bda0358c9fa48a +size 384 diff --git a/margin_logs/step_0000273.npy b/margin_logs/step_0000273.npy new file mode 100644 index 0000000..da90f7b --- /dev/null +++ b/margin_logs/step_0000273.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f6587cf405bf506cb39ebace242250b9bfa02cc31804c921ab6de63bcd126491 +size 384 diff --git a/margin_logs/step_0000274.npy b/margin_logs/step_0000274.npy new file mode 100644 index 0000000..e8be9ca --- /dev/null +++ b/margin_logs/step_0000274.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3141858d93fe32c6e360af4171da15b49d1ab05436547cea56653c4ef4e7f2b2 +size 384 diff --git a/margin_logs/step_0000275.npy b/margin_logs/step_0000275.npy new file mode 100644 index 0000000..5e5775a --- /dev/null +++ b/margin_logs/step_0000275.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d1901c72040241cc11387b5b995aba9fc1f872e596bba4b8fcb4264a7aed40f +size 384 diff --git a/margin_logs/step_0000276.npy b/margin_logs/step_0000276.npy new file mode 100644 index 0000000..310f354 --- /dev/null +++ b/margin_logs/step_0000276.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8bc959c6e711bc2a0f597f8a8095337fcf1fe433942940c15f1a7315a0392ec4 +size 384 diff --git a/margin_logs/step_0000277.npy b/margin_logs/step_0000277.npy new file mode 100644 index 0000000..320033b --- /dev/null +++ b/margin_logs/step_0000277.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6991ce58241e1c8bdf8d67ebc69a46866f71874ee8bc01cfd334a10fa4fc618c +size 384 diff --git a/margin_logs/step_0000278.npy b/margin_logs/step_0000278.npy new file mode 100644 index 0000000..257b76e --- /dev/null +++ b/margin_logs/step_0000278.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab6c76dbcfa420e7ab696cd2d354ceceaa64aba0d8ad46d4f34ad1954ab00ff5 +size 384 diff --git a/margin_logs/step_0000279.npy b/margin_logs/step_0000279.npy new file mode 100644 index 0000000..f22b1bf --- /dev/null +++ b/margin_logs/step_0000279.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a9e3300ab67778b81dd7d4e03c2b58c4ec19dd881fede27de2ba91b9eab9989 +size 384 diff --git a/margin_logs/step_0000280.npy b/margin_logs/step_0000280.npy new file mode 100644 index 0000000..0c038a9 --- /dev/null +++ b/margin_logs/step_0000280.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b02967a9fc2001c773d9a8868c09427086e189c43270caea2cc7ab1661a5949 +size 384 diff --git a/margin_logs/step_0000281.npy b/margin_logs/step_0000281.npy new file mode 100644 index 0000000..ce3922c --- /dev/null +++ b/margin_logs/step_0000281.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b72b3d7b4e27e71e4d906e7f6084e8e88366b4b09920acd2bd08d512249337b0 +size 384 diff --git a/margin_logs/step_0000282.npy b/margin_logs/step_0000282.npy new file mode 100644 index 0000000..fc415cd --- /dev/null +++ b/margin_logs/step_0000282.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:faf6cf9569f33925fae1f51d03c369750e5b6bbf26305ee46c5c8271af20ef4d +size 384 diff --git a/margin_logs/step_0000283.npy b/margin_logs/step_0000283.npy new file mode 100644 index 0000000..2f31093 --- /dev/null +++ b/margin_logs/step_0000283.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40431656a27076399494d690a0873cd7f10238e9cb478551fe784525d7c5b9d9 +size 384 diff --git a/margin_logs/step_0000284.npy b/margin_logs/step_0000284.npy new file mode 100644 index 0000000..887af51 --- /dev/null +++ b/margin_logs/step_0000284.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d4ae115ab706b1a724d9beb117709467a9c639f1f6a40538729902035a2447d +size 384 diff --git a/margin_logs/step_0000285.npy b/margin_logs/step_0000285.npy new file mode 100644 index 0000000..ceebb0d --- /dev/null +++ b/margin_logs/step_0000285.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:439680e147ce8cc472687e0cb8ab01e7ab0037bc2eec030e1e1c4c221704bf77 +size 384 diff --git a/margin_logs/step_0000286.npy b/margin_logs/step_0000286.npy new file mode 100644 index 0000000..07067d2 --- /dev/null +++ b/margin_logs/step_0000286.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:205d8b55f8fe218f5e10283d4c23fbcd940aba6ef85702b6b64443b614481a49 +size 384 diff --git a/margin_logs/step_0000287.npy b/margin_logs/step_0000287.npy new file mode 100644 index 0000000..8d51086 --- /dev/null +++ b/margin_logs/step_0000287.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3fdc649012086b29db6daedd0b91a2c45b9baee7d0a846b74fa73f3745f43116 +size 384 diff --git a/margin_logs/step_0000288.npy b/margin_logs/step_0000288.npy new file mode 100644 index 0000000..8ad08f4 --- /dev/null +++ b/margin_logs/step_0000288.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae4e38fbe26c2bb30bc6bb81bfb9455f79de70630448e9e5dd1098fe468b86a0 +size 384 diff --git a/margin_logs/step_0000289.npy b/margin_logs/step_0000289.npy new file mode 100644 index 0000000..b7bb36e --- /dev/null +++ b/margin_logs/step_0000289.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5276218d1cd8bc171ce6896c7f35147a0953dc4905fd841c0b9ef34b2315f01d +size 384 diff --git a/margin_logs/step_0000290.npy b/margin_logs/step_0000290.npy new file mode 100644 index 0000000..63fd9ad --- /dev/null +++ b/margin_logs/step_0000290.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cce4bc2dd45ad34d9622b714f68c01042f7ef2f1a2a30124d14d2fd8c6b3a263 +size 384 diff --git a/margin_logs/step_0000291.npy b/margin_logs/step_0000291.npy new file mode 100644 index 0000000..46d0acb --- /dev/null +++ b/margin_logs/step_0000291.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f570ec5eb8c0973f7e7f19379563379e9fdd2b027662db0af7439d6c0724ef6 +size 384 diff --git a/margin_logs/step_0000292.npy b/margin_logs/step_0000292.npy new file mode 100644 index 0000000..ea86ba0 --- /dev/null +++ b/margin_logs/step_0000292.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e39d3fbef87f94082877765eed4539434d5d9aed324ae45e9a1238e900a511a4 +size 384 diff --git a/margin_logs/step_0000293.npy b/margin_logs/step_0000293.npy new file mode 100644 index 0000000..8701e4e --- /dev/null +++ b/margin_logs/step_0000293.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7a62e8634d376ba1f75596171fbac927a40ff03db984c77ed8a348b07d0f6e0 +size 384 diff --git a/margin_logs/step_0000294.npy b/margin_logs/step_0000294.npy new file mode 100644 index 0000000..4a0d3fe --- /dev/null +++ b/margin_logs/step_0000294.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2483ea288f4a8fdb43c30ddd1cd4d8d3f16e174c236470d3e2a6b9e674c1c590 +size 384 diff --git a/margin_logs/step_0000295.npy b/margin_logs/step_0000295.npy new file mode 100644 index 0000000..7c84554 --- /dev/null +++ b/margin_logs/step_0000295.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1393039ae4872c87ab309da44f1f11d5a93406769c687be0a43d43e27f8b90c4 +size 384 diff --git a/margin_logs/step_0000296.npy b/margin_logs/step_0000296.npy new file mode 100644 index 0000000..0444e31 --- /dev/null +++ b/margin_logs/step_0000296.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4445c37891b442fbb1bcffec2e1f82ca9b7d9565a091b088eeaa18a4713f8214 +size 384 diff --git a/margin_logs/step_0000297.npy b/margin_logs/step_0000297.npy new file mode 100644 index 0000000..9dc8f66 --- /dev/null +++ b/margin_logs/step_0000297.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:53d227036520d63916e2c36e56bc4bff6d50546ff359c09f4faa6e8b8bf8f90a +size 384 diff --git a/margin_logs/step_0000298.npy b/margin_logs/step_0000298.npy new file mode 100644 index 0000000..3100265 --- /dev/null +++ b/margin_logs/step_0000298.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:99c232e1791bd2176d1a6733735f6c1385f011e52bd48e58103fe40de2f20f04 +size 384 diff --git a/margin_logs/step_0000299.npy b/margin_logs/step_0000299.npy new file mode 100644 index 0000000..f5463f3 --- /dev/null +++ b/margin_logs/step_0000299.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:202e6727843a2ac51e8c3fa349ed4222b6ea150b4b57c10c86031992520bc505 +size 384 diff --git a/margin_logs/step_0000300.npy b/margin_logs/step_0000300.npy new file mode 100644 index 0000000..9cc9101 --- /dev/null +++ b/margin_logs/step_0000300.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f93b74bad795a252937f2a6c7424bd222d65b57ee9748b95708a6e9c2199bcd6 +size 384 diff --git a/margin_logs/step_0000301.npy b/margin_logs/step_0000301.npy new file mode 100644 index 0000000..581668f --- /dev/null +++ b/margin_logs/step_0000301.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1963b5290f8ac1707b84a5c7141bfe48f730d3c448784ad4c1ac4c74a6aad452 +size 384 diff --git a/margin_logs/step_0000302.npy b/margin_logs/step_0000302.npy new file mode 100644 index 0000000..ee4b6d8 --- /dev/null +++ b/margin_logs/step_0000302.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fec7bb1ee26e525bd84d77be9819888f4ccc8ffd80e131efb15ca8ecc9685900 +size 384 diff --git a/margin_logs/step_0000303.npy b/margin_logs/step_0000303.npy new file mode 100644 index 0000000..f6d21de --- /dev/null +++ b/margin_logs/step_0000303.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:453c446dbe1b05e57d0e333abcd67c226eb90a316f39303dda6efbe00efa8406 +size 384 diff --git a/margin_logs/step_0000304.npy b/margin_logs/step_0000304.npy new file mode 100644 index 0000000..1dec6b3 --- /dev/null +++ b/margin_logs/step_0000304.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca537c933dc90b22652ee0d21d440508fa5ff669c13b74d4672e731891b58da0 +size 384 diff --git a/margin_logs/step_0000305.npy b/margin_logs/step_0000305.npy new file mode 100644 index 0000000..788e223 --- /dev/null +++ b/margin_logs/step_0000305.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:894c040950e2b677fb4f3f153a37b4b7cdc33ec82e3c10faf75d52b9f078ca19 +size 384 diff --git a/margin_logs/step_0000306.npy b/margin_logs/step_0000306.npy new file mode 100644 index 0000000..28364ec --- /dev/null +++ b/margin_logs/step_0000306.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc70d70a992850d122529a2900f3e8d99f139e5c0d266ed474c1a3d93b4eaf5b +size 384 diff --git a/margin_logs/step_0000307.npy b/margin_logs/step_0000307.npy new file mode 100644 index 0000000..e0a27ce --- /dev/null +++ b/margin_logs/step_0000307.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d81db85f54dccfa52a901654f9ae6e11d8903dae4765918efced0668bfcbcd65 +size 384 diff --git a/margin_logs/step_0000308.npy b/margin_logs/step_0000308.npy new file mode 100644 index 0000000..1bbe267 --- /dev/null +++ b/margin_logs/step_0000308.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78263b16675c78952af55c4e656d2636f8f5326e060095779061ca865046bec3 +size 384 diff --git a/margin_logs/step_0000309.npy b/margin_logs/step_0000309.npy new file mode 100644 index 0000000..d8365d4 --- /dev/null +++ b/margin_logs/step_0000309.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c3778942755b9c693718c620e56d5a966450d61feb00a26aff591d6067848b1 +size 384 diff --git a/margin_logs/step_0000310.npy b/margin_logs/step_0000310.npy new file mode 100644 index 0000000..ce48cf3 --- /dev/null +++ b/margin_logs/step_0000310.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:862ea4eb145c7c04ea3f1cdac027e8d9c2eabec8c6f856822c944af66b6782a8 +size 384 diff --git a/margin_logs/step_0000311.npy b/margin_logs/step_0000311.npy new file mode 100644 index 0000000..18643b5 --- /dev/null +++ b/margin_logs/step_0000311.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dacc67c0dfd0895daf2010fd8f5c9ef294e281a16c944fd1596fa9a4bec36105 +size 384 diff --git a/margin_logs/step_0000312.npy b/margin_logs/step_0000312.npy new file mode 100644 index 0000000..ea62085 --- /dev/null +++ b/margin_logs/step_0000312.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b46210f4e68039668b5ff5f72e3e9fd6418ed2726a6793ec22e0aec5c035b8a6 +size 384 diff --git a/margin_logs/step_0000313.npy b/margin_logs/step_0000313.npy new file mode 100644 index 0000000..955be8d --- /dev/null +++ b/margin_logs/step_0000313.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0152ffe16629a65cd64edf5e20d9ceaa66928265b37afc5f381545b30d6716d +size 384 diff --git a/margin_logs/step_0000314.npy b/margin_logs/step_0000314.npy new file mode 100644 index 0000000..0bc0528 --- /dev/null +++ b/margin_logs/step_0000314.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:157055c3362b12ac0099bb0e187b2b35adae583341cf764bd22dbdc8dcf96ba0 +size 384 diff --git a/margin_logs/step_0000315.npy b/margin_logs/step_0000315.npy new file mode 100644 index 0000000..63b5da4 --- /dev/null +++ b/margin_logs/step_0000315.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f3d96973ccf245992a7f22b1dab33c58b1c66b167408597f7d654c78d41b95b +size 384 diff --git a/margin_logs/step_0000316.npy b/margin_logs/step_0000316.npy new file mode 100644 index 0000000..9e41b43 --- /dev/null +++ b/margin_logs/step_0000316.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be22edd92f47cd49eda3fd810c8da3933cdd5f1f43117a72a4231aef87692591 +size 384 diff --git a/margin_logs/step_0000317.npy b/margin_logs/step_0000317.npy new file mode 100644 index 0000000..6f2cc21 --- /dev/null +++ b/margin_logs/step_0000317.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a001721d5fd4f3dfb06858332398ac8bd31f6b2a3dd75dcf8a650926ed24a0c +size 384 diff --git a/margin_logs/step_0000318.npy b/margin_logs/step_0000318.npy new file mode 100644 index 0000000..4f76050 --- /dev/null +++ b/margin_logs/step_0000318.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64a9b0e59d1f4e181d2c40fd194e99efa345d868180a52d4efe1e12e9066a02a +size 384 diff --git a/margin_logs/step_0000319.npy b/margin_logs/step_0000319.npy new file mode 100644 index 0000000..6e9f3d3 --- /dev/null +++ b/margin_logs/step_0000319.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:099bd0f44898dc74d88f5e528f4edfe7d72f8893ccfa9977d9eceb3c993a51b6 +size 384 diff --git a/margin_logs/step_0000320.npy b/margin_logs/step_0000320.npy new file mode 100644 index 0000000..25025ad --- /dev/null +++ b/margin_logs/step_0000320.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:032c4190f25561f9ac7113868c2e9b6f82b3717284c6c792a8c00674ea4cc0b8 +size 384 diff --git a/margin_logs/step_0000321.npy b/margin_logs/step_0000321.npy new file mode 100644 index 0000000..b755757 --- /dev/null +++ b/margin_logs/step_0000321.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d98712c0e9470818337539ab6974f0741afd719f55e4bca241fb3ab9bc62ca71 +size 384 diff --git a/margin_logs/step_0000322.npy b/margin_logs/step_0000322.npy new file mode 100644 index 0000000..7d5546b --- /dev/null +++ b/margin_logs/step_0000322.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4013440ff6bf6c321b3045e93430a4e2c390b84b628459972c9c6c30faed37d3 +size 384 diff --git a/margin_logs/step_0000323.npy b/margin_logs/step_0000323.npy new file mode 100644 index 0000000..36a2a75 --- /dev/null +++ b/margin_logs/step_0000323.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c805c2e57af683a5e17391ac2c3fd35948512770c015f859db6b43c25fac346b +size 384 diff --git a/margin_logs/step_0000324.npy b/margin_logs/step_0000324.npy new file mode 100644 index 0000000..b536a96 --- /dev/null +++ b/margin_logs/step_0000324.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b784465f7b92a12577c94e55cf611259b96a3e91ab5672a3dd19576e32c4db93 +size 384 diff --git a/margin_logs/step_0000325.npy b/margin_logs/step_0000325.npy new file mode 100644 index 0000000..914cdde --- /dev/null +++ b/margin_logs/step_0000325.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f23b7d785f79b7eee7ac7e4e2d9ccaae7b7200da41229a66d2888ac1f72a45e7 +size 384 diff --git a/margin_logs/step_0000326.npy b/margin_logs/step_0000326.npy new file mode 100644 index 0000000..7f7db78 --- /dev/null +++ b/margin_logs/step_0000326.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58275699560292ff44fd75741eb2a6fd3ec396bd2e7a70672b00c18bac5b06dd +size 384 diff --git a/margin_logs/step_0000327.npy b/margin_logs/step_0000327.npy new file mode 100644 index 0000000..e49fb1c --- /dev/null +++ b/margin_logs/step_0000327.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:466241d8a6f2fd3dd361c031918279118bfba23fed4523a4460e4860533df1b9 +size 384 diff --git a/margin_logs/step_0000328.npy b/margin_logs/step_0000328.npy new file mode 100644 index 0000000..a9ca74f --- /dev/null +++ b/margin_logs/step_0000328.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df7e266ba34598f16588fa765a5f99c5c3d3b6d465ed9591c9221781dc184f4d +size 384 diff --git a/margin_logs/step_0000329.npy b/margin_logs/step_0000329.npy new file mode 100644 index 0000000..936d475 --- /dev/null +++ b/margin_logs/step_0000329.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:287d0eb5dbe28ce0ddab1b1622b313ae45c6fbdb18c7a11e6e7eb5f79ce48fbe +size 384 diff --git a/margin_logs/step_0000330.npy b/margin_logs/step_0000330.npy new file mode 100644 index 0000000..a502bd8 --- /dev/null +++ b/margin_logs/step_0000330.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb751327973a5538698bf9968fba04ef836f918e27c924d4292d106bd319fc26 +size 384 diff --git a/margin_logs/step_0000331.npy b/margin_logs/step_0000331.npy new file mode 100644 index 0000000..a875410 --- /dev/null +++ b/margin_logs/step_0000331.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13f910782418d01e6f42b99c97c379d3fcd04c9c953e7fc299c1f874157bf4f2 +size 384 diff --git a/margin_logs/step_0000332.npy b/margin_logs/step_0000332.npy new file mode 100644 index 0000000..90dd9da --- /dev/null +++ b/margin_logs/step_0000332.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6240242e5f523dab1b178e48d0aa993e8b7c601e44bd506fa497a0f459ca7154 +size 384 diff --git a/margin_logs/step_0000333.npy b/margin_logs/step_0000333.npy new file mode 100644 index 0000000..df64419 --- /dev/null +++ b/margin_logs/step_0000333.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d797f4a23512aa2f47cc2d2c49499df327b06379f615e8a3dc25d336addd762 +size 384 diff --git a/margin_logs/step_0000334.npy b/margin_logs/step_0000334.npy new file mode 100644 index 0000000..23f75a5 --- /dev/null +++ b/margin_logs/step_0000334.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2070143b651ea95fcbf6fe1fc2bfcd204bba5c81f2203987a0733c7c5c7309ae +size 384 diff --git a/margin_logs/step_0000335.npy b/margin_logs/step_0000335.npy new file mode 100644 index 0000000..775cb85 --- /dev/null +++ b/margin_logs/step_0000335.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23d124ade35f98106851c1e71a154ac4df925b01b2565bc0a25e07a9ad190ef7 +size 384 diff --git a/margin_logs/step_0000336.npy b/margin_logs/step_0000336.npy new file mode 100644 index 0000000..65d7ef0 --- /dev/null +++ b/margin_logs/step_0000336.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dab14f9c97e2f3ac3f33304fc91d86e86c35aa96f495163f58f2ed61420848cd +size 384 diff --git a/margin_logs/step_0000337.npy b/margin_logs/step_0000337.npy new file mode 100644 index 0000000..9f376c3 --- /dev/null +++ b/margin_logs/step_0000337.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b62d6ebaf785aefe88c3213e9a4c899fff0b968d86ffa5b3044a5cd0eff5f652 +size 384 diff --git a/margin_logs/step_0000338.npy b/margin_logs/step_0000338.npy new file mode 100644 index 0000000..81130b1 --- /dev/null +++ b/margin_logs/step_0000338.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1e044fd52b49a080364bcf8a31aac01f98990804e95f0ef1b28a4aab9de2215 +size 384 diff --git a/margin_logs/step_0000339.npy b/margin_logs/step_0000339.npy new file mode 100644 index 0000000..01a7004 --- /dev/null +++ b/margin_logs/step_0000339.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9fb7904b6463e4ad2ccf4da332a42e182f630a49fca40f7f67b4a023abaa48b6 +size 384 diff --git a/margin_logs/step_0000340.npy b/margin_logs/step_0000340.npy new file mode 100644 index 0000000..00902c1 --- /dev/null +++ b/margin_logs/step_0000340.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:985d621d19a38097287798ae1c4a418a5e5a10845619c925890834c357385c16 +size 384 diff --git a/margin_logs/step_0000341.npy b/margin_logs/step_0000341.npy new file mode 100644 index 0000000..05bad54 --- /dev/null +++ b/margin_logs/step_0000341.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:725bd4f63a7ecdb231f6801eeeb806780ab2409a2d67b9c18bad0b9b81d909c3 +size 384 diff --git a/margin_logs/step_0000342.npy b/margin_logs/step_0000342.npy new file mode 100644 index 0000000..57ae0b6 --- /dev/null +++ b/margin_logs/step_0000342.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56a9c66763b76d8c3b10ada8bdedc20db8fe6a70c3796278fbf20090b2caf91d +size 384 diff --git a/margin_logs/step_0000343.npy b/margin_logs/step_0000343.npy new file mode 100644 index 0000000..fd3471a --- /dev/null +++ b/margin_logs/step_0000343.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:875dcdfe02a089154763177071d019a43087d76646251d487b2b20fa8cdfb7f7 +size 384 diff --git a/margin_logs/step_0000344.npy b/margin_logs/step_0000344.npy new file mode 100644 index 0000000..d16e421 --- /dev/null +++ b/margin_logs/step_0000344.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd83d9ae20bd1217355faa63d9ea0544cd7d2d7d1c6f143425369bdf2b94c10c +size 384 diff --git a/margin_logs/step_0000345.npy b/margin_logs/step_0000345.npy new file mode 100644 index 0000000..942c937 --- /dev/null +++ b/margin_logs/step_0000345.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da7801b0af7d92200e923b1ddae93175334605746b30a13d3991d73c9c0d0a74 +size 384 diff --git a/margin_logs/step_0000346.npy b/margin_logs/step_0000346.npy new file mode 100644 index 0000000..f077568 --- /dev/null +++ b/margin_logs/step_0000346.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df3ef333e30cbb86775a6206cb0f894f7be1b0ea0524961f61e8f0631f8e2379 +size 384 diff --git a/margin_logs/step_0000347.npy b/margin_logs/step_0000347.npy new file mode 100644 index 0000000..caf66cd --- /dev/null +++ b/margin_logs/step_0000347.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e203cef6a46f3122812d4fee9b8492bc05f49377b707e53c6abb6972db16dd84 +size 384 diff --git a/margin_logs/step_0000348.npy b/margin_logs/step_0000348.npy new file mode 100644 index 0000000..8356f24 --- /dev/null +++ b/margin_logs/step_0000348.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:afa3ce6e8ca54e38edee5eda23e9b77474e15d27fd1f02a82660dac0d81f9194 +size 384 diff --git a/margin_logs/step_0000349.npy b/margin_logs/step_0000349.npy new file mode 100644 index 0000000..d9a7854 --- /dev/null +++ b/margin_logs/step_0000349.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3694976eaddf0f8aff292a63b4daf064f1fc22a1c98e866a86b05b8c893648df +size 384 diff --git a/margin_logs/step_0000350.npy b/margin_logs/step_0000350.npy new file mode 100644 index 0000000..326bc63 --- /dev/null +++ b/margin_logs/step_0000350.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:917dbfbaee1035f9962412af55ff525f1bee7001b068ce0cf45b76a3ad5ee5db +size 384 diff --git a/margin_logs/step_0000351.npy b/margin_logs/step_0000351.npy new file mode 100644 index 0000000..0350eb8 --- /dev/null +++ b/margin_logs/step_0000351.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:03ca9fdf174e2ea46cb6650a4f7c407c8503658407298f9d03553417a0d25e6b +size 384 diff --git a/margin_logs/step_0000352.npy b/margin_logs/step_0000352.npy new file mode 100644 index 0000000..92b1eea --- /dev/null +++ b/margin_logs/step_0000352.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3ebb3f56345e3621cbb2573e5410eda4c8761f3d1eee0428eb42a5f304d34ea +size 384 diff --git a/margin_logs/step_0000353.npy b/margin_logs/step_0000353.npy new file mode 100644 index 0000000..fbf317e --- /dev/null +++ b/margin_logs/step_0000353.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf9976166fa0391123835c01b418418ac4bdfca890c6ea6819df3a6cc7215d08 +size 384 diff --git a/margin_logs/step_0000354.npy b/margin_logs/step_0000354.npy new file mode 100644 index 0000000..c5ad705 --- /dev/null +++ b/margin_logs/step_0000354.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89be4acbd930d50e95bc37a06f4d7513afb14ce44d14bc4e35b64905d2c494b0 +size 384 diff --git a/margin_logs/step_0000355.npy b/margin_logs/step_0000355.npy new file mode 100644 index 0000000..66f326c --- /dev/null +++ b/margin_logs/step_0000355.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3891f3587ac956ea8f732b7c77e762d17425c9c39d575df2ea245099ce0ea256 +size 384 diff --git a/margin_logs/step_0000356.npy b/margin_logs/step_0000356.npy new file mode 100644 index 0000000..f9f9999 --- /dev/null +++ b/margin_logs/step_0000356.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93491c11e32985fad7987c688b11b664e6de13b44df8e2b90057319852388543 +size 384 diff --git a/margin_logs/step_0000357.npy b/margin_logs/step_0000357.npy new file mode 100644 index 0000000..264769e --- /dev/null +++ b/margin_logs/step_0000357.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:91206d25936e26903fdee6d36800261acb7bfc7ab1c50f83bdfdd8d3e28503cc +size 384 diff --git a/margin_logs/step_0000358.npy b/margin_logs/step_0000358.npy new file mode 100644 index 0000000..e9add0c --- /dev/null +++ b/margin_logs/step_0000358.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:91a00618553b676b83f73723ab7a449fa474ca220c21ed53aa6d1778c49c025a +size 384 diff --git a/margin_logs/step_0000359.npy b/margin_logs/step_0000359.npy new file mode 100644 index 0000000..130a614 --- /dev/null +++ b/margin_logs/step_0000359.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4cd3da41c936c9ad0576f498a7de7b0833744b5911a4a742420435181b556e1a +size 384 diff --git a/margin_logs/step_0000360.npy b/margin_logs/step_0000360.npy new file mode 100644 index 0000000..ed87e6b --- /dev/null +++ b/margin_logs/step_0000360.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78a4643564360ac1c727c82ece9ca90cd3c60e316540fada8ef38c16255de4e2 +size 384 diff --git a/margin_logs/step_0000361.npy b/margin_logs/step_0000361.npy new file mode 100644 index 0000000..2bcca93 --- /dev/null +++ b/margin_logs/step_0000361.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56d17a98c5829779d0fa71e1cc9ffe3023ad32156985d4727a7275a5ce5cc900 +size 384 diff --git a/margin_logs/step_0000362.npy b/margin_logs/step_0000362.npy new file mode 100644 index 0000000..cafed64 --- /dev/null +++ b/margin_logs/step_0000362.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d91652faef21b699799aa50787e86f8cb118ae10ac871e87098fef16373331d2 +size 384 diff --git a/margin_logs/step_0000363.npy b/margin_logs/step_0000363.npy new file mode 100644 index 0000000..6a8d3b8 --- /dev/null +++ b/margin_logs/step_0000363.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3957f011e4c1c6fa0596c9dd2bac928de348472fc87f2e3eb4f9533e26cf5ac3 +size 384 diff --git a/margin_logs/step_0000364.npy b/margin_logs/step_0000364.npy new file mode 100644 index 0000000..e37a94b --- /dev/null +++ b/margin_logs/step_0000364.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4182d42e4c5f5916e5aac7c3d230b55988aa3490f3f993b6858e9f84290a0770 +size 384 diff --git a/margin_logs/step_0000365.npy b/margin_logs/step_0000365.npy new file mode 100644 index 0000000..840940d --- /dev/null +++ b/margin_logs/step_0000365.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5dde72db121d0021f4135cb81bd4cc6f943af27a499e5e2093667414c818812d +size 384 diff --git a/margin_logs/step_0000366.npy b/margin_logs/step_0000366.npy new file mode 100644 index 0000000..e503229 --- /dev/null +++ b/margin_logs/step_0000366.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29f1b15da29ace3f9f3a7344cb0f90127abedd990be655dfc356c8cd50b42e73 +size 384 diff --git a/margin_logs/step_0000367.npy b/margin_logs/step_0000367.npy new file mode 100644 index 0000000..050cb4e --- /dev/null +++ b/margin_logs/step_0000367.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:30b7460268543ab810f5a8ef11896350114f68bed3e6276a96f134e81a1d883a +size 384 diff --git a/margin_logs/step_0000368.npy b/margin_logs/step_0000368.npy new file mode 100644 index 0000000..106e41e --- /dev/null +++ b/margin_logs/step_0000368.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23504e8bf743a9173cee5e331ce28e61c44a55470d04e6f76d9dcee7c0499657 +size 384 diff --git a/margin_logs/step_0000369.npy b/margin_logs/step_0000369.npy new file mode 100644 index 0000000..e887a4a --- /dev/null +++ b/margin_logs/step_0000369.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c534fcd8a59e29d6aede44ab99cb1d1df2161cfd0855b454e0f80b9741297635 +size 384 diff --git a/margin_logs/step_0000370.npy b/margin_logs/step_0000370.npy new file mode 100644 index 0000000..9621c27 --- /dev/null +++ b/margin_logs/step_0000370.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f3f594861bb6f2f4bdfe682af0ef368829b07cf0e9cfd91a4a1f4acf568267b +size 384 diff --git a/margin_logs/step_0000371.npy b/margin_logs/step_0000371.npy new file mode 100644 index 0000000..850def1 --- /dev/null +++ b/margin_logs/step_0000371.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:deb2c5a0b1212768016c416627034ffb4ec47346b9d310c526a026bf534e5c8b +size 384 diff --git a/margin_logs/step_0000372.npy b/margin_logs/step_0000372.npy new file mode 100644 index 0000000..946d608 --- /dev/null +++ b/margin_logs/step_0000372.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85391eb8deb8e63e5eea7389554fe3e48570ab3c1dd2976664b867d8bf028ce4 +size 384 diff --git a/margin_logs/step_0000373.npy b/margin_logs/step_0000373.npy new file mode 100644 index 0000000..0162414 --- /dev/null +++ b/margin_logs/step_0000373.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:392250bd6265480a8a5ffc718e123a3e9029332e90650b069fae59f16b2a6db5 +size 384 diff --git a/margin_logs/step_0000374.npy b/margin_logs/step_0000374.npy new file mode 100644 index 0000000..57068d0 --- /dev/null +++ b/margin_logs/step_0000374.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:189aa8f719f5a3a79c709a9c89762898bb34a3f30fe3ddbcc63606dde445a36a +size 384 diff --git a/margin_logs/step_0000375.npy b/margin_logs/step_0000375.npy new file mode 100644 index 0000000..d29b3bc --- /dev/null +++ b/margin_logs/step_0000375.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b21a933d1bc442e82c64e2bee2a89245be5813f1b22eb97c8ed3bc511e3a01a +size 384 diff --git a/margin_logs/step_0000376.npy b/margin_logs/step_0000376.npy new file mode 100644 index 0000000..f245407 --- /dev/null +++ b/margin_logs/step_0000376.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ef14d88b383382b7a047b7b6baaf288d4b4831551d31a7a5d80716e997f3d04 +size 384 diff --git a/margin_logs/step_0000377.npy b/margin_logs/step_0000377.npy new file mode 100644 index 0000000..3fb75b7 --- /dev/null +++ b/margin_logs/step_0000377.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2469776c1a70efdafd1fb2b309a7d72b3684422329e4ff63452aa05c697c92f +size 384 diff --git a/margin_logs/step_0000378.npy b/margin_logs/step_0000378.npy new file mode 100644 index 0000000..2d76da2 --- /dev/null +++ b/margin_logs/step_0000378.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:96b8429a57a247f8aff29571e3207a0692b1e2007f186aad65f6d65516e86e8e +size 384 diff --git a/margin_logs/step_0000379.npy b/margin_logs/step_0000379.npy new file mode 100644 index 0000000..f8c8108 --- /dev/null +++ b/margin_logs/step_0000379.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c63393ca85b1609e25ab54ff77f68811e6b1870ef3c626789f6793238618c5a +size 384 diff --git a/margin_logs/step_0000380.npy b/margin_logs/step_0000380.npy new file mode 100644 index 0000000..cb883c2 --- /dev/null +++ b/margin_logs/step_0000380.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c14aa86aa31d78b5a0b23c2ba6197e73a9d12c91acd8b8154cef7ccee246289 +size 384 diff --git a/margin_logs/step_0000381.npy b/margin_logs/step_0000381.npy new file mode 100644 index 0000000..de10916 --- /dev/null +++ b/margin_logs/step_0000381.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e894a37861115d53d16866d6e61cdf056e6c32158c7a0afeeb7878f5269d0b14 +size 384 diff --git a/margin_logs/step_0000382.npy b/margin_logs/step_0000382.npy new file mode 100644 index 0000000..66470ea --- /dev/null +++ b/margin_logs/step_0000382.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ad5354f61092f3daff59abbd669c56f6bca0a91f713558959b2cb1e0cda0b0e +size 384 diff --git a/margin_logs/step_0000383.npy b/margin_logs/step_0000383.npy new file mode 100644 index 0000000..5aef565 --- /dev/null +++ b/margin_logs/step_0000383.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70945070122b970cee505cd89aeb20684bf27802917de4cc378a51464229c873 +size 384 diff --git a/margin_logs/step_0000384.npy b/margin_logs/step_0000384.npy new file mode 100644 index 0000000..898069d --- /dev/null +++ b/margin_logs/step_0000384.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:824d6fc44dc893e78b0c87808c55ef84c6a85a7ac1a8de0a902ef4f10178a54a +size 384 diff --git a/margin_logs/step_0000385.npy b/margin_logs/step_0000385.npy new file mode 100644 index 0000000..de6118b --- /dev/null +++ b/margin_logs/step_0000385.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39218a9346d5d92316486549520797cd972b0165e521c215420d8e7ad145dd79 +size 384 diff --git a/margin_logs/step_0000386.npy b/margin_logs/step_0000386.npy new file mode 100644 index 0000000..ed57270 --- /dev/null +++ b/margin_logs/step_0000386.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97a0cffb4b6de0bfe56f898a5c3abce47e367ce8d16512b057a2afb81c376aa6 +size 384 diff --git a/margin_logs/step_0000387.npy b/margin_logs/step_0000387.npy new file mode 100644 index 0000000..710c29a --- /dev/null +++ b/margin_logs/step_0000387.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:43ffb86961b06f30589a1d0489c24b1b7e3e858e496eb990a1bfa98fd4a62620 +size 384 diff --git a/margin_logs/step_0000388.npy b/margin_logs/step_0000388.npy new file mode 100644 index 0000000..93d5144 --- /dev/null +++ b/margin_logs/step_0000388.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51ef7d0d8f854974a53f803033f11a2bdb929d58218d28b869e3726f83b9e935 +size 384 diff --git a/margin_logs/step_0000389.npy b/margin_logs/step_0000389.npy new file mode 100644 index 0000000..fc3387b --- /dev/null +++ b/margin_logs/step_0000389.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44e639cd2d9d4e347b6f2df32c294956e0f8652459eceb25e6f7b61f336a7159 +size 384 diff --git a/margin_logs/step_0000390.npy b/margin_logs/step_0000390.npy new file mode 100644 index 0000000..bde2dce --- /dev/null +++ b/margin_logs/step_0000390.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e41ae2009b4cdb05208e190cc0d7c816302d0cec4d152139e24c839c7c41a98 +size 384 diff --git a/margin_logs/step_0000391.npy b/margin_logs/step_0000391.npy new file mode 100644 index 0000000..0d44990 --- /dev/null +++ b/margin_logs/step_0000391.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0bf2e42e97021187fb0f450c42d2cfaaabc59fb36c2f1cff10036777704df623 +size 384 diff --git a/margin_logs/step_0000392.npy b/margin_logs/step_0000392.npy new file mode 100644 index 0000000..dd65163 --- /dev/null +++ b/margin_logs/step_0000392.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b67f228e7fdb2fe18e208ef63d9f550b03668bf5046f280dc7b939a91f9b9c19 +size 384 diff --git a/margin_logs/step_0000393.npy b/margin_logs/step_0000393.npy new file mode 100644 index 0000000..ac026cd --- /dev/null +++ b/margin_logs/step_0000393.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e6583839dc093723fe1334da045ec132ba92663f2e1d9843392f7b3c9a85fa5 +size 384 diff --git a/margin_logs/step_0000394.npy b/margin_logs/step_0000394.npy new file mode 100644 index 0000000..690d064 --- /dev/null +++ b/margin_logs/step_0000394.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0b21566b043c4db62be5a609831e1f7c30ee9af40e72d83a6b7ab728b4a694e +size 384 diff --git a/margin_logs/step_0000395.npy b/margin_logs/step_0000395.npy new file mode 100644 index 0000000..b45dd26 --- /dev/null +++ b/margin_logs/step_0000395.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f26e292c192925c231e5e11fe261789d03f9736101d3a56d2bb08f99a20d4ce9 +size 384 diff --git a/margin_logs/step_0000396.npy b/margin_logs/step_0000396.npy new file mode 100644 index 0000000..d7e4b84 --- /dev/null +++ b/margin_logs/step_0000396.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7332b94df56bb20130030c80457589318b4e9583cf776a80b5341d2531a49b75 +size 384 diff --git a/margin_logs/step_0000397.npy b/margin_logs/step_0000397.npy new file mode 100644 index 0000000..bdcd8b6 --- /dev/null +++ b/margin_logs/step_0000397.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7723de431b46b01e3f6c6be13f3d8a0666e3c8d19d75933bea0be8ac12db319e +size 384 diff --git a/margin_logs/step_0000398.npy b/margin_logs/step_0000398.npy new file mode 100644 index 0000000..785ff36 --- /dev/null +++ b/margin_logs/step_0000398.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e5e3b369e823738fe87573e133dfe41197c29679b2d494ff4ae01f5e62fda57 +size 384 diff --git a/margin_logs/step_0000399.npy b/margin_logs/step_0000399.npy new file mode 100644 index 0000000..c231865 --- /dev/null +++ b/margin_logs/step_0000399.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e55aadc0adc4d367b8d5f4744789f45b83044ce0b7957a9637e3264e9f172754 +size 384 diff --git a/margin_logs/step_0000400.npy b/margin_logs/step_0000400.npy new file mode 100644 index 0000000..afc81e2 --- /dev/null +++ b/margin_logs/step_0000400.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d74e1835eaf4616d016853d08bc2b618bac600c7120316de877e3840ceccc21 +size 384 diff --git a/margin_logs/step_0000401.npy b/margin_logs/step_0000401.npy new file mode 100644 index 0000000..b8577de --- /dev/null +++ b/margin_logs/step_0000401.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:033e3f0fe9e05354929119e8fb925cb2c4525b7e739f7ebb25ec697f4c956506 +size 384 diff --git a/margin_logs/step_0000402.npy b/margin_logs/step_0000402.npy new file mode 100644 index 0000000..4d06629 --- /dev/null +++ b/margin_logs/step_0000402.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:05617720fbcb5c23a740e74500aa8c731dd052d1ebd30834a60de66fc50822de +size 384 diff --git a/margin_logs/step_0000403.npy b/margin_logs/step_0000403.npy new file mode 100644 index 0000000..b428385 --- /dev/null +++ b/margin_logs/step_0000403.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b6b8cbde58178e258981151519fdff9370f0ffd98f6a407784290f820b4d4a2 +size 384 diff --git a/margin_logs/step_0000404.npy b/margin_logs/step_0000404.npy new file mode 100644 index 0000000..0cbbd41 --- /dev/null +++ b/margin_logs/step_0000404.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89f8ae233e1ca61c47b0429b624c73abce6e500d2d63ce71d1bbd2bb4b3645fd +size 384 diff --git a/margin_logs/step_0000405.npy b/margin_logs/step_0000405.npy new file mode 100644 index 0000000..34036ea --- /dev/null +++ b/margin_logs/step_0000405.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd98aa0b149660e012f9c7e5bb6e114e7e09d27a253bd7bd7803205e5e091229 +size 384 diff --git a/margin_logs/step_0000406.npy b/margin_logs/step_0000406.npy new file mode 100644 index 0000000..a82ecda --- /dev/null +++ b/margin_logs/step_0000406.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:293ddcc15f555982adc741f8854adf69c83cb76fbfdcbb8711de754489f5dc26 +size 384 diff --git a/margin_logs/step_0000407.npy b/margin_logs/step_0000407.npy new file mode 100644 index 0000000..5dbf78a --- /dev/null +++ b/margin_logs/step_0000407.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae4d73facfeb463b8bfeb66f4ecec20e74bdf8ea05bb079e3e15953acb3d0bf7 +size 384 diff --git a/margin_logs/step_0000408.npy b/margin_logs/step_0000408.npy new file mode 100644 index 0000000..44164fa --- /dev/null +++ b/margin_logs/step_0000408.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:afd8baf59017ebe02e27306a78c0ff268827e99c6d6fbe7e7a01bdbf973b62d6 +size 384 diff --git a/margin_logs/step_0000409.npy b/margin_logs/step_0000409.npy new file mode 100644 index 0000000..f7d0a00 --- /dev/null +++ b/margin_logs/step_0000409.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b04daa499e618018717370cd6c0f1d14cecdd57126cefa16905a7898ff523cf4 +size 384 diff --git a/margin_logs/step_0000410.npy b/margin_logs/step_0000410.npy new file mode 100644 index 0000000..7d4953a --- /dev/null +++ b/margin_logs/step_0000410.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e728191ccc9a64a8d9c33d816cf68c6b04c7a282d594680de7c787177aaedde9 +size 384 diff --git a/margin_logs/step_0000411.npy b/margin_logs/step_0000411.npy new file mode 100644 index 0000000..3dcf4e6 --- /dev/null +++ b/margin_logs/step_0000411.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:54eeb77d02c6bd594ee5e04142de2ecc7de8ce65c2e93ecef639f4ddc043a89b +size 384 diff --git a/margin_logs/step_0000412.npy b/margin_logs/step_0000412.npy new file mode 100644 index 0000000..657c648 --- /dev/null +++ b/margin_logs/step_0000412.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:986a0cd4e23964a420c0a4c46de1eee0b949b105c90d1cad98fda065d9870336 +size 384 diff --git a/margin_logs/step_0000413.npy b/margin_logs/step_0000413.npy new file mode 100644 index 0000000..9ca4fe6 --- /dev/null +++ b/margin_logs/step_0000413.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:220979ee9ec66e41ce97feafe0b5a3942a59dc1e825c68faa5c43239d9a4cd1a +size 384 diff --git a/margin_logs/step_0000414.npy b/margin_logs/step_0000414.npy new file mode 100644 index 0000000..89fc125 --- /dev/null +++ b/margin_logs/step_0000414.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a87149d039fa634ae11e028c7a602142c5477fc7b968660bed94aedb006d72d2 +size 384 diff --git a/margin_logs/step_0000415.npy b/margin_logs/step_0000415.npy new file mode 100644 index 0000000..abc6d97 --- /dev/null +++ b/margin_logs/step_0000415.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:016addb39abeb094740a76789c51bd383a8b1f2bc7d4145484131d55f666fa65 +size 384 diff --git a/margin_logs/step_0000416.npy b/margin_logs/step_0000416.npy new file mode 100644 index 0000000..fc396a4 --- /dev/null +++ b/margin_logs/step_0000416.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b164bc320042a354527d12b1dfc8df7bbce5658281504be5115dfb0cc28f3628 +size 384 diff --git a/margin_logs/step_0000417.npy b/margin_logs/step_0000417.npy new file mode 100644 index 0000000..8f47266 --- /dev/null +++ b/margin_logs/step_0000417.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:96a5b17d65fb8e4d6e66a774ab463f8be99cb5e88ab3f2133e11c2d1f50f4e4b +size 384 diff --git a/margin_logs/step_0000418.npy b/margin_logs/step_0000418.npy new file mode 100644 index 0000000..1119e9e --- /dev/null +++ b/margin_logs/step_0000418.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d48632842e8134e0db4aded944d495cb1a9e2280a029eb22ff784de9121a2e9b +size 384 diff --git a/margin_logs/step_0000419.npy b/margin_logs/step_0000419.npy new file mode 100644 index 0000000..d6244d3 --- /dev/null +++ b/margin_logs/step_0000419.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:990ca07641949193da280c10629a081ca063f71545f9cd32b8e90d041c9fcc2d +size 384 diff --git a/margin_logs/step_0000420.npy b/margin_logs/step_0000420.npy new file mode 100644 index 0000000..bc3d381 --- /dev/null +++ b/margin_logs/step_0000420.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da855211aa25b78ef802badb6659f739b61240a30a4ead9b9ddf945be285ecb8 +size 384 diff --git a/margin_logs/step_0000421.npy b/margin_logs/step_0000421.npy new file mode 100644 index 0000000..1cf9af9 --- /dev/null +++ b/margin_logs/step_0000421.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50339206af0b3cb68b49fee68886c0a0c49fc32f8f6ba8592d087d776e0da878 +size 384 diff --git a/margin_logs/step_0000422.npy b/margin_logs/step_0000422.npy new file mode 100644 index 0000000..3897681 --- /dev/null +++ b/margin_logs/step_0000422.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4502becfb624601a61ac47a72fc0e3921028d98cfa1194e88746e011407584ad +size 384 diff --git a/margin_logs/step_0000423.npy b/margin_logs/step_0000423.npy new file mode 100644 index 0000000..2d3d52a --- /dev/null +++ b/margin_logs/step_0000423.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9942c62426e6352bb9de8716fbfc587f9c84fe4d425c1235dda71b57880cd488 +size 384 diff --git a/margin_logs/step_0000424.npy b/margin_logs/step_0000424.npy new file mode 100644 index 0000000..2fda832 --- /dev/null +++ b/margin_logs/step_0000424.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f98684e59d98bf01b5c16d60a56ab1b246e1d5f354a61e4e7517f4181193f5ed +size 384 diff --git a/margin_logs/step_0000425.npy b/margin_logs/step_0000425.npy new file mode 100644 index 0000000..5bd7301 --- /dev/null +++ b/margin_logs/step_0000425.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d885c31178bae21914a52130e7be8dc8cc1f79d3787c103ea72643b4032d0ec +size 384 diff --git a/margin_logs/step_0000426.npy b/margin_logs/step_0000426.npy new file mode 100644 index 0000000..9b8a058 --- /dev/null +++ b/margin_logs/step_0000426.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6adb2edd28eb857e7264882f19dba9797e9c5b1ea875647ee240763002caa495 +size 384 diff --git a/margin_logs/step_0000427.npy b/margin_logs/step_0000427.npy new file mode 100644 index 0000000..2eccd1a --- /dev/null +++ b/margin_logs/step_0000427.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89000388d7aa49c731a8b37419a33e29db1d4af758ff89f31924de2a1c48e21f +size 384 diff --git a/margin_logs/step_0000428.npy b/margin_logs/step_0000428.npy new file mode 100644 index 0000000..e917cce --- /dev/null +++ b/margin_logs/step_0000428.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:335214968dc5b0dd766d0a5d25b61ced6f9e5c526628f92dd905047455ef0cd3 +size 384 diff --git a/margin_logs/step_0000429.npy b/margin_logs/step_0000429.npy new file mode 100644 index 0000000..3e1e741 --- /dev/null +++ b/margin_logs/step_0000429.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a8a05d7ac0d23178311ef5b55b6c7bd1a07e3962412b41738ce41018c75f416 +size 384 diff --git a/margin_logs/step_0000430.npy b/margin_logs/step_0000430.npy new file mode 100644 index 0000000..5e596b0 --- /dev/null +++ b/margin_logs/step_0000430.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:457b3199089acc467177088396d4be3ac13ac56d8ff99f278815d9c61d416e52 +size 384 diff --git a/margin_logs/step_0000431.npy b/margin_logs/step_0000431.npy new file mode 100644 index 0000000..d70751d --- /dev/null +++ b/margin_logs/step_0000431.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dbcaa76651cb9a5e3cba69587cd1188e2e84c43a47741e5d83e510426a31b17c +size 384 diff --git a/margin_logs/step_0000432.npy b/margin_logs/step_0000432.npy new file mode 100644 index 0000000..f53af3f --- /dev/null +++ b/margin_logs/step_0000432.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a1d722c1037011aa82d1327202a0424e136b0864df4c0901055798255ad34b2 +size 384 diff --git a/margin_logs/step_0000433.npy b/margin_logs/step_0000433.npy new file mode 100644 index 0000000..aab1b07 --- /dev/null +++ b/margin_logs/step_0000433.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2c5de1151c6f1227e8b0da40353bb2cd55456713aafb1e2aefc2ca70ac02bc5 +size 384 diff --git a/margin_logs/step_0000434.npy b/margin_logs/step_0000434.npy new file mode 100644 index 0000000..60d517a --- /dev/null +++ b/margin_logs/step_0000434.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f5962a00c850c1a21da5872c81ad082fa1e771b39b72102037f6e0d74212ff17 +size 384 diff --git a/margin_logs/step_0000435.npy b/margin_logs/step_0000435.npy new file mode 100644 index 0000000..84b9843 --- /dev/null +++ b/margin_logs/step_0000435.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a7d667794ae4f1404e40aef619d5826d68a6c2416d9e7e5df6983f70a36f0eb7 +size 384 diff --git a/margin_logs/step_0000436.npy b/margin_logs/step_0000436.npy new file mode 100644 index 0000000..41d1320 --- /dev/null +++ b/margin_logs/step_0000436.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a41e78944f07c514c01d8f6bb5dbcc0ed682df416ed6b975b151a8f06a24754 +size 384 diff --git a/margin_logs/step_0000437.npy b/margin_logs/step_0000437.npy new file mode 100644 index 0000000..3c2809f --- /dev/null +++ b/margin_logs/step_0000437.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bde41eab230a5af3b6edd26ce644ff39f073ba8aa49a9a8a78625bc255d07167 +size 384 diff --git a/margin_logs/step_0000438.npy b/margin_logs/step_0000438.npy new file mode 100644 index 0000000..aa60f34 --- /dev/null +++ b/margin_logs/step_0000438.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a07026f3c9abab22b88a18dd744e58c3b40c37f34c3d342b86b2ea9b5858b4c +size 384 diff --git a/margin_logs/step_0000439.npy b/margin_logs/step_0000439.npy new file mode 100644 index 0000000..6902845 --- /dev/null +++ b/margin_logs/step_0000439.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10be56fe6309cd3b35c0be5b5aaccd4623860d91bee79635ba2e8e3881e1009a +size 384 diff --git a/margin_logs/step_0000440.npy b/margin_logs/step_0000440.npy new file mode 100644 index 0000000..6bb1d92 --- /dev/null +++ b/margin_logs/step_0000440.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13ed1cde4e1ae7c7c658598e43e422be20ff92e13f95efb4b43600e2efa3f3b9 +size 384 diff --git a/margin_logs/step_0000441.npy b/margin_logs/step_0000441.npy new file mode 100644 index 0000000..c5845b3 --- /dev/null +++ b/margin_logs/step_0000441.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ca470820fbfd55a7eb278bc3c3737d1cde73aae2d5ff26ccbc30b826de8a92c +size 384 diff --git a/margin_logs/step_0000442.npy b/margin_logs/step_0000442.npy new file mode 100644 index 0000000..456aa24 --- /dev/null +++ b/margin_logs/step_0000442.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8af7db2e2624303d9d6b45e25518f270148716bd6e53e8dc7cb07b1a7bbd3fb7 +size 384 diff --git a/margin_logs/step_0000443.npy b/margin_logs/step_0000443.npy new file mode 100644 index 0000000..e4f20be --- /dev/null +++ b/margin_logs/step_0000443.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13d8e71edd9558716b839aa37c50faa2c0d9ac1d653d652ba83254a2aa9ad200 +size 384 diff --git a/margin_logs/step_0000444.npy b/margin_logs/step_0000444.npy new file mode 100644 index 0000000..65a3bf1 --- /dev/null +++ b/margin_logs/step_0000444.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:615d8124b192e77814d4932b9bbdeaaba5ea286a33bacb2a9bc97b1e1721285b +size 384 diff --git a/margin_logs/step_0000445.npy b/margin_logs/step_0000445.npy new file mode 100644 index 0000000..2d4e6b7 --- /dev/null +++ b/margin_logs/step_0000445.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3bdc16c29df4bc4bf4f2c5d48b5a2dd242c86624a667983470c4c08cb93a40d +size 384 diff --git a/margin_logs/step_0000446.npy b/margin_logs/step_0000446.npy new file mode 100644 index 0000000..2579983 --- /dev/null +++ b/margin_logs/step_0000446.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0936ecf483219f65b005580da2e4468b064a7a7fb89e4c31ef059fe995b0a99d +size 384 diff --git a/margin_logs/step_0000447.npy b/margin_logs/step_0000447.npy new file mode 100644 index 0000000..12d97a4 --- /dev/null +++ b/margin_logs/step_0000447.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:feb10f27e68ccc4a3a29d075ef1bf8324584e92454efa022d511548d59bd0229 +size 384 diff --git a/margin_logs/step_0000448.npy b/margin_logs/step_0000448.npy new file mode 100644 index 0000000..e4fb263 --- /dev/null +++ b/margin_logs/step_0000448.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a3859917cbb6db542d6cf562eb90927757de58abd1b4a2a0c90d7922a63f96a +size 384 diff --git a/margin_logs/step_0000449.npy b/margin_logs/step_0000449.npy new file mode 100644 index 0000000..8cac3f7 --- /dev/null +++ b/margin_logs/step_0000449.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48572bfadfcf677f0fc45410c158b6cd17bb498811770d7babeb17119ae156d6 +size 384 diff --git a/margin_logs/step_0000450.npy b/margin_logs/step_0000450.npy new file mode 100644 index 0000000..9d6c18a --- /dev/null +++ b/margin_logs/step_0000450.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24444f8e3082f9ebb1df357a3a9c03477f602d0d669f8fe44d8761294f10e086 +size 384 diff --git a/margin_logs/step_0000451.npy b/margin_logs/step_0000451.npy new file mode 100644 index 0000000..cd9d4d9 --- /dev/null +++ b/margin_logs/step_0000451.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2fd161a163751fe5fd2a2a8a7c6cefac546ed37532877a7f41652577cabbbd9 +size 384 diff --git a/margin_logs/step_0000452.npy b/margin_logs/step_0000452.npy new file mode 100644 index 0000000..efc12b3 --- /dev/null +++ b/margin_logs/step_0000452.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ad09aae1f4ca0fbe2693b4650d5adf691f8700d056325db6668e9a813e1bdd6 +size 384 diff --git a/margin_logs/step_0000453.npy b/margin_logs/step_0000453.npy new file mode 100644 index 0000000..e18bae0 --- /dev/null +++ b/margin_logs/step_0000453.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1140cc10dad9599a62360a459c9751db063b3ade9d8330762e081cfe74bea24e +size 384 diff --git a/margin_logs/step_0000454.npy b/margin_logs/step_0000454.npy new file mode 100644 index 0000000..9965655 --- /dev/null +++ b/margin_logs/step_0000454.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90f46732913882f5873a3d4e751218a92ba52cf39f17f7d3ccb8e9f0251c399b +size 384 diff --git a/margin_logs/step_0000455.npy b/margin_logs/step_0000455.npy new file mode 100644 index 0000000..2cacfaf --- /dev/null +++ b/margin_logs/step_0000455.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:694f609381d43faf395ef20dc51e9e67540a5cae5c486aaf45296859d1f7b66d +size 384 diff --git a/margin_logs/step_0000456.npy b/margin_logs/step_0000456.npy new file mode 100644 index 0000000..0921fbc --- /dev/null +++ b/margin_logs/step_0000456.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:92c3a21270c190142e386218973e24f0f1f90d47212e2d1a1dcd5d098a61b6a2 +size 384 diff --git a/margin_logs/step_0000457.npy b/margin_logs/step_0000457.npy new file mode 100644 index 0000000..0ae9e09 --- /dev/null +++ b/margin_logs/step_0000457.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f1a76f526580aac6707359c78a0e731c3e1429f89174cab98fff3f55940e467 +size 384 diff --git a/margin_logs/step_0000458.npy b/margin_logs/step_0000458.npy new file mode 100644 index 0000000..74e065f --- /dev/null +++ b/margin_logs/step_0000458.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67f784a4c77eb2645e45b4de6e56a08969c8ee22c25566af097165cc97d3ff69 +size 384 diff --git a/margin_logs/step_0000459.npy b/margin_logs/step_0000459.npy new file mode 100644 index 0000000..e4fe437 --- /dev/null +++ b/margin_logs/step_0000459.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4cb918d06e94d97631db1c6c5eb751c7a701537f77944c98c01979f602a142d7 +size 384 diff --git a/margin_logs/step_0000460.npy b/margin_logs/step_0000460.npy new file mode 100644 index 0000000..bd9c513 --- /dev/null +++ b/margin_logs/step_0000460.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:654b4e5b7e586843c857c61233e6fcd66789b673e81c7274823b89b162b51487 +size 384 diff --git a/margin_logs/step_0000461.npy b/margin_logs/step_0000461.npy new file mode 100644 index 0000000..130c6ec --- /dev/null +++ b/margin_logs/step_0000461.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb7abf1d616089711c5f620c854fecd6e21db18fa5556231d23ac91e66dd618f +size 384 diff --git a/margin_logs/step_0000462.npy b/margin_logs/step_0000462.npy new file mode 100644 index 0000000..2184d06 --- /dev/null +++ b/margin_logs/step_0000462.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:54a1039fbc322c687d9b8511c606dd411a36d6ba24a4f2d0249b9d150d641997 +size 384 diff --git a/margin_logs/step_0000463.npy b/margin_logs/step_0000463.npy new file mode 100644 index 0000000..711e0a2 --- /dev/null +++ b/margin_logs/step_0000463.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf7446bea231de3a56017ccaafdb5b0123af7dcddbc588a9d36c94dcc3717ed1 +size 384 diff --git a/margin_logs/step_0000464.npy b/margin_logs/step_0000464.npy new file mode 100644 index 0000000..6766e54 --- /dev/null +++ b/margin_logs/step_0000464.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c2fcf31f62623bd885096dcfe259ae091c818a8707b2b895909247082221546 +size 384 diff --git a/margin_logs/step_0000465.npy b/margin_logs/step_0000465.npy new file mode 100644 index 0000000..5425d4f --- /dev/null +++ b/margin_logs/step_0000465.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb582c5c85ce5a6ee44f770ece2e37cb8751109061dbf8fc559821b885009019 +size 384 diff --git a/margin_logs/step_0000466.npy b/margin_logs/step_0000466.npy new file mode 100644 index 0000000..1ef0966 --- /dev/null +++ b/margin_logs/step_0000466.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85d11973cf5e1c90bf7f87a141f69941ce53a6b2d0ea472a6fc7d61cc9c5255b +size 384 diff --git a/margin_logs/step_0000467.npy b/margin_logs/step_0000467.npy new file mode 100644 index 0000000..1686f94 --- /dev/null +++ b/margin_logs/step_0000467.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:887820b70be04e9da67a73b0a32f27706f5fab700e68491f0d3011c66e4400d4 +size 384 diff --git a/margin_logs/step_0000468.npy b/margin_logs/step_0000468.npy new file mode 100644 index 0000000..decdbcf --- /dev/null +++ b/margin_logs/step_0000468.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bcb6402b670decfcb7b58f16dd4a3da1c16889d1358a23674e4401b418034466 +size 384 diff --git a/margin_logs/step_0000469.npy b/margin_logs/step_0000469.npy new file mode 100644 index 0000000..724ac4b --- /dev/null +++ b/margin_logs/step_0000469.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:77423ef9f134c0302cc9e62aad6ae9ae6e552b730163e40969dc1958f5ee7cc8 +size 384 diff --git a/margin_logs/step_0000470.npy b/margin_logs/step_0000470.npy new file mode 100644 index 0000000..45652bb --- /dev/null +++ b/margin_logs/step_0000470.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:730194aaeff9de543d34546aee6dab77d2c5a439a79524efb3de24ed4e3d8b85 +size 384 diff --git a/margin_logs/step_0000471.npy b/margin_logs/step_0000471.npy new file mode 100644 index 0000000..cd40ed6 --- /dev/null +++ b/margin_logs/step_0000471.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:efc5ef25f4db1a82cd92eb3b2f0d185ed90f64985d2df2b27bc6fa2ecc7aa6ce +size 384 diff --git a/margin_logs/step_0000472.npy b/margin_logs/step_0000472.npy new file mode 100644 index 0000000..f83ca70 --- /dev/null +++ b/margin_logs/step_0000472.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b29d385fefb6c9f05342325238b7608e575c701f5071797d55383ff65ea0fab6 +size 384 diff --git a/margin_logs/step_0000473.npy b/margin_logs/step_0000473.npy new file mode 100644 index 0000000..5446ad9 --- /dev/null +++ b/margin_logs/step_0000473.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eaf918cf97fcccb1fc7452970f940903748bfbdb890ecd950cb050e60615c267 +size 384 diff --git a/margin_logs/step_0000474.npy b/margin_logs/step_0000474.npy new file mode 100644 index 0000000..9abd663 --- /dev/null +++ b/margin_logs/step_0000474.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c765c132fbc0e4e36d63229480ef9e6615a369dcf937599207f1ed49f34b17cb +size 384 diff --git a/margin_logs/step_0000475.npy b/margin_logs/step_0000475.npy new file mode 100644 index 0000000..c9858b6 --- /dev/null +++ b/margin_logs/step_0000475.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3a3a76a47b502514e0d50c01a11f93db424ea113b84a505725af895ee9dfac1 +size 384 diff --git a/margin_logs/step_0000476.npy b/margin_logs/step_0000476.npy new file mode 100644 index 0000000..84df6ae --- /dev/null +++ b/margin_logs/step_0000476.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9dc708cbbc7dc381ef0dc6b21be1461251cdc09971eb7df8a5e1aa4e164607eb +size 384 diff --git a/margin_logs/step_0000477.npy b/margin_logs/step_0000477.npy new file mode 100644 index 0000000..a268ecb --- /dev/null +++ b/margin_logs/step_0000477.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b1a8194324a45f42e7c55bafa933d7f631a3364377b4a1f00b8e90e1d6a10b0b +size 384 diff --git a/margin_logs/step_0000478.npy b/margin_logs/step_0000478.npy new file mode 100644 index 0000000..bfddd06 --- /dev/null +++ b/margin_logs/step_0000478.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dfdd92a25b280ed6570c21671b2f4525ec8956b96c2db735b341d6f79b128dac +size 384 diff --git a/margin_logs/step_0000479.npy b/margin_logs/step_0000479.npy new file mode 100644 index 0000000..f28a8ad --- /dev/null +++ b/margin_logs/step_0000479.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6514625ac2a6c0f65c56fa2c635a259b0a983c5982b41c3337ebc6b82d894bc3 +size 384 diff --git a/margin_logs/step_0000480.npy b/margin_logs/step_0000480.npy new file mode 100644 index 0000000..e498a02 --- /dev/null +++ b/margin_logs/step_0000480.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0adf2e9ea8d4a4b5296d7bfe7c4713162d8097ac00f39a4fab8181cf5d6045e1 +size 384 diff --git a/margin_logs/step_0000481.npy b/margin_logs/step_0000481.npy new file mode 100644 index 0000000..58621ef --- /dev/null +++ b/margin_logs/step_0000481.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e191c288cf2cad71dc696093c5e3139602dd7a1686b6c5551c2c062fb2bb6e9 +size 384 diff --git a/margin_logs/step_0000482.npy b/margin_logs/step_0000482.npy new file mode 100644 index 0000000..4a44a99 --- /dev/null +++ b/margin_logs/step_0000482.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:166ef641c79a0e23563f69ffe23bdfbeeb81237ef00a8ab0848e8894084f9b72 +size 384 diff --git a/margin_logs/step_0000483.npy b/margin_logs/step_0000483.npy new file mode 100644 index 0000000..b5386a4 --- /dev/null +++ b/margin_logs/step_0000483.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aaffe481e149c3f5e384c9ec6a0c11b563e9a19c0fbf9ae6da752870b082002e +size 384 diff --git a/margin_logs/step_0000484.npy b/margin_logs/step_0000484.npy new file mode 100644 index 0000000..f699e88 --- /dev/null +++ b/margin_logs/step_0000484.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ad3764b7e4b8f0f701434ca83337d4bc0567f2aec413487c39e81e481e8e22e +size 384 diff --git a/margin_logs/step_0000485.npy b/margin_logs/step_0000485.npy new file mode 100644 index 0000000..72841f3 --- /dev/null +++ b/margin_logs/step_0000485.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:585443b60a96acba1302742eeaa875b7ba9a8039b1f6c008705e27cd4594d58f +size 384 diff --git a/margin_logs/step_0000486.npy b/margin_logs/step_0000486.npy new file mode 100644 index 0000000..f909698 --- /dev/null +++ b/margin_logs/step_0000486.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec415b20436eee9fa5df92dce49170195c9a4f0c5f766434a81722dc89a6abaa +size 384 diff --git a/margin_logs/step_0000487.npy b/margin_logs/step_0000487.npy new file mode 100644 index 0000000..f48f70d --- /dev/null +++ b/margin_logs/step_0000487.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aff6af7b2e83b2f65ef10c2bd711218208e807a12a2491a3ae539a9babc42c4b +size 384 diff --git a/margin_logs/step_0000488.npy b/margin_logs/step_0000488.npy new file mode 100644 index 0000000..e8229a8 --- /dev/null +++ b/margin_logs/step_0000488.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7517740ae988cfcb3d0780f7642292891e867cb910e76afcaf1e33a437c43d8 +size 384 diff --git a/margin_logs/step_0000489.npy b/margin_logs/step_0000489.npy new file mode 100644 index 0000000..b67a196 --- /dev/null +++ b/margin_logs/step_0000489.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b568a23cd1ac21c390d34cdc4d509ac9ddd3f9321314772b6c6119b72e46021 +size 384 diff --git a/margin_logs/step_0000490.npy b/margin_logs/step_0000490.npy new file mode 100644 index 0000000..002f07d --- /dev/null +++ b/margin_logs/step_0000490.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:079219dcd26c2bd4bc5dad90d37aec8088f3596b74fc12048fcc796a72f99fe4 +size 384 diff --git a/margin_logs/step_0000491.npy b/margin_logs/step_0000491.npy new file mode 100644 index 0000000..3075d1c --- /dev/null +++ b/margin_logs/step_0000491.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e31085c112b3ac86918d0b443f88b42835891a13d6fbe9b4284c40571584fdf +size 384 diff --git a/margin_logs/step_0000492.npy b/margin_logs/step_0000492.npy new file mode 100644 index 0000000..833e094 --- /dev/null +++ b/margin_logs/step_0000492.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d5913593d0a843d4eac3b0595f7a48e37f52fcedc0be449d0eee7a3deb6442c +size 384 diff --git a/margin_logs/step_0000493.npy b/margin_logs/step_0000493.npy new file mode 100644 index 0000000..da15b9c --- /dev/null +++ b/margin_logs/step_0000493.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f0c82671949a79bdd63403e6863ca81f5dbe16ff28543ce4fc5a73a9b91088b +size 384 diff --git a/margin_logs/step_0000494.npy b/margin_logs/step_0000494.npy new file mode 100644 index 0000000..bbfaf0f --- /dev/null +++ b/margin_logs/step_0000494.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d80a1ea6633b2bc7d47af39edab3134b13de622f0c5158d2c3fce670b8843d41 +size 384 diff --git a/margin_logs/step_0000495.npy b/margin_logs/step_0000495.npy new file mode 100644 index 0000000..2e22580 --- /dev/null +++ b/margin_logs/step_0000495.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d631f4e5c1cb3fb68311cfeacdcea97f2c5328281f3ac75d6ba1afe102fef46a +size 384 diff --git a/margin_logs/step_0000496.npy b/margin_logs/step_0000496.npy new file mode 100644 index 0000000..7719017 --- /dev/null +++ b/margin_logs/step_0000496.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e6e68c870f1641e8412b2728f34239aea2e8104a81e5d32ec221c6c3717805d +size 384 diff --git a/margin_logs/step_0000497.npy b/margin_logs/step_0000497.npy new file mode 100644 index 0000000..f4d3c1d --- /dev/null +++ b/margin_logs/step_0000497.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:118c1913205835b03ba344477c9bdc024b5c391965a32e459b0fd0d0f3a244c0 +size 384 diff --git a/margin_logs/step_0000498.npy b/margin_logs/step_0000498.npy new file mode 100644 index 0000000..ba6ed24 --- /dev/null +++ b/margin_logs/step_0000498.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78fa209b22dba8a938b7b2cd91509b67be0a9805baddbc73136ee4d7e663314f +size 384 diff --git a/margin_logs/step_0000499.npy b/margin_logs/step_0000499.npy new file mode 100644 index 0000000..40b5aad --- /dev/null +++ b/margin_logs/step_0000499.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:550771bb026084eb991a3eca359c03c25ed82412ce9204d3842e80fb759cde67 +size 384 diff --git a/margin_logs/step_0000500.npy b/margin_logs/step_0000500.npy new file mode 100644 index 0000000..1803b74 --- /dev/null +++ b/margin_logs/step_0000500.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b64425c75bf424f9009e8ccfa218290c1412d452bf9cae020a05b55c4b391430 +size 384 diff --git a/margin_logs/step_0000501.npy b/margin_logs/step_0000501.npy new file mode 100644 index 0000000..9cd1832 --- /dev/null +++ b/margin_logs/step_0000501.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf2d9c736b55d2beb9a828a36e0342371b83d178ae18778d1b70331349db7fd2 +size 384 diff --git a/margin_logs/step_0000502.npy b/margin_logs/step_0000502.npy new file mode 100644 index 0000000..8c9149f --- /dev/null +++ b/margin_logs/step_0000502.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f04d4c28c989d551d13e22a7f68a8c804772d610a6e16d5c6286ff275052dcee +size 384 diff --git a/margin_logs/step_0000503.npy b/margin_logs/step_0000503.npy new file mode 100644 index 0000000..cdc135a --- /dev/null +++ b/margin_logs/step_0000503.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:71ded66190a3b9253e54eb78668a3325e9054f6d41f277998e2d84676cb06d1d +size 384 diff --git a/margin_logs/step_0000504.npy b/margin_logs/step_0000504.npy new file mode 100644 index 0000000..5cbeb15 --- /dev/null +++ b/margin_logs/step_0000504.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:146577e12b67195eee5cc31301e29dd9c88c462625601a633af65717cd1360e4 +size 384 diff --git a/margin_logs/step_0000505.npy b/margin_logs/step_0000505.npy new file mode 100644 index 0000000..ab285e4 --- /dev/null +++ b/margin_logs/step_0000505.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cac8091e4f72d9969fce71928141c7522c5176a329b8c3d80717f79577cc74b6 +size 384 diff --git a/margin_logs/step_0000506.npy b/margin_logs/step_0000506.npy new file mode 100644 index 0000000..94d6ac5 --- /dev/null +++ b/margin_logs/step_0000506.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:352b011f7a608ba7c96fdcb6e340abe43bee7143341a2f72a814a87d95d3c602 +size 384 diff --git a/margin_logs/step_0000507.npy b/margin_logs/step_0000507.npy new file mode 100644 index 0000000..3495ddd --- /dev/null +++ b/margin_logs/step_0000507.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62d474d53bd5e570f901101c54f4174300c844536d942214b6589bb6f805e1b1 +size 384 diff --git a/margin_logs/step_0000508.npy b/margin_logs/step_0000508.npy new file mode 100644 index 0000000..274a9c4 --- /dev/null +++ b/margin_logs/step_0000508.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c45fc8e9412366faf3f6130dc09963c000871d09e42ceee804e8efda57f69a39 +size 384 diff --git a/margin_logs/step_0000509.npy b/margin_logs/step_0000509.npy new file mode 100644 index 0000000..43f10c7 --- /dev/null +++ b/margin_logs/step_0000509.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9fb22708b55207657cd6d1623d28dc0c12c1cea79f77e0dc19af2e5766caf95 +size 384 diff --git a/margin_logs/step_0000510.npy b/margin_logs/step_0000510.npy new file mode 100644 index 0000000..d4035c7 --- /dev/null +++ b/margin_logs/step_0000510.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e156f89daadc7b18b8214b10d2ff9a8f1a556fad0e4f1ac58a324d082575e6d7 +size 384 diff --git a/margin_logs/step_0000511.npy b/margin_logs/step_0000511.npy new file mode 100644 index 0000000..5c3166a --- /dev/null +++ b/margin_logs/step_0000511.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a519ca9b69bb0c28e75e6d81aa49b7f5b391a3279cb91e7e5eb0de9a70b0789f +size 384 diff --git a/margin_logs/step_0000512.npy b/margin_logs/step_0000512.npy new file mode 100644 index 0000000..ae8e000 --- /dev/null +++ b/margin_logs/step_0000512.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a6e44920ef0828b1a4446b4dfbc2a66bf4635cd6d5aec85f047aa71bd17754b6 +size 384 diff --git a/margin_logs/step_0000513.npy b/margin_logs/step_0000513.npy new file mode 100644 index 0000000..1aa8c2c --- /dev/null +++ b/margin_logs/step_0000513.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22bbbd195af2c404ec6b354e1904deced961ce032f7999ee96035afd491e9246 +size 384 diff --git a/margin_logs/step_0000514.npy b/margin_logs/step_0000514.npy new file mode 100644 index 0000000..096e1b6 --- /dev/null +++ b/margin_logs/step_0000514.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29e487430a02dabc138b021ffdfd6d5ede951067e05f0d247f6ce4c99259075e +size 384 diff --git a/margin_logs/step_0000515.npy b/margin_logs/step_0000515.npy new file mode 100644 index 0000000..bfd57bd --- /dev/null +++ b/margin_logs/step_0000515.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a6391c81ba4f4016d144f1662d4a01765f269dd87808594e756641df4a91409b +size 384 diff --git a/margin_logs/step_0000516.npy b/margin_logs/step_0000516.npy new file mode 100644 index 0000000..db4f62e --- /dev/null +++ b/margin_logs/step_0000516.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:54583e46447f350b11ef4b6186c5cfb88579690beacd0e7a3bffbbfa1aae2cd3 +size 384 diff --git a/margin_logs/step_0000517.npy b/margin_logs/step_0000517.npy new file mode 100644 index 0000000..e439c1f --- /dev/null +++ b/margin_logs/step_0000517.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:486ea74bf82c0a0095d1657cb11b6618b4c01e00087c5d3f46c1ba4b41278324 +size 384 diff --git a/margin_logs/step_0000518.npy b/margin_logs/step_0000518.npy new file mode 100644 index 0000000..6227d6d --- /dev/null +++ b/margin_logs/step_0000518.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:848c010dca3f75b1afad489eafef0c720b2558a15ba1be34410441a66a2d5226 +size 384 diff --git a/margin_logs/step_0000519.npy b/margin_logs/step_0000519.npy new file mode 100644 index 0000000..d8c3348 --- /dev/null +++ b/margin_logs/step_0000519.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d9721e5b2dca73a0943cf1eb3ace79f52df02d04109e0403872ae3379cf71c7d +size 384 diff --git a/margin_logs/step_0000520.npy b/margin_logs/step_0000520.npy new file mode 100644 index 0000000..89800c3 --- /dev/null +++ b/margin_logs/step_0000520.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa872f7f3e2b8b36897386c8ae62b202d0e16ef0fed288307694c5903fb0191c +size 384 diff --git a/margin_logs/step_0000521.npy b/margin_logs/step_0000521.npy new file mode 100644 index 0000000..87be2ff --- /dev/null +++ b/margin_logs/step_0000521.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:381d649006f3a4a44c5ebbc2a9cbd87dbef457a6f5535d4ecbf4d09bd4be9823 +size 384 diff --git a/margin_logs/step_0000522.npy b/margin_logs/step_0000522.npy new file mode 100644 index 0000000..7cf72f5 --- /dev/null +++ b/margin_logs/step_0000522.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:536cb96690402b60ef0ea4d838769717aee7ff05f09f822c88af256fc6aaaf22 +size 384 diff --git a/margin_logs/step_0000523.npy b/margin_logs/step_0000523.npy new file mode 100644 index 0000000..32b4fd9 --- /dev/null +++ b/margin_logs/step_0000523.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b47762a124cca68e5cc5f3992c3c141fc9a1efc8063355fb1ac3a901da904d63 +size 384 diff --git a/margin_logs/step_0000524.npy b/margin_logs/step_0000524.npy new file mode 100644 index 0000000..1456fd1 --- /dev/null +++ b/margin_logs/step_0000524.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef7655340d08c532c3b66862df3a78ce676aa4251b355742821b02b7bf76e6db +size 384 diff --git a/margin_logs/step_0000525.npy b/margin_logs/step_0000525.npy new file mode 100644 index 0000000..8fe0992 --- /dev/null +++ b/margin_logs/step_0000525.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3dd7c9fcc1913b18e106ec41b354f39e3ea1fff20d9536e25dc52b62c860b95c +size 384 diff --git a/margin_logs/step_0000526.npy b/margin_logs/step_0000526.npy new file mode 100644 index 0000000..ee819c5 --- /dev/null +++ b/margin_logs/step_0000526.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4d042935b3d19a9586914d5c506edd03a4861285d9ff563261eb28b24eb5e61 +size 384 diff --git a/margin_logs/step_0000527.npy b/margin_logs/step_0000527.npy new file mode 100644 index 0000000..2ce635d --- /dev/null +++ b/margin_logs/step_0000527.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74cc2877da35e5c09f932fe7f1e9f83c10004f40e756bb2a3b85e6483e6a2f67 +size 384 diff --git a/margin_logs/step_0000528.npy b/margin_logs/step_0000528.npy new file mode 100644 index 0000000..7133c05 --- /dev/null +++ b/margin_logs/step_0000528.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9406ca3b142d30273750531fbaf6656d2d4294d124598bfd848e1389a44d040e +size 384 diff --git a/margin_logs/step_0000529.npy b/margin_logs/step_0000529.npy new file mode 100644 index 0000000..85c0232 --- /dev/null +++ b/margin_logs/step_0000529.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26475637ed150df928d3cccc6c48efe7374f9f48ed62eb4c9cc137c7b7deff1b +size 384 diff --git a/margin_logs/step_0000530.npy b/margin_logs/step_0000530.npy new file mode 100644 index 0000000..d54cb9f --- /dev/null +++ b/margin_logs/step_0000530.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:681aba9047d9034b4e0eac7db0c1eee86a90e2be28a6bf75139acef1ac151103 +size 384 diff --git a/margin_logs/step_0000531.npy b/margin_logs/step_0000531.npy new file mode 100644 index 0000000..e19f7f7 --- /dev/null +++ b/margin_logs/step_0000531.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd0b3b910e08cf0cb883ec2b30c78ea025ee4350de22ab7ba10df8a58bd0e44d +size 384 diff --git a/margin_logs/step_0000532.npy b/margin_logs/step_0000532.npy new file mode 100644 index 0000000..d3256a3 --- /dev/null +++ b/margin_logs/step_0000532.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9d9dfc99725c0cfd6c3d546a02e653cb59ce6e8207a80ea5e9b18a85f2ed46a +size 384 diff --git a/margin_logs/step_0000533.npy b/margin_logs/step_0000533.npy new file mode 100644 index 0000000..cc67b6b --- /dev/null +++ b/margin_logs/step_0000533.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:87a4db3c216f12979b3f095221d6c933df2865ffe3043419ab6abb36de330f83 +size 384 diff --git a/margin_logs/step_0000534.npy b/margin_logs/step_0000534.npy new file mode 100644 index 0000000..f4469a6 --- /dev/null +++ b/margin_logs/step_0000534.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b3b8b2431d1e0c9e2125656cb7f280aa56f4c813a6cfad5fdeeaf06c09d1ce4 +size 384 diff --git a/margin_logs/step_0000535.npy b/margin_logs/step_0000535.npy new file mode 100644 index 0000000..c37787e --- /dev/null +++ b/margin_logs/step_0000535.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b77cbc3c3f15b8060db0814e259919320984c6a835882177d87ee534b2a765f3 +size 384 diff --git a/margin_logs/step_0000536.npy b/margin_logs/step_0000536.npy new file mode 100644 index 0000000..69b78df --- /dev/null +++ b/margin_logs/step_0000536.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac1ed73ea3152d669fe6c0bfddad2ee2cd443a0377902b0a0095cd1f31dcd657 +size 384 diff --git a/margin_logs/step_0000537.npy b/margin_logs/step_0000537.npy new file mode 100644 index 0000000..4c55af0 --- /dev/null +++ b/margin_logs/step_0000537.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9fdc8d28b2ada3caab784c567a759276b476b1d776d5b04612b9e2cb915fe15b +size 384 diff --git a/margin_logs/step_0000538.npy b/margin_logs/step_0000538.npy new file mode 100644 index 0000000..083aad2 --- /dev/null +++ b/margin_logs/step_0000538.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2921ca73c5840a03786b91586839fa4f9b2218275879d3cd76cf34f0f1a2e0c0 +size 384 diff --git a/margin_logs/step_0000539.npy b/margin_logs/step_0000539.npy new file mode 100644 index 0000000..9323a2d --- /dev/null +++ b/margin_logs/step_0000539.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5fb614850fa6cc9f9a877fe614ee10aa6be9d6c803b80c9644341d4730718583 +size 384 diff --git a/margin_logs/step_0000540.npy b/margin_logs/step_0000540.npy new file mode 100644 index 0000000..d7517d4 --- /dev/null +++ b/margin_logs/step_0000540.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24e84030f428be5649176420f6e7325050cb88b6d82aa7b91943792754c1897a +size 384 diff --git a/margin_logs/step_0000541.npy b/margin_logs/step_0000541.npy new file mode 100644 index 0000000..47f0958 --- /dev/null +++ b/margin_logs/step_0000541.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5077eaa5418edaadc8018ed79b3f4dab2d74dc63291226bff54b505a0d9ed985 +size 384 diff --git a/margin_logs/step_0000542.npy b/margin_logs/step_0000542.npy new file mode 100644 index 0000000..9565424 --- /dev/null +++ b/margin_logs/step_0000542.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:656f45ebeba7b387dcaad1e846c3508e35c5720974a06ff111f58f1f276dd066 +size 384 diff --git a/margin_logs/step_0000543.npy b/margin_logs/step_0000543.npy new file mode 100644 index 0000000..4c74436 --- /dev/null +++ b/margin_logs/step_0000543.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c85dae0e09d08d284285550897dcbc77a51ae73b29852830a0bdb21c0e764c60 +size 384 diff --git a/margin_logs/step_0000544.npy b/margin_logs/step_0000544.npy new file mode 100644 index 0000000..5dfdca5 --- /dev/null +++ b/margin_logs/step_0000544.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a977f0bbeacdfaa532f21ec0e2bd360e1d1f603365063c220e4d63d0d69c4912 +size 384 diff --git a/margin_logs/step_0000545.npy b/margin_logs/step_0000545.npy new file mode 100644 index 0000000..13e6564 --- /dev/null +++ b/margin_logs/step_0000545.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:894edb1c0176879071477a7d326d7910c867f5fd2def27cc624841a208d1ba5c +size 384 diff --git a/margin_logs/step_0000546.npy b/margin_logs/step_0000546.npy new file mode 100644 index 0000000..6e52d36 --- /dev/null +++ b/margin_logs/step_0000546.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e1f55fd60138a443878ac2dfb4943ef57b5d30f4306773cd4dd2dfe24b53441 +size 384 diff --git a/margin_logs/step_0000547.npy b/margin_logs/step_0000547.npy new file mode 100644 index 0000000..37b0c04 --- /dev/null +++ b/margin_logs/step_0000547.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d7fbf2ffea5d9b081c2b8f26e42b7ebf32dc3b3d2f79a54fd2096f83983fb9fe +size 384 diff --git a/margin_logs/step_0000548.npy b/margin_logs/step_0000548.npy new file mode 100644 index 0000000..b46da02 --- /dev/null +++ b/margin_logs/step_0000548.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b66964e04f363fc2d2fdd03a75a8d9700cff0d5764bf1f64fa829cecd580400 +size 384 diff --git a/margin_logs/step_0000549.npy b/margin_logs/step_0000549.npy new file mode 100644 index 0000000..d1f1940 --- /dev/null +++ b/margin_logs/step_0000549.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d55a5086a8b62857bb2c42efc37f27cde4f07f1afe90b1f51b4983a4dfc7e285 +size 384 diff --git a/margin_logs/step_0000550.npy b/margin_logs/step_0000550.npy new file mode 100644 index 0000000..e6bcadd --- /dev/null +++ b/margin_logs/step_0000550.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5be58dfd7a1512e0e08566f9379e0f2c7aac809c088295344f4a4f926763d1d +size 384 diff --git a/margin_logs/step_0000551.npy b/margin_logs/step_0000551.npy new file mode 100644 index 0000000..a2033db --- /dev/null +++ b/margin_logs/step_0000551.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a018e3895dfd5e6d94d9c7fafa86294c0b26ea10618cc7cf2e854164d3a0295f +size 384 diff --git a/margin_logs/step_0000552.npy b/margin_logs/step_0000552.npy new file mode 100644 index 0000000..0f9ec27 --- /dev/null +++ b/margin_logs/step_0000552.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:951354d6598d0d1daadd807b4b15227185270037accce83f41ef9234cf1fa04a +size 384 diff --git a/margin_logs/step_0000553.npy b/margin_logs/step_0000553.npy new file mode 100644 index 0000000..cebad88 --- /dev/null +++ b/margin_logs/step_0000553.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:937e8483c678ffe1be38b36d2d35acdd01f83a6c8176619c2ad2ec74a8edeb9f +size 384 diff --git a/margin_logs/step_0000554.npy b/margin_logs/step_0000554.npy new file mode 100644 index 0000000..63bfac2 --- /dev/null +++ b/margin_logs/step_0000554.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7ef5edc7fd85429613fb1df855f5e7ce3adce685a93c91afbb1167a36a70ed6 +size 384 diff --git a/margin_logs/step_0000555.npy b/margin_logs/step_0000555.npy new file mode 100644 index 0000000..c888a61 --- /dev/null +++ b/margin_logs/step_0000555.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59f5f4ed2bb3487d49d802f5df2dd2e8a224ec98fd30c2b01c531879acf37a10 +size 384 diff --git a/margin_logs/step_0000556.npy b/margin_logs/step_0000556.npy new file mode 100644 index 0000000..464f5c0 --- /dev/null +++ b/margin_logs/step_0000556.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b2e4cab1b91c349f221a24d874d22c68818fc6b12d8f5252de894c097b28273 +size 384 diff --git a/margin_logs/step_0000557.npy b/margin_logs/step_0000557.npy new file mode 100644 index 0000000..a4fd581 --- /dev/null +++ b/margin_logs/step_0000557.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04dbd6dfad873e95749246478f649b053288b6de811c0f137c3d0d6c84beb3c3 +size 384 diff --git a/margin_logs/step_0000558.npy b/margin_logs/step_0000558.npy new file mode 100644 index 0000000..ffc0dc6 --- /dev/null +++ b/margin_logs/step_0000558.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38c0dc5895146ce219628f87a6893843e2f60bdfafbc90ef0b329ca503b2daad +size 384 diff --git a/margin_logs/step_0000559.npy b/margin_logs/step_0000559.npy new file mode 100644 index 0000000..34faeaa --- /dev/null +++ b/margin_logs/step_0000559.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:202c70f22dc35efa0bc94b19de7ab2c820c09689fcd037eff8050d3be717efe3 +size 384 diff --git a/margin_logs/step_0000560.npy b/margin_logs/step_0000560.npy new file mode 100644 index 0000000..ea64a07 --- /dev/null +++ b/margin_logs/step_0000560.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f4484ed1af1f5ceb90b58ace3afd9885880c510cf1d68f44f7ac65116855e39 +size 384 diff --git a/margin_logs/step_0000561.npy b/margin_logs/step_0000561.npy new file mode 100644 index 0000000..a0a0352 --- /dev/null +++ b/margin_logs/step_0000561.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6fb094f9e0b4a33daacd3d0d6a39ba304cfbf54f96f5842452c494d31fdcb1a5 +size 384 diff --git a/margin_logs/step_0000562.npy b/margin_logs/step_0000562.npy new file mode 100644 index 0000000..d96093a --- /dev/null +++ b/margin_logs/step_0000562.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:942e72b155db59e1ee044babf6c5420be3a28c4a8b77b6a00a1341dc7d27cc7d +size 384 diff --git a/margin_logs/step_0000563.npy b/margin_logs/step_0000563.npy new file mode 100644 index 0000000..fead436 --- /dev/null +++ b/margin_logs/step_0000563.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29f12f39154ba7ed434b574bb70239a9061174f2d4bd4e169fa0a7d6f1923279 +size 384 diff --git a/margin_logs/step_0000564.npy b/margin_logs/step_0000564.npy new file mode 100644 index 0000000..9401598 --- /dev/null +++ b/margin_logs/step_0000564.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:752d8448b24001bdd1bb8cb395a2347455ae7ecab6e8af61f737c9cd62f9f60f +size 384 diff --git a/margin_logs/step_0000565.npy b/margin_logs/step_0000565.npy new file mode 100644 index 0000000..c38ac97 --- /dev/null +++ b/margin_logs/step_0000565.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04a4a0cb1586274c92511c19d0f3201e7b870b0b6c65edcaaacd63f06b34d269 +size 384 diff --git a/margin_logs/step_0000566.npy b/margin_logs/step_0000566.npy new file mode 100644 index 0000000..5e70682 --- /dev/null +++ b/margin_logs/step_0000566.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de238eecc7d68464da3ed99a25f2f6dee2a33c92432bea2b021ff5cde9c6bd4f +size 384 diff --git a/margin_logs/step_0000567.npy b/margin_logs/step_0000567.npy new file mode 100644 index 0000000..31b539a --- /dev/null +++ b/margin_logs/step_0000567.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2109bbc71a5e92dbe075a3576a3f92b3eba89403fd8553f795a461bb3814ecce +size 384 diff --git a/margin_logs/step_0000568.npy b/margin_logs/step_0000568.npy new file mode 100644 index 0000000..062ec9a --- /dev/null +++ b/margin_logs/step_0000568.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:751e7e8de86926f7887f055163d7e6caa4c9248c55cf148d67668d5b73b72872 +size 384 diff --git a/margin_logs/step_0000569.npy b/margin_logs/step_0000569.npy new file mode 100644 index 0000000..ecdac26 --- /dev/null +++ b/margin_logs/step_0000569.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:77944fd9ab5d0379e38e9755d82560ebf02bf4d0efc7a5297f5ca7e7d5c79bba +size 384 diff --git a/margin_logs/step_0000570.npy b/margin_logs/step_0000570.npy new file mode 100644 index 0000000..2f1a491 --- /dev/null +++ b/margin_logs/step_0000570.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b77158785357bacecbd1b4dcb8cbfeffa8554055967fdfbc7220ffde671a184 +size 384 diff --git a/margin_logs/step_0000571.npy b/margin_logs/step_0000571.npy new file mode 100644 index 0000000..f296b19 --- /dev/null +++ b/margin_logs/step_0000571.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9de6fd00f58e64b13b536cca22545d7443484681b7ea23099cb0c6e4d1a1af73 +size 384 diff --git a/margin_logs/step_0000572.npy b/margin_logs/step_0000572.npy new file mode 100644 index 0000000..85b6f30 --- /dev/null +++ b/margin_logs/step_0000572.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:059ad26cc7345798e38b38ded1d2f06db012a88bffb5d1854de8464f4e0585c0 +size 384 diff --git a/margin_logs/step_0000573.npy b/margin_logs/step_0000573.npy new file mode 100644 index 0000000..ed00759 --- /dev/null +++ b/margin_logs/step_0000573.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:199e3e460fe37fb262c7bd8b71053fc937e2f3744d24d3bc2a5df5c772515e99 +size 384 diff --git a/margin_logs/step_0000574.npy b/margin_logs/step_0000574.npy new file mode 100644 index 0000000..2ae0aab --- /dev/null +++ b/margin_logs/step_0000574.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3a2f1b8562aab734b2493a3dc322f397ba788312ae9bbeaf2c45875d98f29bd +size 384 diff --git a/margin_logs/step_0000575.npy b/margin_logs/step_0000575.npy new file mode 100644 index 0000000..75a6000 --- /dev/null +++ b/margin_logs/step_0000575.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:087159abf95d2c6adcd8577a764c3b0c6dfe20406646dd4656b4a45fb2ddd5d3 +size 384 diff --git a/margin_logs/step_0000576.npy b/margin_logs/step_0000576.npy new file mode 100644 index 0000000..cbea053 --- /dev/null +++ b/margin_logs/step_0000576.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2b70fbc6bdb40dac8643c72342f153f8c86d99b2385dbf9759dc31984365952 +size 384 diff --git a/margin_logs/step_0000577.npy b/margin_logs/step_0000577.npy new file mode 100644 index 0000000..dbabb74 --- /dev/null +++ b/margin_logs/step_0000577.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c473afa426cb6b0bc4b2f411c873bfd0d87bfd3976f97330e9eb4cd6bb469f88 +size 384 diff --git a/margin_logs/step_0000578.npy b/margin_logs/step_0000578.npy new file mode 100644 index 0000000..052e62b --- /dev/null +++ b/margin_logs/step_0000578.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f95e1e76d97a19bd45659e298d3ea8f9fdef661bfaf76da718bea8e609ba1ccd +size 384 diff --git a/margin_logs/step_0000579.npy b/margin_logs/step_0000579.npy new file mode 100644 index 0000000..85cda9f --- /dev/null +++ b/margin_logs/step_0000579.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d9314562040e3409f5463929d35f3afd430a1438c0af13706affb17bc546773 +size 384 diff --git a/margin_logs/step_0000580.npy b/margin_logs/step_0000580.npy new file mode 100644 index 0000000..25e94b3 --- /dev/null +++ b/margin_logs/step_0000580.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2264690fae7d7f973a6b654f9f2ef5a6c83c4f02d5f435d190017c170a8ff84d +size 384 diff --git a/margin_logs/step_0000581.npy b/margin_logs/step_0000581.npy new file mode 100644 index 0000000..b38ad44 --- /dev/null +++ b/margin_logs/step_0000581.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1aefab419aae4df5530ba6c1f464dae61eb8cd9684262ce4bee0d079d2363dc6 +size 384 diff --git a/margin_logs/step_0000582.npy b/margin_logs/step_0000582.npy new file mode 100644 index 0000000..2cb8f75 --- /dev/null +++ b/margin_logs/step_0000582.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f9414b7e24edc5ba35029ee3407f6780ebfea3fc763827916052320d0883860 +size 384 diff --git a/margin_logs/step_0000583.npy b/margin_logs/step_0000583.npy new file mode 100644 index 0000000..0203677 --- /dev/null +++ b/margin_logs/step_0000583.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d6fef8ad67149df6206ab150ac3fe53d6aedaccfa0d0a875e32974cc4e52f7b9 +size 384 diff --git a/margin_logs/step_0000584.npy b/margin_logs/step_0000584.npy new file mode 100644 index 0000000..348bac3 --- /dev/null +++ b/margin_logs/step_0000584.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7fe8ee7914596130824279faf6a945ac7cede460de98a420da84b887fa86ffd4 +size 384 diff --git a/margin_logs/step_0000585.npy b/margin_logs/step_0000585.npy new file mode 100644 index 0000000..2a6b49f --- /dev/null +++ b/margin_logs/step_0000585.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89114b18498e61a39a0b501a5ab6c3b565ccd2f9e22663b4a0a5c1e9dff3460b +size 384 diff --git a/margin_logs/step_0000586.npy b/margin_logs/step_0000586.npy new file mode 100644 index 0000000..e38e8d1 --- /dev/null +++ b/margin_logs/step_0000586.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:37b074fb50dfdda3711b952daab8860bf9c445aac7542b380c1ef2e5ca9e08a7 +size 384 diff --git a/margin_logs/step_0000587.npy b/margin_logs/step_0000587.npy new file mode 100644 index 0000000..8ee0ebb --- /dev/null +++ b/margin_logs/step_0000587.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:712ca4daa7ae40142e4360443d21870cebc26db333ed1c79e1783aa42d930b6e +size 384 diff --git a/margin_logs/step_0000588.npy b/margin_logs/step_0000588.npy new file mode 100644 index 0000000..574ee1e --- /dev/null +++ b/margin_logs/step_0000588.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:57aabb3743ddd3e80e28dd2179e59f66864ad2f95791e1c30498e5b797f694ce +size 384 diff --git a/margin_logs/step_0000589.npy b/margin_logs/step_0000589.npy new file mode 100644 index 0000000..8c2a27c --- /dev/null +++ b/margin_logs/step_0000589.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b3cdf6ac190e6bc6db054670af20a8c836d0efea28ab295d732fc23030d35a2 +size 384 diff --git a/margin_logs/step_0000590.npy b/margin_logs/step_0000590.npy new file mode 100644 index 0000000..e382b51 --- /dev/null +++ b/margin_logs/step_0000590.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21ae5831831713343e5e7b5f4be55df62350eb7fdadd16fab5620b2f924b3ddf +size 384 diff --git a/margin_logs/step_0000591.npy b/margin_logs/step_0000591.npy new file mode 100644 index 0000000..e8d9d64 --- /dev/null +++ b/margin_logs/step_0000591.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ed6209570bba411bc2ad2ecc8a42b5f638c24ed4b5eb95011096852ab331a64 +size 384 diff --git a/margin_logs/step_0000592.npy b/margin_logs/step_0000592.npy new file mode 100644 index 0000000..25ee43f --- /dev/null +++ b/margin_logs/step_0000592.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:71b47a6ce3f88d4dff891caf6a73c5f3cd67de42ccefd7ff99f1c35b04e48963 +size 384 diff --git a/margin_logs/step_0000593.npy b/margin_logs/step_0000593.npy new file mode 100644 index 0000000..76b0b1d --- /dev/null +++ b/margin_logs/step_0000593.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c14b16f0a417d542f58518e1844f3b51ad809d352769ea9c51e8681fa0048ede +size 384 diff --git a/margin_logs/step_0000594.npy b/margin_logs/step_0000594.npy new file mode 100644 index 0000000..7fdb6ad --- /dev/null +++ b/margin_logs/step_0000594.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8b996364b2672bcfd832c5dc89f32493cedbcb0c552e336680cfd9b410de5c4 +size 384 diff --git a/margin_logs/step_0000595.npy b/margin_logs/step_0000595.npy new file mode 100644 index 0000000..08d2803 --- /dev/null +++ b/margin_logs/step_0000595.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26b12d83a6e9575752c5eb61f5f50ac104e9e8bf81247d6b35d65c8949e05233 +size 384 diff --git a/margin_logs/step_0000596.npy b/margin_logs/step_0000596.npy new file mode 100644 index 0000000..5b3a327 --- /dev/null +++ b/margin_logs/step_0000596.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d6abafa624867174e118a02f15b4a4d2676956f47481df008ece54aec82ef53d +size 384 diff --git a/margin_logs/step_0000597.npy b/margin_logs/step_0000597.npy new file mode 100644 index 0000000..b105e31 --- /dev/null +++ b/margin_logs/step_0000597.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0150b5d6bc8d5d1f0249e55fbae977812c5ad135b2754013a8c729829591d0c +size 384 diff --git a/margin_logs/step_0000598.npy b/margin_logs/step_0000598.npy new file mode 100644 index 0000000..8c72ed2 --- /dev/null +++ b/margin_logs/step_0000598.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32ac072f4e55524b0a006e72b69e8762766bbfec06e53ea1e23658c8e3b247aa +size 384 diff --git a/margin_logs/step_0000599.npy b/margin_logs/step_0000599.npy new file mode 100644 index 0000000..81909f8 --- /dev/null +++ b/margin_logs/step_0000599.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a4b1b6dfd048c564cc1ff9f265f562fe5928f25e4a275482cf4c050989f6464 +size 384 diff --git a/margin_logs/step_0000600.npy b/margin_logs/step_0000600.npy new file mode 100644 index 0000000..5b6c0e6 --- /dev/null +++ b/margin_logs/step_0000600.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c1fdf810e4ae9a03b8b3778fd7b294b994357bfa1a2b272e08d2ac70d561410 +size 384 diff --git a/margin_logs/step_0000601.npy b/margin_logs/step_0000601.npy new file mode 100644 index 0000000..389dacf --- /dev/null +++ b/margin_logs/step_0000601.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be7a6d65148af79440258f1f132f8bd83dcf8754ee006b70749ebf68d9646149 +size 384 diff --git a/margin_logs/step_0000602.npy b/margin_logs/step_0000602.npy new file mode 100644 index 0000000..fe8b21d --- /dev/null +++ b/margin_logs/step_0000602.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db52ce109af1791c4bace0159916e71d00ff72ff4bfb06e781619d90630a19dc +size 384 diff --git a/margin_logs/step_0000603.npy b/margin_logs/step_0000603.npy new file mode 100644 index 0000000..2376f8c --- /dev/null +++ b/margin_logs/step_0000603.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b5e3f4d71fa81091bcf89cb90c0d2e938794951bee87b9256979f4a988a6372 +size 384 diff --git a/margin_logs/step_0000604.npy b/margin_logs/step_0000604.npy new file mode 100644 index 0000000..a36c88c --- /dev/null +++ b/margin_logs/step_0000604.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a044f58b6940ff4184a711dae00f04645548bd13214cbf108714e6a86b8dfe61 +size 384 diff --git a/margin_logs/step_0000605.npy b/margin_logs/step_0000605.npy new file mode 100644 index 0000000..6c58d75 --- /dev/null +++ b/margin_logs/step_0000605.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ffdfc9dfbb56d147da0c7fa4048b13a8251aaea04d074995145dddc79ebb1dc3 +size 384 diff --git a/margin_logs/step_0000606.npy b/margin_logs/step_0000606.npy new file mode 100644 index 0000000..8765dd1 --- /dev/null +++ b/margin_logs/step_0000606.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14b12ac5a20581001de29e299c81cf490acd0ff964fea50163ad1a2a7e7b980a +size 384 diff --git a/margin_logs/step_0000607.npy b/margin_logs/step_0000607.npy new file mode 100644 index 0000000..e3575f4 --- /dev/null +++ b/margin_logs/step_0000607.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f6c6053061527336d7d266f5ad3c898d6368634d08013969b6267169412f944e +size 384 diff --git a/margin_logs/step_0000608.npy b/margin_logs/step_0000608.npy new file mode 100644 index 0000000..be76c2c --- /dev/null +++ b/margin_logs/step_0000608.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dfaf0fb3bd59156d08ac2609cd1202ca2d43a51510e29da2c1b4defe47d6cacc +size 384 diff --git a/margin_logs/step_0000609.npy b/margin_logs/step_0000609.npy new file mode 100644 index 0000000..58b5d41 --- /dev/null +++ b/margin_logs/step_0000609.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83cc528224183b5518395f2cfca9d632f1fad1c0449db3c79466e3949cd98b59 +size 384 diff --git a/margin_logs/step_0000610.npy b/margin_logs/step_0000610.npy new file mode 100644 index 0000000..42528c3 --- /dev/null +++ b/margin_logs/step_0000610.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01fc01310eb88131d2a7efe5fad77958384e8c38b1b02817323a6917059c2c60 +size 384 diff --git a/margin_logs/step_0000611.npy b/margin_logs/step_0000611.npy new file mode 100644 index 0000000..5007148 --- /dev/null +++ b/margin_logs/step_0000611.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:497502c67d2d9a393a09e623adb63840731e32836ac9172be79697b090a017a3 +size 384 diff --git a/margin_logs/step_0000612.npy b/margin_logs/step_0000612.npy new file mode 100644 index 0000000..c8c14f1 --- /dev/null +++ b/margin_logs/step_0000612.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba3a1dff509f45dd9fe123ac94c4bfb9205562c3fd0dcaf47432793702303ba6 +size 384 diff --git a/margin_logs/step_0000613.npy b/margin_logs/step_0000613.npy new file mode 100644 index 0000000..5969dbc --- /dev/null +++ b/margin_logs/step_0000613.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aad20e8975bda181b2b6f480a5d01d9aadfbf8d727f9f12d96327613ba53546a +size 384 diff --git a/margin_logs/step_0000614.npy b/margin_logs/step_0000614.npy new file mode 100644 index 0000000..7711563 --- /dev/null +++ b/margin_logs/step_0000614.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d02215332e35403ba3b03472835252ec8df3cdefc72d97d485e0730ffea72080 +size 384 diff --git a/margin_logs/step_0000615.npy b/margin_logs/step_0000615.npy new file mode 100644 index 0000000..9b9b80b --- /dev/null +++ b/margin_logs/step_0000615.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:86997a9229bb2acebf89aa98b71a89e7ac3956d6d4b8f9d2636b8f940f75a149 +size 384 diff --git a/margin_logs/step_0000616.npy b/margin_logs/step_0000616.npy new file mode 100644 index 0000000..27f39e7 --- /dev/null +++ b/margin_logs/step_0000616.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5637cd00cbf0ce1ee932afac9d3928b6260696a522531e8807e784890d9dc3d4 +size 384 diff --git a/margin_logs/step_0000617.npy b/margin_logs/step_0000617.npy new file mode 100644 index 0000000..a279c69 --- /dev/null +++ b/margin_logs/step_0000617.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9aa2c4a500f12779402ff0ad9ab23885e6266f8ed6cfe1db85e9e9b212b86bc4 +size 384 diff --git a/margin_logs/step_0000618.npy b/margin_logs/step_0000618.npy new file mode 100644 index 0000000..d115d86 --- /dev/null +++ b/margin_logs/step_0000618.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4f9b103033c0dc25b6b74c8006ad60eac7c96c429a95ab1ed314bd391a6e331 +size 384 diff --git a/margin_logs/step_0000619.npy b/margin_logs/step_0000619.npy new file mode 100644 index 0000000..a18b0ea --- /dev/null +++ b/margin_logs/step_0000619.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3875e6277f0cbda5c2cbf4c59587f215c07b8b7575ebc06397734c4c9034b14d +size 384 diff --git a/margin_logs/step_0000620.npy b/margin_logs/step_0000620.npy new file mode 100644 index 0000000..ea4062b --- /dev/null +++ b/margin_logs/step_0000620.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9fc1e4b0f0830b60a091760d71d0bc5253c6d17b9f20a0bc1fea6bb1a12dfdad +size 384 diff --git a/margin_logs/step_0000621.npy b/margin_logs/step_0000621.npy new file mode 100644 index 0000000..aa2d209 --- /dev/null +++ b/margin_logs/step_0000621.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2926a23a7c5439cbaa784f072194753e7e11e7f4d48b3aaafdd6bc91eaa7fef3 +size 384 diff --git a/margin_logs/step_0000622.npy b/margin_logs/step_0000622.npy new file mode 100644 index 0000000..3bfd275 --- /dev/null +++ b/margin_logs/step_0000622.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79e5ac3a8b0ce4820c387a869a3353f798853dab5843880bdc56424ec044c83b +size 384 diff --git a/margin_logs/step_0000623.npy b/margin_logs/step_0000623.npy new file mode 100644 index 0000000..cd13970 --- /dev/null +++ b/margin_logs/step_0000623.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:491e3186e7a0a0a7101d8d8fb57ee258379b7a6f3ce27855be56509aebe74400 +size 384 diff --git a/margin_logs/step_0000624.npy b/margin_logs/step_0000624.npy new file mode 100644 index 0000000..cd71039 --- /dev/null +++ b/margin_logs/step_0000624.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7dcd7a41b1c8db0838312f84ed3203d71d1c799970a0060872abc8757c7516ea +size 384 diff --git a/margin_logs/step_0000625.npy b/margin_logs/step_0000625.npy new file mode 100644 index 0000000..90295bc --- /dev/null +++ b/margin_logs/step_0000625.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e7a47dfc513f458426b586d4ce3f506b5eb4d86bd901fa3236d11c550630107 +size 384 diff --git a/margin_logs/step_0000626.npy b/margin_logs/step_0000626.npy new file mode 100644 index 0000000..1610e5e --- /dev/null +++ b/margin_logs/step_0000626.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b5bb64b5e3e071bbe6d3a9a2c52c32ac6c834ffecf44b964c7c3032dca72452 +size 384 diff --git a/margin_logs/step_0000627.npy b/margin_logs/step_0000627.npy new file mode 100644 index 0000000..5bc8642 --- /dev/null +++ b/margin_logs/step_0000627.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cfd07802b71c168da1fe13a26a3228e12855e978b5952fc7a169d2cfce37d0c0 +size 384 diff --git a/margin_logs/step_0000628.npy b/margin_logs/step_0000628.npy new file mode 100644 index 0000000..ab33a10 --- /dev/null +++ b/margin_logs/step_0000628.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f172c26aad3e2997cc46ae91873e6a81442df90f7a6db8d04aee73c1ed490f34 +size 384 diff --git a/margin_logs/step_0000629.npy b/margin_logs/step_0000629.npy new file mode 100644 index 0000000..bd84b1b --- /dev/null +++ b/margin_logs/step_0000629.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bebeeac459f551b679ab889fd3c7d76916be21a8b66f8f58d0c61486354c3d4f +size 384 diff --git a/margin_logs/step_0000630.npy b/margin_logs/step_0000630.npy new file mode 100644 index 0000000..7308aad --- /dev/null +++ b/margin_logs/step_0000630.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d7c51b264a7aa60278668d82838aa09a880f2f787bdf16dcb766bead623b2f0e +size 384 diff --git a/margin_logs/step_0000631.npy b/margin_logs/step_0000631.npy new file mode 100644 index 0000000..4d9b8b0 --- /dev/null +++ b/margin_logs/step_0000631.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3aca1305edfb957b418dd62df4b8f9b883427a11a24b2e30db451eece083ad64 +size 384 diff --git a/margin_logs/step_0000632.npy b/margin_logs/step_0000632.npy new file mode 100644 index 0000000..f5b5923 --- /dev/null +++ b/margin_logs/step_0000632.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:05738fb31a4045a9cba6e80649629781a651db97bbf31135945dd7e70b357cde +size 384 diff --git a/margin_logs/step_0000633.npy b/margin_logs/step_0000633.npy new file mode 100644 index 0000000..f292d0b --- /dev/null +++ b/margin_logs/step_0000633.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8dccdef639b96b270c580bf60461cb6cf83023ca2ba55c4240d8e950c1422c8 +size 384 diff --git a/margin_logs/step_0000634.npy b/margin_logs/step_0000634.npy new file mode 100644 index 0000000..45d5f4c --- /dev/null +++ b/margin_logs/step_0000634.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49512fce8177a0bc2309003ec6d1c8ca0ef96f50dcfc3bfb5f6965801962310e +size 384 diff --git a/margin_logs/step_0000635.npy b/margin_logs/step_0000635.npy new file mode 100644 index 0000000..8e89569 --- /dev/null +++ b/margin_logs/step_0000635.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eff7f0a855b71af072617f8fa3307cdf2c58ef7b791022282e28eff44d94f6e6 +size 384 diff --git a/margin_logs/step_0000636.npy b/margin_logs/step_0000636.npy new file mode 100644 index 0000000..fc8f2ce --- /dev/null +++ b/margin_logs/step_0000636.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33a357e561e840bb8a4e5bfab62b8167fe9a9f2552d775206a15ae827062c137 +size 384 diff --git a/margin_logs/step_0000637.npy b/margin_logs/step_0000637.npy new file mode 100644 index 0000000..283a4b6 --- /dev/null +++ b/margin_logs/step_0000637.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d5baab2bbadb83bba8ff0901821130760a0a082a8c39ef0e57d12445b22f837 +size 384 diff --git a/margin_logs/step_0000638.npy b/margin_logs/step_0000638.npy new file mode 100644 index 0000000..04edcee --- /dev/null +++ b/margin_logs/step_0000638.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c11f08301da937499b63305862cf5b8f1cf7649a329f459e890cf7a9895e2bf8 +size 384 diff --git a/margin_logs/step_0000639.npy b/margin_logs/step_0000639.npy new file mode 100644 index 0000000..50d858a --- /dev/null +++ b/margin_logs/step_0000639.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a367b422e8a2986627011ac77a6c5882d4caf6ee6c54f26702574856f589969 +size 384 diff --git a/margin_logs/step_0000640.npy b/margin_logs/step_0000640.npy new file mode 100644 index 0000000..d475e08 --- /dev/null +++ b/margin_logs/step_0000640.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22daf3cf0fe3261b11f4cb49f985cc7abdcbc426a1e2229b657f6676c2b06381 +size 384 diff --git a/margin_logs/step_0000641.npy b/margin_logs/step_0000641.npy new file mode 100644 index 0000000..5f184d9 --- /dev/null +++ b/margin_logs/step_0000641.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d632af818369a17e0ea6ea7aa8eba7c07b7cdf753f5b83e18d69c5ed809c38b +size 384 diff --git a/margin_logs/step_0000642.npy b/margin_logs/step_0000642.npy new file mode 100644 index 0000000..018561e --- /dev/null +++ b/margin_logs/step_0000642.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db3f426a428bbe166a4a50509cb1698eafe6dbf2eb8f1fcd4d540df3e4da1c55 +size 384 diff --git a/margin_logs/step_0000643.npy b/margin_logs/step_0000643.npy new file mode 100644 index 0000000..fc118d3 --- /dev/null +++ b/margin_logs/step_0000643.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3253b72bc720372a52dabf7dac4899696c2585f073e61c6e4abaec3fb586ed8 +size 384 diff --git a/margin_logs/step_0000644.npy b/margin_logs/step_0000644.npy new file mode 100644 index 0000000..574e096 --- /dev/null +++ b/margin_logs/step_0000644.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9efc3e0c2491529dd5c8283616702ed9ea2b07ea9dc23c29a05987023ff988f2 +size 384 diff --git a/margin_logs/step_0000645.npy b/margin_logs/step_0000645.npy new file mode 100644 index 0000000..da1b55d --- /dev/null +++ b/margin_logs/step_0000645.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c75be26dc6a1fa6959823a2fe0e92433e41b421d120a8526b4e38b4fbf00a79d +size 384 diff --git a/margin_logs/step_0000646.npy b/margin_logs/step_0000646.npy new file mode 100644 index 0000000..fa38f82 --- /dev/null +++ b/margin_logs/step_0000646.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26b21bc07a71a5eec36bb83a483062879c454bbc29b84e4120c8b80dc09d0fd4 +size 384 diff --git a/margin_logs/step_0000647.npy b/margin_logs/step_0000647.npy new file mode 100644 index 0000000..b873ba4 --- /dev/null +++ b/margin_logs/step_0000647.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d0d8fcc564f6c9f0c4a41a223e5496f7a2f5de6b740755064c9909b470b34a1d +size 384 diff --git a/margin_logs/step_0000648.npy b/margin_logs/step_0000648.npy new file mode 100644 index 0000000..10e225e --- /dev/null +++ b/margin_logs/step_0000648.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d214dd7dca8349afca8089eb18cae2e41b7dbcf145acfbdbe97dba6930df165d +size 384 diff --git a/margin_logs/step_0000649.npy b/margin_logs/step_0000649.npy new file mode 100644 index 0000000..ae78acb --- /dev/null +++ b/margin_logs/step_0000649.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38d545146fda51d4fbd91e108b1ed5cd9c8ee5fdfec6152522a5075025e69eb0 +size 384 diff --git a/margin_logs/step_0000650.npy b/margin_logs/step_0000650.npy new file mode 100644 index 0000000..041b5cf --- /dev/null +++ b/margin_logs/step_0000650.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ccdd32dd9e4cf1c8712ce849772e233538eddf5a514688a9db218c5301985799 +size 384 diff --git a/margin_logs/step_0000651.npy b/margin_logs/step_0000651.npy new file mode 100644 index 0000000..648c996 --- /dev/null +++ b/margin_logs/step_0000651.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f79173185f73573de31022b1ba3fb2e07514c10a5089869d0541fe969df648a +size 384 diff --git a/margin_logs/step_0000652.npy b/margin_logs/step_0000652.npy new file mode 100644 index 0000000..6be6bfb --- /dev/null +++ b/margin_logs/step_0000652.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7840058005a0dea9639d76ee241a42b40b8d4887fbc35d4a3c7f47263542afc2 +size 384 diff --git a/margin_logs/step_0000653.npy b/margin_logs/step_0000653.npy new file mode 100644 index 0000000..b68e617 --- /dev/null +++ b/margin_logs/step_0000653.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4891acaff078b1d89e810778b35f48f9cd7f382244b2d8e66c3891bc3476a8d2 +size 384 diff --git a/margin_logs/step_0000654.npy b/margin_logs/step_0000654.npy new file mode 100644 index 0000000..5a3eebb --- /dev/null +++ b/margin_logs/step_0000654.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:65ba79e22999eb710b9044317b9328131d0448ffd0157eee879d6d501a9e916d +size 384 diff --git a/margin_logs/step_0000655.npy b/margin_logs/step_0000655.npy new file mode 100644 index 0000000..a9f359a --- /dev/null +++ b/margin_logs/step_0000655.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:011e85fb21fe2a518986be93a653d68d0b86dbec8f7bd1e7204f43e9b0c278aa +size 384 diff --git a/margin_logs/step_0000656.npy b/margin_logs/step_0000656.npy new file mode 100644 index 0000000..dbd1cfb --- /dev/null +++ b/margin_logs/step_0000656.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:186af4d41f852af9e85ae82c0f48d0c0699b02f150eeaa0ba1bd0b4260ed0e52 +size 384 diff --git a/margin_logs/step_0000657.npy b/margin_logs/step_0000657.npy new file mode 100644 index 0000000..3affce9 --- /dev/null +++ b/margin_logs/step_0000657.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40607c32b7ebc57a86ea3645db418fcdcca7f9b2b73f70368724863094521283 +size 384 diff --git a/margin_logs/step_0000658.npy b/margin_logs/step_0000658.npy new file mode 100644 index 0000000..3a0b870 --- /dev/null +++ b/margin_logs/step_0000658.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c2dc91b14326f9dc26d001e65be99541674eb64d18c6d7e4d395a851776e976 +size 384 diff --git a/margin_logs/step_0000659.npy b/margin_logs/step_0000659.npy new file mode 100644 index 0000000..79d7ff2 --- /dev/null +++ b/margin_logs/step_0000659.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fdae2078af01ef0633e6868de5e45940b8e32e6ddbb86745b29c98342a1f3d69 +size 384 diff --git a/margin_logs/step_0000660.npy b/margin_logs/step_0000660.npy new file mode 100644 index 0000000..148b596 --- /dev/null +++ b/margin_logs/step_0000660.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1e74fd61f1712abdd9b38aba3d2162ab7e800d21f4725c2956e201383c322e3 +size 384 diff --git a/margin_logs/step_0000661.npy b/margin_logs/step_0000661.npy new file mode 100644 index 0000000..faeff05 --- /dev/null +++ b/margin_logs/step_0000661.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:104e557311206e84b76f4dc8bd73c4c1ef169855a912b50cc72181c851f71522 +size 384 diff --git a/margin_logs/step_0000662.npy b/margin_logs/step_0000662.npy new file mode 100644 index 0000000..076d1da --- /dev/null +++ b/margin_logs/step_0000662.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd9782e100bb02aaf37d7abd11e5a7257b9c8edcc827ea802e5ac24caefa76d5 +size 384 diff --git a/margin_logs/step_0000663.npy b/margin_logs/step_0000663.npy new file mode 100644 index 0000000..a8c4255 --- /dev/null +++ b/margin_logs/step_0000663.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55f4f95c2e4813a31f1f28d91fbe7b93ddb6d630f52d8a8e425134ae5d9cad01 +size 384 diff --git a/margin_logs/step_0000664.npy b/margin_logs/step_0000664.npy new file mode 100644 index 0000000..d3578c9 --- /dev/null +++ b/margin_logs/step_0000664.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:141fc6923342b888d64fed699de30cf78f25074392827905b8a5f61bd61d453d +size 384 diff --git a/margin_logs/step_0000665.npy b/margin_logs/step_0000665.npy new file mode 100644 index 0000000..21c93e6 --- /dev/null +++ b/margin_logs/step_0000665.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f80e5b1c90afdb96a132b5db3a7ddbb1fd38c88c1936743ec6021d4e269affa7 +size 384 diff --git a/margin_logs/step_0000666.npy b/margin_logs/step_0000666.npy new file mode 100644 index 0000000..6dd45ce --- /dev/null +++ b/margin_logs/step_0000666.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5de1888affb307329ebc0585d3f9951561a083b723792f604fbcccd48714020b +size 384 diff --git a/margin_logs/step_0000667.npy b/margin_logs/step_0000667.npy new file mode 100644 index 0000000..1e7df36 --- /dev/null +++ b/margin_logs/step_0000667.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3523798c42ee5817bbcbed68e6c62cf5f873377bdc5459e675aaa28e26787654 +size 384 diff --git a/margin_logs/step_0000668.npy b/margin_logs/step_0000668.npy new file mode 100644 index 0000000..2a535cc --- /dev/null +++ b/margin_logs/step_0000668.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82e1c8b1f200dbb58f2d53239a260cada4bbf5dab8237b9a6c8bd2c7b183633d +size 384 diff --git a/margin_logs/step_0000669.npy b/margin_logs/step_0000669.npy new file mode 100644 index 0000000..13d9bb0 --- /dev/null +++ b/margin_logs/step_0000669.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f94a5c98e857872a18674e6508b81fa8b353ed1a3afdb60909cd2b10270185f5 +size 384 diff --git a/margin_logs/step_0000670.npy b/margin_logs/step_0000670.npy new file mode 100644 index 0000000..1e0e856 --- /dev/null +++ b/margin_logs/step_0000670.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01f77824af62f8e1390d340794c900956c81c31eb5e71dc9c9fbaebeaabf4592 +size 384 diff --git a/margin_logs/step_0000671.npy b/margin_logs/step_0000671.npy new file mode 100644 index 0000000..e75d719 --- /dev/null +++ b/margin_logs/step_0000671.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f6832f298425ad8a0952e79e05c1641a44c510632565bd3911a790e4f8551f0d +size 384 diff --git a/margin_logs/step_0000672.npy b/margin_logs/step_0000672.npy new file mode 100644 index 0000000..cedd1f9 --- /dev/null +++ b/margin_logs/step_0000672.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6792706c54a1ea58593dbddd44d74868e424f34ef3bfbb859dc29a5e94ae152 +size 384 diff --git a/margin_logs/step_0000673.npy b/margin_logs/step_0000673.npy new file mode 100644 index 0000000..6fa1d31 --- /dev/null +++ b/margin_logs/step_0000673.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c5fae885a3687ae8e6fe88d4f7d6456c3b7b467c4cf41b9d346a72654c5a090 +size 384 diff --git a/margin_logs/step_0000674.npy b/margin_logs/step_0000674.npy new file mode 100644 index 0000000..d29f3af --- /dev/null +++ b/margin_logs/step_0000674.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9b6e43560157f5fad50541fbc11d2872cc2124f3e76be15dbbb160e4446d11f +size 384 diff --git a/margin_logs/step_0000675.npy b/margin_logs/step_0000675.npy new file mode 100644 index 0000000..9f8475d --- /dev/null +++ b/margin_logs/step_0000675.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f281aebba9a655615a5c1e1aa973c565862da3c2315fe40b224a0dae66a1da1c +size 384 diff --git a/margin_logs/step_0000676.npy b/margin_logs/step_0000676.npy new file mode 100644 index 0000000..6ddf477 --- /dev/null +++ b/margin_logs/step_0000676.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:41f1e668fedba069a94b6241a11e5cffaa31b0876ce6b18ef7994341ea1961e7 +size 384 diff --git a/margin_logs/step_0000677.npy b/margin_logs/step_0000677.npy new file mode 100644 index 0000000..17f1f58 --- /dev/null +++ b/margin_logs/step_0000677.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a82a25c202084a08a27e38655594d3d5d79ffddaa5baf4159a5220d053336287 +size 384 diff --git a/margin_logs/step_0000678.npy b/margin_logs/step_0000678.npy new file mode 100644 index 0000000..0a10f5d --- /dev/null +++ b/margin_logs/step_0000678.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94122af2a2ea1719476dbf2f798e47df2c8e81875c5e550bd9bb90a9f3d4cf11 +size 384 diff --git a/margin_logs/step_0000679.npy b/margin_logs/step_0000679.npy new file mode 100644 index 0000000..a5eba0c --- /dev/null +++ b/margin_logs/step_0000679.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c5183c93f9bd736e04ba6393b3fce70a1bfef506861383cc9e053d08ed109d8 +size 384 diff --git a/margin_logs/step_0000680.npy b/margin_logs/step_0000680.npy new file mode 100644 index 0000000..ee8c736 --- /dev/null +++ b/margin_logs/step_0000680.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5f861743de5e81e8a046cfddb511887806549f0181665e29fb2648a2f35a569 +size 384 diff --git a/margin_logs/step_0000681.npy b/margin_logs/step_0000681.npy new file mode 100644 index 0000000..c8d2498 --- /dev/null +++ b/margin_logs/step_0000681.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c573079d405a1da607d70e32035d11914da2197d029f1a82c1dc54d135ad662e +size 384 diff --git a/model-00001-of-00007.safetensors b/model-00001-of-00007.safetensors new file mode 100644 index 0000000..eb4d304 --- /dev/null +++ b/model-00001-of-00007.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e22c152a4020d0047ec70b23c451f592e7cb316eb85eff5849f3cdaaa916f6e1 +size 4886466168 diff --git a/model-00002-of-00007.safetensors b/model-00002-of-00007.safetensors new file mode 100644 index 0000000..dd5d2a3 --- /dev/null +++ b/model-00002-of-00007.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:270b8c527ae97712e4715e78123d51c5bb38220ef32b4cf06e5aab7333aca6ed +size 4832007448 diff --git a/model-00003-of-00007.safetensors b/model-00003-of-00007.safetensors new file mode 100644 index 0000000..68fa805 --- /dev/null +++ b/model-00003-of-00007.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa9b48361b169e9029804ee5ed64872e378611dfcf92bbb2f1deeb32354a66c0 +size 4999813112 diff --git a/model-00004-of-00007.safetensors b/model-00004-of-00007.safetensors new file mode 100644 index 0000000..dc748b7 --- /dev/null +++ b/model-00004-of-00007.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:616f3d766776afe1ff79c8b3dd99dd3488612db4a17c0c573b73db45178a9565 +size 4999813128 diff --git a/model-00005-of-00007.safetensors b/model-00005-of-00007.safetensors new file mode 100644 index 0000000..eeb46b7 --- /dev/null +++ b/model-00005-of-00007.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cea341b427f64a8ea12550c64ceb487bc692e72c4ae0241c0643536a03823237 +size 4832007496 diff --git a/model-00006-of-00007.safetensors b/model-00006-of-00007.safetensors new file mode 100644 index 0000000..86715dd --- /dev/null +++ b/model-00006-of-00007.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:57b44191e19959d92f448f494d7c385cbf2fa0286df12d2ee79548ecbbb7c5dc +size 4999813120 diff --git a/model-00007-of-00007.safetensors b/model-00007-of-00007.safetensors new file mode 100644 index 0000000..b738fe1 --- /dev/null +++ b/model-00007-of-00007.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab5b5c1171f7698fc4afaf5e6b75ac7424b65ad6da2c79af9b154d386127c04f +size 2571158184 diff --git a/model.safetensors.index.json b/model.safetensors.index.json new file mode 100644 index 0000000..0985084 --- /dev/null +++ b/model.safetensors.index.json @@ -0,0 +1,298 @@ +{ + "metadata": { + "total_size": 32121044992 + }, + "weight_map": { + "lm_head.weight": "model-00007-of-00007.safetensors", + "model.embed_tokens.weight": "model-00001-of-00007.safetensors", + "model.layers.0.input_layernorm.weight": "model-00001-of-00007.safetensors", + "model.layers.0.mlp.down_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.0.mlp.up_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00007.safetensors", + "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.1.input_layernorm.weight": "model-00001-of-00007.safetensors", + "model.layers.1.mlp.down_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.1.mlp.up_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00007.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.10.input_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.10.mlp.down_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.10.mlp.gate_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.10.mlp.up_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.10.self_attn.q_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.11.input_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.11.mlp.down_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.11.mlp.gate_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.11.mlp.up_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.11.self_attn.k_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.11.self_attn.q_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.11.self_attn.v_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.12.input_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.12.mlp.down_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.12.mlp.gate_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.12.mlp.up_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.12.self_attn.k_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.12.self_attn.o_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.12.self_attn.q_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.12.self_attn.v_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.13.input_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.13.mlp.down_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.13.mlp.gate_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.13.mlp.up_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.13.self_attn.k_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.13.self_attn.q_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.13.self_attn.v_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.14.input_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.14.mlp.down_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.14.mlp.gate_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.14.mlp.up_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.14.self_attn.k_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.14.self_attn.q_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.14.self_attn.v_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.15.input_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.15.mlp.down_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.15.mlp.gate_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.15.mlp.up_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.15.self_attn.k_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.15.self_attn.q_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.15.self_attn.v_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.16.input_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.16.mlp.down_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.16.mlp.gate_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.16.mlp.up_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.16.post_attention_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.16.self_attn.k_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.16.self_attn.o_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.16.self_attn.q_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.16.self_attn.v_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.17.input_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.17.mlp.down_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.17.mlp.gate_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.17.mlp.up_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.17.self_attn.k_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.17.self_attn.o_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.17.self_attn.q_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.17.self_attn.v_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.18.input_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.18.mlp.down_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.18.mlp.gate_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.18.mlp.up_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.18.self_attn.k_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.18.self_attn.o_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.18.self_attn.q_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.18.self_attn.v_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.19.input_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.19.mlp.down_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.19.mlp.gate_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.19.mlp.up_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.19.post_attention_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.19.self_attn.k_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.19.self_attn.o_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.19.self_attn.q_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.19.self_attn.v_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.2.input_layernorm.weight": "model-00001-of-00007.safetensors", + "model.layers.2.mlp.down_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.2.mlp.up_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00007.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.20.input_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.20.mlp.down_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.20.mlp.gate_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.20.mlp.up_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.20.post_attention_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.20.self_attn.k_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.20.self_attn.o_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.20.self_attn.q_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.20.self_attn.v_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.21.input_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.21.mlp.down_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.21.mlp.gate_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.21.mlp.up_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.21.post_attention_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.21.self_attn.k_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.21.self_attn.o_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.21.self_attn.q_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.21.self_attn.v_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.22.input_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.22.mlp.down_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.22.mlp.gate_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.22.mlp.up_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.22.self_attn.k_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.22.self_attn.o_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.22.self_attn.q_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.22.self_attn.v_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.23.input_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.23.mlp.down_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.23.mlp.gate_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.23.mlp.up_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.23.post_attention_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.23.self_attn.k_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.23.self_attn.o_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.23.self_attn.q_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.23.self_attn.v_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.24.input_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.24.mlp.down_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.24.mlp.gate_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.24.mlp.up_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.24.self_attn.k_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.24.self_attn.o_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.24.self_attn.q_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.24.self_attn.v_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.25.input_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.25.mlp.down_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.25.mlp.gate_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.25.mlp.up_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.25.post_attention_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.25.self_attn.k_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.25.self_attn.o_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.25.self_attn.q_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.25.self_attn.v_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.26.input_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.26.mlp.down_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.26.mlp.gate_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.26.mlp.up_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.26.self_attn.k_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.26.self_attn.o_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.26.self_attn.q_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.26.self_attn.v_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.27.input_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.27.mlp.down_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.27.mlp.gate_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.27.mlp.up_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.27.post_attention_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.27.self_attn.k_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.27.self_attn.o_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.27.self_attn.q_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.27.self_attn.v_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.28.input_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.28.mlp.down_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.28.mlp.gate_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.28.mlp.up_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.28.post_attention_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.28.self_attn.k_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.28.self_attn.o_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.28.self_attn.q_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.28.self_attn.v_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.29.input_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.29.mlp.down_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.29.mlp.gate_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.29.mlp.up_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.29.post_attention_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.29.self_attn.k_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.29.self_attn.o_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.29.self_attn.q_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.29.self_attn.v_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.3.input_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.3.mlp.down_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.3.mlp.gate_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.3.mlp.up_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.30.input_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.30.mlp.down_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.30.mlp.gate_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.30.mlp.up_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.30.post_attention_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.30.self_attn.k_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.30.self_attn.o_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.30.self_attn.q_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.30.self_attn.v_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.31.input_layernorm.weight": "model-00007-of-00007.safetensors", + "model.layers.31.mlp.down_proj.weight": "model-00007-of-00007.safetensors", + "model.layers.31.mlp.gate_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.31.mlp.up_proj.weight": "model-00007-of-00007.safetensors", + "model.layers.31.post_attention_layernorm.weight": "model-00007-of-00007.safetensors", + "model.layers.31.self_attn.k_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.31.self_attn.o_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.31.self_attn.q_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.31.self_attn.v_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.4.input_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.4.mlp.down_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.4.mlp.gate_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.4.mlp.up_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.4.self_attn.q_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.5.input_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.5.mlp.down_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.5.mlp.gate_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.5.mlp.up_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.5.self_attn.k_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.6.input_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.6.mlp.down_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.6.mlp.gate_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.6.mlp.up_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.6.self_attn.k_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.7.input_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.7.mlp.down_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.7.mlp.gate_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.7.mlp.up_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.7.self_attn.k_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.7.self_attn.q_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.8.input_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.8.mlp.down_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.8.mlp.gate_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.8.mlp.up_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.8.self_attn.k_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.8.self_attn.v_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.9.input_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.9.mlp.down_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.9.mlp.gate_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.9.mlp.up_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.9.self_attn.v_proj.weight": "model-00003-of-00007.safetensors", + "model.norm.weight": "model-00007-of-00007.safetensors" + } +} diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000..e5b39b6 --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000..86a3394 --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c5cf44023714fb39b05e71e425f8d7b92805ff73f7988b083b8c87f0bf87393 +size 17209961 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000..8c6916a --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,2064 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_248|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_249|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_250|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "chat_template": "{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}", + "clean_up_tokenization_spaces": true, + "eos_token": "<|end_of_text|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 2048, + "pad_token": "<|end_of_text|>", + "tokenizer_class": "PreTrainedTokenizer" +} diff --git a/train.log b/train.log new file mode 100644 index 0000000..fa20874 --- /dev/null +++ b/train.log @@ -0,0 +1,1160 @@ +2026-04-29 15:39:47 - INFO - __main__ - Model parameters ModelArguments(base_model_revision=None, model_name_or_path='/workspace/dynamic-dpo-v4/base_models/llama-3-8b-base-sft-hh-helpful-4xh200', model_revision='main', model_code_revision=None, torch_dtype='bfloat16', tokenizer_name_or_path=None, trust_remote_code=False, attn_implementation='flash_attention_2', use_peft=False, lora_r=16, lora_alpha=32, lora_dropout=0.05, lora_target_modules=None, lora_modules_to_save=None, load_in_8bit=False, load_in_4bit=False, bnb_4bit_quant_type='nf4', use_bnb_nested_quant=False, bnb_4bit_quant_storage='uint8') +2026-04-29 15:39:47 - INFO - __main__ - Data parameters DataArguments(chat_template=None, dataset_mixer={'Anthropic/hh-rlhf': 1.0}, text_column='text', dataset_splits=['train'], dataset_configs=['helpful-base'], dataset_dir=None, preprocessing_num_workers=12, use_persistent_hf_cache=True, hf_cache_dir='/workspace/dynamic-dpo-v4/hf/datasets', truncation_side=None, auto_insert_empty_system_msg=True, disable_thinking=False, preprocessing_log_samples=0, preprocessing_log_dir=None) +2026-04-29 15:39:47 - INFO - __main__ - Training/evaluation parameters NewDPOConfig( +_n_gpu=1, +accelerator_config={'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None, 'use_configured_state': False}, +adafactor=False, +adam_beta1=0.9, +adam_beta2=0.999, +adam_epsilon=1e-08, +auto_find_batch_size=False, +average_tokens_across_devices=False, +batch_eval_metrics=False, +beta=0.05, +bf16=True, +bf16_full_eval=False, +data_seed=None, +dataloader_drop_last=True, +dataloader_num_workers=0, +dataloader_persistent_workers=False, +dataloader_pin_memory=True, +dataloader_prefetch_factor=None, +dataset_num_proc=12, +ddp_backend=None, +ddp_broadcast_buffers=None, +ddp_bucket_cap_mb=None, +ddp_find_unused_parameters=None, +ddp_timeout=1800, +debug=[], +deepspeed=None, +disable_dropout=True, +disable_tqdm=False, +do_eval=False, +do_predict=False, +do_train=False, +eta=0.1, +eval_accumulation_steps=None, +eval_delay=0, +eval_do_concat_batches=True, +eval_on_start=False, +eval_steps=200, +eval_strategy=IntervalStrategy.NO, +eval_use_gather_object=False, +f_alpha_divergence_coef=1.0, +f_divergence_type=reverse_kl, +force_use_ref_model=False, +fp16=False, +fp16_backend=auto, +fp16_full_eval=False, +fp16_opt_level=O1, +fsdp=[], +fsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, +fsdp_min_num_params=0, +fsdp_transformer_layer_cls_to_wrap=None, +full_determinism=False, +generate_during_eval=False, +gradient_accumulation_steps=2, +gradient_checkpointing=True, +gradient_checkpointing_kwargs={'use_reentrant': False}, +greater_is_better=None, +group_by_length=False, +half_precision_backend=auto, +hub_always_push=False, +hub_margin_dataset_id=None, +hub_model_id=W-61/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449, +hub_model_revision=main, +hub_private_repo=None, +hub_strategy=HubStrategy.EVERY_SAVE, +hub_token=, +ignore_data_skip=False, +include_for_metrics=[], +include_inputs_for_metrics=False, +include_num_input_tokens_seen=False, +include_tokens_per_second=False, +is_encoder_decoder=None, +jit_mode_eval=False, +label_names=None, +label_pad_token_id=-100, +label_smoothing=0.0, +label_smoothing_factor=0.0, +learning_rate=5e-07, +length_column_name=length, +load_best_model_at_end=False, +local_rank=0, +log_level=info, +log_level_replica=warning, +log_on_each_node=True, +logging_dir=/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/runs/Apr29_15-39-47_bc4ce3cd7c4e, +logging_first_step=True, +logging_nan_inf_filter=True, +logging_steps=1, +logging_strategy=IntervalStrategy.STEPS, +loss_type=sigmoid, +lr_scheduler_kwargs={}, +lr_scheduler_type=SchedulerType.COSINE, +margin_dataset_private=None, +margin_dataset_split=train, +margin_log_path=/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/margin_logs, +margin_log_steps=1, +margin_save_full=True, +max_grad_norm=1.0, +max_length=512, +max_prompt_length=256, +max_steps=-1, +max_target_length=None, +metric_for_best_model=None, +model_adapter_name=None, +model_init_kwargs=None, +mp_parameters=, +neftune_noise_alpha=None, +no_cuda=False, +non_finite_logits_handling=error, +num_train_epochs=1, +optim=OptimizerNames.ADAMW_TORCH, +optim_args=None, +optim_target_modules=None, +output_dir=/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449, +overwrite_output_dir=False, +padding_value=None, +past_index=-1, +per_device_eval_batch_size=8, +per_device_train_batch_size=8, +post_tokenization_log_dir=None, +post_tokenization_log_samples=0, +precompute_ref_batch_size=None, +precompute_ref_eval_batch_size=None, +precompute_ref_log_probs=False, +prediction_loss_only=False, +push_margin_dataset=False, +push_to_hub=False, +push_to_hub_model_id=None, +push_to_hub_organization=None, +push_to_hub_token=, +q_target=0.45, +ray_scope=last, +ref_adapter_name=None, +ref_model_init_kwargs=None, +ref_model_mixup_alpha=0.9, +ref_model_sync_steps=64, +reference_free=False, +remove_unused_columns=False, +report_to=['wandb'], +require_explicit_ref_model=True, +restore_callback_states_from_checkpoint=False, +resume_from_checkpoint=None, +reuse_tokenized_dataset=True, +rpo_alpha=None, +run_name=llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449, +s_star=0.4, +save_hf_model_artifacts=True, +save_on_each_node=False, +save_only_model=False, +save_safetensors=True, +save_steps=50, +save_strategy=SaveStrategy.NO, +save_total_limit=2, +seed=42, +sft_weight=0.0, +skip_memory_metrics=True, +sync_ref_model=False, +tf32=None, +tokenization_batch_size=128, +tokenization_mode=online, +tokenized_dataset_cache_dir=/workspace/dynamic-dpo-v4/tokenized_preferences, +torch_compile=False, +torch_compile_backend=None, +torch_compile_mode=None, +torch_empty_cache_steps=None, +torchdynamo=None, +tp_size=0, +tpu_metrics_debug=False, +tpu_num_cores=None, +trainer_type=new_dpo, +truncation_mode=keep_end, +use_cpu=False, +use_ipex=False, +use_legacy_prediction_loop=False, +use_liger_kernel=False, +use_mps_device=False, +wandb_project=llama3-hh-new-dpo-multi-beta-sweep, +warmup_ratio=0.1, +warmup_steps=0, +weight_decay=0.0, +) +2026-04-29 15:39:47 - INFO - __main__ - Using W&B project from training args: llama3-hh-new-dpo-multi-beta-sweep +wandb: Currently logged in as: can-not-fand (can-not-fand-northeastern-university). Use `wandb login --relogin` to force relogin +wandb: - Waiting for wandb.init()... wandb: \ Waiting for wandb.init()... wandb: wandb version 0.26.1 is available! To upgrade, please run: +wandb: $ pip install wandb --upgrade +wandb: Tracking run with wandb version 0.17.5 +wandb: Run data is saved locally in /workspace/dynamic-dpo-v4/wandb/wandb/run-20260429_153950-4022pu2h +wandb: Run `wandb offline` to turn off syncing. +wandb: Syncing run llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449 +wandb: ⭐️ View project at https://wandb.ai/can-not-fand-northeastern-university/llama3-hh-new-dpo-multi-beta-sweep +wandb: 🚀 View run at https://wandb.ai/can-not-fand-northeastern-university/llama3-hh-new-dpo-multi-beta-sweep/runs/4022pu2h +2026-04-29 15:39:52 - INFO - __main__ - New-DPO parameters: beta=0.05, q_target=0.45, s_star=0.4, eta=0.1 +2026-04-29 15:39:52 - INFO - __main__ - Using persistent HF datasets cache at /workspace/dynamic-dpo-v4/hf/datasets +2026-04-29 15:39:58 - WARNING - __main__ - Dropped 237 non-canonical HH preference examples from split `train` before normalization (126 x HH preprocessing expects exactly one final assistant response in chosen/rejected suffixes., 111 x HH chosen/rejected transcripts must each contain a divergent assistant response.). + Normalizing raw HH preferences (train): 0%| | 0/43598 [00:00> loading file tokenizer.json +[INFO|tokenization_utils_base.py:2058] 2026-04-29 15:40:02,080 >> loading file tokenizer.model +[INFO|tokenization_utils_base.py:2058] 2026-04-29 15:40:02,080 >> loading file added_tokens.json +[INFO|tokenization_utils_base.py:2058] 2026-04-29 15:40:02,080 >> loading file special_tokens_map.json +[INFO|tokenization_utils_base.py:2058] 2026-04-29 15:40:02,080 >> loading file tokenizer_config.json +[INFO|tokenization_utils_base.py:2058] 2026-04-29 15:40:02,080 >> loading file chat_template.jinja + Normalizing raw HH preferences (train): 39%|███▉ | 17165/43598 [00:01<00:02, 11165.52 examples/s] Normalizing raw HH preferences (train): 96%|█████████▌| 41643/43598 [00:03<00:00, 11253.84 examples/s] Normalizing raw HH preferences (train): 61%|██████ | 26681/43598 [00:02<00:01, 11475.52 examples/s] Normalizing raw HH preferences (train): 42%|████▏ | 18323/43598 [00:01<00:02, 11278.63 examples/s] Normalizing raw HH preferences (train): 64%|██████▍ | 27837/43598 [00:02<00:01, 11496.38 examples/s] Normalizing raw HH preferences (train): 45%|████▍ | 19491/43598 [00:01<00:02, 11392.67 examples/s] Normalizing raw HH preferences (train): 67%|██████▋ | 29000/43598 [00:02<00:01, 11296.07 examples/s] Normalizing raw HH preferences (train): 99%|█████████▉| 43083/43598 [00:04<00:00, 8583.37 examples/s] Normalizing raw HH preferences (train): 47%|████▋ | 20660/43598 [00:01<00:01, 11475.28 examples/s] Normalizing raw HH preferences (train): 100%|██████████| 43598/43598 [00:04<00:00, 10471.92 examples/s] + Normalizing raw HH preferences (train): 69%|██████▉ | 30174/43598 [00:02<00:01, 11419.52 examples/s][INFO|tokenization_utils_base.py:2323] 2026-04-29 15:40:02,489 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained. + Normalizing raw HH preferences (train): 50%|█████ | 21823/43598 [00:02<00:01, 11518.08 examples/s] Normalizing raw HH preferences (train): 72%|███████▏ | 31362/43598 [00:02<00:01, 11546.52 examples/s]2026-04-29 15:40:02 - INFO - __main__ - Processed train sample 41905: + +Prompt: +<|begin_of_text|><|start_header_id|>user<|end_header_id|> + +What is the best way to clean my refrigerator?<|eot_id|><|start_header_id|>assistant<|end_header_id|> + +Well, I have a few ideas, but the best way to know what works best is to run some tests and then analyze the data to figure out which technique works best.<|eot_id|><|start_header_id|>user<|end_header_id|> + +What are your ideas?<|eot_id|> + +Chosen: +<|start_header_id|>assistant<|end_header_id|> + +Some of my ideas include wiping down the interior with a rag, using bleach and water to wipe down the door, emptying the contents of the fridge to scrub the shelves with a clean cloth, cleaning the interior of the fridge with vinegar and water, and using a vacuum cleaner to clean the interior.<|eot_id|> + +Rejected: +<|start_header_id|>assistant<|end_header_id|> + +I was thinking that you could try different types of soap, which could be a variation of “soap,” like antibacterial soap. I was also thinking that you could try using different tools, like a toothbrush, to clean the inside. Or I was thinking that you could try different ways to get the gunk out, like using a squeegee, or using a paper towel. I was also thinking you could try other types of cleaning, like vacuuming, but I think that could have the opposite of the desired effect.<|eot_id|> +/workspace/dynamic-dpo-v4/scripts/tokenized_dpo_trainer.py:391: UserWarning: You passed a model_id to the trainer. This will automatically create an `AutoModelForCausalLM` or a `PeftModel` (if you passed a `peft_config`) for you. + warnings.warn( +[INFO|configuration_utils.py:691] 2026-04-29 15:40:02,611 >> loading configuration file /workspace/dynamic-dpo-v4/base_models/llama-3-8b-base-sft-hh-helpful-4xh200/config.json +[INFO|configuration_utils.py:765] 2026-04-29 15:40:02,612 >> Model config LlamaConfig { + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": 128001, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 8192, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.51.0", + "use_cache": false, + "vocab_size": 128256 +} + +[INFO|modeling_utils.py:1121] 2026-04-29 15:40:02,628 >> loading weights file /workspace/dynamic-dpo-v4/base_models/llama-3-8b-base-sft-hh-helpful-4xh200/model.safetensors.index.json +[INFO|modeling_utils.py:2167] 2026-04-29 15:40:02,629 >> Instantiating LlamaForCausalLM model under default dtype torch.bfloat16. + Normalizing raw HH preferences (train): 53%|█████▎ | 22981/43598 [00:02<00:01, 11533.44 examples/s][WARNING|logging.py:328] 2026-04-29 15:40:02,633 >> You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[INFO|configuration_utils.py:1142] 2026-04-29 15:40:02,635 >> Generate config GenerationConfig { + "bos_token_id": 128000, + "eos_token_id": 128001, + "use_cache": false +} + + Normalizing raw HH preferences (train): 75%|███████▍ | 32661/43598 [00:02<00:00, 11639.29 examples/s] Normalizing raw HH preferences (train): 57%|█████▋ | 24687/43598 [00:02<00:01, 11466.50 examples/s] Normalizing raw HH preferences (train): 79%|███████▊ | 34324/43598 [00:03<00:00, 11429.18 examples/s] Normalizing raw HH preferences (train): 59%|█████▉ | 25839/43598 [00:02<00:01, 11477.47 examples/s] Loading checkpoint shards: 0%| | 0/7 [00:00> You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. + Normalizing raw HH preferences (train): 84%|████████▍ | 36660/43598 [00:03<00:00, 11521.03 examples/s] Normalizing raw HH preferences (train): 63%|██████▎ | 27500/43598 [00:02<00:01, 11324.17 examples/s] Loading checkpoint shards: 0%| | 0/7 [00:00> Trainer.tokenizer is now deprecated. You should use `Trainer.processing_class = processing_class` instead. + Normalizing raw HH preferences (train): 87%|████████▋ | 37818/43598 [00:03<00:00, 11536.26 examples/s] Normalizing raw HH preferences (train): 66%|██████▌ | 28659/43598 [00:02<00:01, 11390.56 examples/s] Normalizing raw HH preferences (train): 68%|██████▊ | 29840/43598 [00:02<00:01, 11501.86 examples/s] Normalizing raw HH preferences (train): 91%|█████████ | 39504/43598 [00:03<00:00, 11421.61 examples/s] Normalizing raw HH preferences (train): 71%|███████ | 30998/43598 [00:02<00:01, 11523.13 examples/s] Normalizing raw HH preferences (train): 93%|█████████▎| 40673/43598 [00:03<00:00, 11488.57 examples/s] Normalizing raw HH preferences (train): 96%|█████████▌| 41837/43598 [00:03<00:00, 11528.86 examples/s] Normalizing raw HH preferences (train): 75%|███████▌ | 32703/43598 [00:02<00:00, 11458.95 examples/s] Normalizing raw HH preferences (train): 78%|███████▊ | 33863/43598 [00:03<00:00, 11494.66 examples/s] Normalizing raw HH preferences (train): 99%|█████████▉| 43083/43598 [00:03<00:00, 8822.30 examples/s] Normalizing raw HH preferences (train): 82%|████████▏ | 35639/43598 [00:03<00:00, 11381.61 examples/s] Normalizing raw HH preferences (train): 100%|██████████| 43598/43598 [00:04<00:00, 10792.02 examples/s] + Normalizing raw HH preferences (train): 84%|████████▍ | 36795/43598 [00:03<00:00, 11424.67 examples/s] Normalizing raw HH preferences (train): 87%|████████▋ | 37944/43598 [00:03<00:00, 11438.90 examples/s] Normalizing raw HH preferences (train): 91%|█████████ | 39654/43598 [00:03<00:00, 11364.92 examples/s] Normalizing raw HH preferences (train): 94%|█████████▎| 40800/43598 [00:03<00:00, 11387.56 examples/s]/workspace/dynamic-dpo-v4/scripts/tokenized_dpo_trainer.py:391: UserWarning: You passed a model_id to the trainer. This will automatically create an `AutoModelForCausalLM` or a `PeftModel` (if you passed a `peft_config`) for you. + warnings.warn( + Normalizing raw HH preferences (train): 96%|█████████▌| 41961/43598 [00:03<00:00, 11444.37 examples/s][WARNING|logging.py:328] 2026-04-29 15:40:04,309 >> You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. + Loading checkpoint shards: 0%| | 0/7 [00:00> Trainer.tokenizer is now deprecated. You should use `Trainer.processing_class = processing_class` instead. + Normalizing raw HH preferences (train): 100%|██████████| 43598/43598 [00:04<00:00, 8932.40 examples/s] Normalizing raw HH preferences (train): 100%|██████████| 43598/43598 [00:04<00:00, 10767.37 examples/s] + Loading checkpoint shards: 14%|█▍ | 1/7 [00:02<00:12, 2.00s/it]/workspace/dynamic-dpo-v4/scripts/tokenized_dpo_trainer.py:391: UserWarning: You passed a model_id to the trainer. This will automatically create an `AutoModelForCausalLM` or a `PeftModel` (if you passed a `peft_config`) for you. + warnings.warn( +[WARNING|logging.py:328] 2026-04-29 15:40:05,111 >> You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. + Loading checkpoint shards: 0%| | 0/7 [00:00> Trainer.tokenizer is now deprecated. You should use `Trainer.processing_class = processing_class` instead. + Loading checkpoint shards: 29%|██▊ | 2/7 [00:03<00:09, 1.95s/it] Loading checkpoint shards: 43%|████▎ | 3/7 [00:05<00:07, 1.95s/it] Loading checkpoint shards: 57%|█████▋ | 4/7 [00:07<00:05, 1.95s/it] Loading checkpoint shards: 71%|███████▏ | 5/7 [00:09<00:03, 1.89s/it] Loading checkpoint shards: 86%|████████▌ | 6/7 [00:11<00:01, 1.89s/it] Loading checkpoint shards: 100%|██████████| 7/7 [00:12<00:00, 1.59s/it] Loading checkpoint shards: 100%|██████████| 7/7 [00:12<00:00, 1.78s/it] +[INFO|modeling_utils.py:4926] 2026-04-29 15:40:15,340 >> All model checkpoint weights were used when initializing LlamaForCausalLM. + +[INFO|modeling_utils.py:4934] 2026-04-29 15:40:15,340 >> All the weights of LlamaForCausalLM were initialized from the model checkpoint at /workspace/dynamic-dpo-v4/base_models/llama-3-8b-base-sft-hh-helpful-4xh200. +If your task is similar to the task the model of the checkpoint was trained on, you can already use LlamaForCausalLM for predictions without further training. +[INFO|configuration_utils.py:1095] 2026-04-29 15:40:15,344 >> loading configuration file /workspace/dynamic-dpo-v4/base_models/llama-3-8b-base-sft-hh-helpful-4xh200/generation_config.json +[INFO|configuration_utils.py:1142] 2026-04-29 15:40:15,344 >> Generate config GenerationConfig { + "bos_token_id": 128000, + "do_sample": true, + "eos_token_id": 128001, + "max_length": 4096, + "temperature": 0.6, + "top_p": 0.9 +} + +[INFO|configuration_utils.py:691] 2026-04-29 15:40:15,347 >> loading configuration file /workspace/dynamic-dpo-v4/base_models/llama-3-8b-base-sft-hh-helpful-4xh200/config.json +[INFO|configuration_utils.py:765] 2026-04-29 15:40:15,347 >> Model config LlamaConfig { + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": 128001, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 8192, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.51.0", + "use_cache": false, + "vocab_size": 128256 +} + +[INFO|modeling_utils.py:1121] 2026-04-29 15:40:15,350 >> loading weights file /workspace/dynamic-dpo-v4/base_models/llama-3-8b-base-sft-hh-helpful-4xh200/model.safetensors.index.json +[INFO|modeling_utils.py:2167] 2026-04-29 15:40:15,351 >> Instantiating LlamaForCausalLM model under default dtype torch.bfloat16. +[INFO|configuration_utils.py:1142] 2026-04-29 15:40:15,356 >> Generate config GenerationConfig { + "bos_token_id": 128000, + "eos_token_id": 128001, + "use_cache": false +} + + Loading checkpoint shards: 0%| | 0/7 [00:00> All model checkpoint weights were used when initializing LlamaForCausalLM. + +[INFO|modeling_utils.py:4934] 2026-04-29 15:40:27,177 >> All the weights of LlamaForCausalLM were initialized from the model checkpoint at /workspace/dynamic-dpo-v4/base_models/llama-3-8b-base-sft-hh-helpful-4xh200. +If your task is similar to the task the model of the checkpoint was trained on, you can already use LlamaForCausalLM for predictions without further training. +[INFO|configuration_utils.py:1095] 2026-04-29 15:40:27,180 >> loading configuration file /workspace/dynamic-dpo-v4/base_models/llama-3-8b-base-sft-hh-helpful-4xh200/generation_config.json +[INFO|configuration_utils.py:1142] 2026-04-29 15:40:27,180 >> Generate config GenerationConfig { + "bos_token_id": 128000, + "do_sample": true, + "eos_token_id": 128001, + "max_length": 4096, + "temperature": 0.6, + "top_p": 0.9 +} + +[WARNING|trainer.py:821] 2026-04-29 15:40:27,182 >> Trainer.tokenizer is now deprecated. You should use `Trainer.processing_class = processing_class` instead. +[WARNING|trainer.py:816] 2026-04-29 15:40:27,182 >> Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead. +[WARNING|trainer.py:816] 2026-04-29 15:40:27,196 >> Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead. +/workspace/dynamic-dpo-v4/scripts/tokenized_dpo_trainer.py:522: FutureWarning: `tokenizer` is deprecated and will be removed in version 5.0.0 for `NewDPOTrainer.__init__`. Use `processing_class` instead. + super().__init__( +[WARNING|trainer.py:816] 2026-04-29 15:40:28,662 >> Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead. +[WARNING|trainer.py:816] 2026-04-29 15:40:28,663 >> Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead. +[WARNING|trainer.py:816] 2026-04-29 15:40:28,663 >> Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead. +[WARNING|trainer.py:816] 2026-04-29 15:40:28,681 >> Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead. +/workspace/dynamic-dpo-v4/scripts/tokenized_dpo_trainer.py:522: FutureWarning: `tokenizer` is deprecated and will be removed in version 5.0.0 for `NewDPOTrainer.__init__`. Use `processing_class` instead. + super().__init__( +[WARNING|trainer.py:816] 2026-04-29 15:40:28,685 >> Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead. +/workspace/dynamic-dpo-v4/scripts/tokenized_dpo_trainer.py:522: FutureWarning: `tokenizer` is deprecated and will be removed in version 5.0.0 for `NewDPOTrainer.__init__`. Use `processing_class` instead. + super().__init__( +[WARNING|trainer.py:816] 2026-04-29 15:40:28,688 >> Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead. +/workspace/dynamic-dpo-v4/scripts/tokenized_dpo_trainer.py:522: FutureWarning: `tokenizer` is deprecated and will be removed in version 5.0.0 for `NewDPOTrainer.__init__`. Use `processing_class` instead. + super().__init__( +[INFO|trainer.py:748] 2026-04-29 15:40:28,887 >> Using auto half precision backend +/workspace/dynamic-dpo-v4/.venv/lib/python3.11/site-packages/accelerate/accelerator.py:1557: UserWarning: Upcasted low precision parameters in LlamaForCausalLM because mixed precision turned on in FSDP. Affects: model.embed_tokens.weight, model.norm.weight, lm_head.weight. + warnings.warn( +/workspace/dynamic-dpo-v4/.venv/lib/python3.11/site-packages/accelerate/accelerator.py:1557: UserWarning: Upcasted low precision parameters in LlamaDecoderLayer because mixed precision turned on in FSDP. Affects: self_attn.q_proj.weight, self_attn.k_proj.weight, self_attn.v_proj.weight, self_attn.o_proj.weight, mlp.gate_proj.weight, mlp.up_proj.weight, mlp.down_proj.weight, input_layernorm.weight, post_attention_layernorm.weight. + warnings.warn( +/workspace/dynamic-dpo-v4/.venv/lib/python3.11/site-packages/accelerate/accelerator.py:1563: UserWarning: FSDP upcast of low precision parameters may affect the precision of model checkpoints. + warnings.warn( +[INFO|trainer.py:2414] 2026-04-29 15:40:36,637 >> ***** Running training ***** +[INFO|trainer.py:2415] 2026-04-29 15:40:36,637 >> Num examples = 43,598 +[INFO|trainer.py:2416] 2026-04-29 15:40:36,637 >> Num Epochs = 1 +[INFO|trainer.py:2417] 2026-04-29 15:40:36,637 >> Instantaneous batch size per device = 8 +[INFO|trainer.py:2420] 2026-04-29 15:40:36,637 >> Total train batch size (w. parallel, distributed & accumulation) = 64 +[INFO|trainer.py:2421] 2026-04-29 15:40:36,637 >> Gradient Accumulation steps = 2 +[INFO|trainer.py:2422] 2026-04-29 15:40:36,637 >> Total optimization steps = 681 +[INFO|trainer.py:2423] 2026-04-29 15:40:36,638 >> Number of trainable parameters = 2,007,565,312 +[INFO|integration_utils.py:831] 2026-04-29 15:40:36,639 >> Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true" + 0%| | 0/681 [00:00> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:1713] 2026-04-29 15:40:38,202 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:1713] 2026-04-29 15:40:38,211 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:1713] 2026-04-29 15:40:38,216 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 0%| | 1/681 [00:02<30:47, 2.72s/it] {'loss': 1.3875, 'grad_norm': 41.73493576049805, 'learning_rate': 0.0, 'fcm_dpo/beta': 0.05000000074505806, 'fcm_dpo/q_t': 0.5002857446670532, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': -0.02287006378173828, 'margin_dpo/margin_mean': -0.02287048101425171, 'margin_dpo/margin_std': 0.41920793056488037, 'logps/chosen': -50.1435661315918, 'logps/rejected': -74.09991455078125, 'logps/ref_chosen': -50.14883804321289, 'logps/ref_rejected': -74.1280517578125, 'KL/chosen_KL_mean': 0.00527191162109375, 'KL/rejected_KL_mean': 0.028141021728515625, 'KL/mean': 0.016706019639968872, 'KL/std': 0.272699236869812, 'logits/chosen': -0.4974287748336792, 'logits/rejected': -0.43299180269241333, 'epoch': 0.0} + 0%| | 1/681 [00:02<30:47, 2.72s/it] 0%| | 2/681 [00:05<29:25, 2.60s/it] {'loss': 1.3897, 'grad_norm': 36.00978469848633, 'learning_rate': 7.246376811594203e-09, 'fcm_dpo/beta': 0.05000000074505806, 'fcm_dpo/q_t': 0.5008214712142944, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': -0.06572261452674866, 'margin_dpo/margin_mean': -0.06572240591049194, 'margin_dpo/margin_std': 0.35048407316207886, 'logps/chosen': -52.65568923950195, 'logps/rejected': -75.27340698242188, 'logps/ref_chosen': -52.620704650878906, 'logps/ref_rejected': -75.30413818359375, 'KL/chosen_KL_mean': -0.03498649597167969, 'KL/rejected_KL_mean': 0.030735015869140625, 'KL/mean': -0.00212840735912323, 'KL/std': 0.24797174334526062, 'logits/chosen': -0.49536412954330444, 'logits/rejected': -0.4594460427761078, 'epoch': 0.0} + 0%| | 2/681 [00:05<29:25, 2.60s/it] 0%| | 3/681 [00:07<29:15, 2.59s/it] {'loss': 1.3872, 'grad_norm': 35.48371505737305, 'learning_rate': 1.4492753623188406e-08, 'fcm_dpo/beta': 0.05000000074505806, 'fcm_dpo/q_t': 0.5001957416534424, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': -0.015658468008041382, 'margin_dpo/margin_mean': -0.015658140182495117, 'margin_dpo/margin_std': 0.39206600189208984, 'logps/chosen': -60.986106872558594, 'logps/rejected': -68.66145324707031, 'logps/ref_chosen': -60.981597900390625, 'logps/ref_rejected': -68.67259216308594, 'KL/chosen_KL_mean': -0.0045108795166015625, 'KL/rejected_KL_mean': 0.01114654541015625, 'KL/mean': 0.003316923975944519, 'KL/std': 0.2563997805118561, 'logits/chosen': -0.48171138763427734, 'logits/rejected': -0.4422028362751007, 'epoch': 0.0} + 0%| | 3/681 [00:07<29:15, 2.59s/it] 1%| | 4/681 [00:10<29:32, 2.62s/it] {'loss': 1.3882, 'grad_norm': 35.9489860534668, 'learning_rate': 2.1739130434782606e-08, 'fcm_dpo/beta': 0.05000000074505806, 'fcm_dpo/q_t': 0.5004501342773438, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': -0.03601771593093872, 'margin_dpo/margin_mean': -0.036018311977386475, 'margin_dpo/margin_std': 0.3561931252479553, 'logps/chosen': -56.77029037475586, 'logps/rejected': -86.6136703491211, 'logps/ref_chosen': -56.7677116394043, 'logps/ref_rejected': -86.64710998535156, 'KL/chosen_KL_mean': -0.0025787353515625, 'KL/rejected_KL_mean': 0.03343963623046875, 'KL/mean': 0.015432953834533691, 'KL/std': 0.23463661968708038, 'logits/chosen': -0.4681958258152008, 'logits/rejected': -0.44056397676467896, 'epoch': 0.01} + 1%| | 4/681 [00:10<29:32, 2.62s/it] 1%| | 5/681 [00:13<29:23, 2.61s/it] {'loss': 1.385, 'grad_norm': 44.74127960205078, 'learning_rate': 2.898550724637681e-08, 'fcm_dpo/beta': 0.05000000074505806, 'fcm_dpo/q_t': 0.49965283274650574, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 0.027776658535003662, 'margin_dpo/margin_mean': 0.027777403593063354, 'margin_dpo/margin_std': 0.3397705554962158, 'logps/chosen': -53.8150634765625, 'logps/rejected': -84.13265228271484, 'logps/ref_chosen': -53.859375, 'logps/ref_rejected': -84.14918518066406, 'KL/chosen_KL_mean': 0.04430961608886719, 'KL/rejected_KL_mean': 0.01653289794921875, 'KL/mean': 0.030420929193496704, 'KL/std': 0.26933568716049194, 'logits/chosen': -0.5143798589706421, 'logits/rejected': -0.47071516513824463, 'epoch': 0.01} + 1%| | 5/681 [00:13<29:23, 2.61s/it] 1%| | 6/681 [00:15<27:52, 2.48s/it] {'loss': 1.3844, 'grad_norm': 45.87062072753906, 'learning_rate': 3.6231884057971014e-08, 'fcm_dpo/beta': 0.05000000074505806, 'fcm_dpo/q_t': 0.49951091408729553, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 0.03913220763206482, 'margin_dpo/margin_mean': 0.03913196921348572, 'margin_dpo/margin_std': 0.38666093349456787, 'logps/chosen': -63.02406311035156, 'logps/rejected': -92.70105743408203, 'logps/ref_chosen': -63.007484436035156, 'logps/ref_rejected': -92.64534759521484, 'KL/chosen_KL_mean': -0.016576766967773438, 'KL/rejected_KL_mean': -0.0557098388671875, 'KL/mean': -0.036144837737083435, 'KL/std': 0.2481634020805359, 'logits/chosen': -0.5242589712142944, 'logits/rejected': -0.4836902618408203, 'epoch': 0.01} + 1%| | 6/681 [00:15<27:52, 2.48s/it] 1%| | 7/681 [00:17<27:20, 2.43s/it] {'loss': 1.3868, 'grad_norm': 41.22108840942383, 'learning_rate': 4.347826086956521e-08, 'fcm_dpo/beta': 0.05000000074505806, 'fcm_dpo/q_t': 0.5001072883605957, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': -0.008586883544921875, 'margin_dpo/margin_mean': -0.0085868239402771, 'margin_dpo/margin_std': 0.36728373169898987, 'logps/chosen': -57.74927520751953, 'logps/rejected': -103.88645935058594, 'logps/ref_chosen': -57.774818420410156, 'logps/ref_rejected': -103.92059326171875, 'KL/chosen_KL_mean': 0.025547027587890625, 'KL/rejected_KL_mean': 0.0341339111328125, 'KL/mean': 0.029840022325515747, 'KL/std': 0.2671242356300354, 'logits/chosen': -0.5003604292869568, 'logits/rejected': -0.4664100110530853, 'epoch': 0.01} + 1%| | 7/681 [00:17<27:20, 2.43s/it] 1%| | 8/681 [00:20<27:04, 2.41s/it] {'loss': 1.3912, 'grad_norm': 39.53245544433594, 'learning_rate': 5.0724637681159424e-08, 'fcm_dpo/beta': 0.05000000074505806, 'fcm_dpo/q_t': 0.5012041926383972, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': -0.09634339809417725, 'margin_dpo/margin_mean': -0.09634318947792053, 'margin_dpo/margin_std': 0.40796253085136414, 'logps/chosen': -58.7188720703125, 'logps/rejected': -79.2179183959961, 'logps/ref_chosen': -58.716033935546875, 'logps/ref_rejected': -79.3114242553711, 'KL/chosen_KL_mean': -0.002834320068359375, 'KL/rejected_KL_mean': 0.093505859375, 'KL/mean': 0.04533374309539795, 'KL/std': 0.28405576944351196, 'logits/chosen': -0.5020660161972046, 'logits/rejected': -0.4754522442817688, 'epoch': 0.01} + 1%| | 8/681 [00:20<27:04, 2.41s/it] 1%|▏ | 9/681 [00:22<27:36, 2.47s/it] {'loss': 1.3871, 'grad_norm': 42.5697021484375, 'learning_rate': 5.797101449275362e-08, 'fcm_dpo/beta': 0.05000000074505806, 'fcm_dpo/q_t': 0.5001616477966309, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': -0.012944132089614868, 'margin_dpo/margin_mean': -0.012945234775543213, 'margin_dpo/margin_std': 0.432614266872406, 'logps/chosen': -69.84687042236328, 'logps/rejected': -99.56974792480469, 'logps/ref_chosen': -69.8668441772461, 'logps/ref_rejected': -99.6026611328125, 'KL/chosen_KL_mean': 0.0199737548828125, 'KL/rejected_KL_mean': 0.03292083740234375, 'KL/mean': 0.02644728124141693, 'KL/std': 0.3076080083847046, 'logits/chosen': -0.49002158641815186, 'logits/rejected': -0.4431573152542114, 'epoch': 0.01} + 1%|▏ | 9/681 [00:22<27:36, 2.47s/it] 1%|▏ | 10/681 [00:25<27:38, 2.47s/it] {'loss': 1.3846, 'grad_norm': 35.374786376953125, 'learning_rate': 6.521739130434782e-08, 'fcm_dpo/beta': 0.05000000074505806, 'fcm_dpo/q_t': 0.4995613098144531, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 0.03509734570980072, 'margin_dpo/margin_mean': 0.035097718238830566, 'margin_dpo/margin_std': 0.32590410113334656, 'logps/chosen': -48.33961486816406, 'logps/rejected': -80.38908386230469, 'logps/ref_chosen': -48.35768508911133, 'logps/ref_rejected': -80.37206268310547, 'KL/chosen_KL_mean': 0.018072128295898438, 'KL/rejected_KL_mean': -0.01702117919921875, 'KL/mean': 0.0005231276154518127, 'KL/std': 0.22773060202598572, 'logits/chosen': -0.4773544371128082, 'logits/rejected': -0.43332165479660034, 'epoch': 0.01} + 1%|▏ | 10/681 [00:25<27:38, 2.47s/it] 2%|▏ | 11/681 [00:27<28:35, 2.56s/it] {'loss': 1.3802, 'grad_norm': 34.34830093383789, 'learning_rate': 7.246376811594203e-08, 'fcm_dpo/beta': 0.05000000074505806, 'fcm_dpo/q_t': 0.4984557032585144, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 0.1235695481300354, 'margin_dpo/margin_mean': 0.12356960773468018, 'margin_dpo/margin_std': 0.4112103283405304, 'logps/chosen': -52.961402893066406, 'logps/rejected': -87.84849548339844, 'logps/ref_chosen': -53.01685333251953, 'logps/ref_rejected': -87.78038024902344, 'KL/chosen_KL_mean': 0.055454254150390625, 'KL/rejected_KL_mean': -0.06811904907226562, 'KL/mean': -0.006332814693450928, 'KL/std': 0.2968614101409912, 'logits/chosen': -0.4542468190193176, 'logits/rejected': -0.42898106575012207, 'epoch': 0.02} + 2%|▏ | 11/681 [00:27<28:35, 2.56s/it] 2%|▏ | 12/681 [00:30<28:45, 2.58s/it] {'loss': 1.3858, 'grad_norm': 45.08433532714844, 'learning_rate': 7.971014492753623e-08, 'fcm_dpo/beta': 0.05000000074505806, 'fcm_dpo/q_t': 0.4998508393764496, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 0.011913254857063293, 'margin_dpo/margin_mean': 0.011912867426872253, 'margin_dpo/margin_std': 0.4435839056968689, 'logps/chosen': -61.842933654785156, 'logps/rejected': -104.90767669677734, 'logps/ref_chosen': -61.80543518066406, 'logps/ref_rejected': -104.8582763671875, 'KL/chosen_KL_mean': -0.037494659423828125, 'KL/rejected_KL_mean': -0.049404144287109375, 'KL/mean': -0.04345113784074783, 'KL/std': 0.3219500184059143, 'logits/chosen': -0.5271201133728027, 'logits/rejected': -0.4905800521373749, 'epoch': 0.02} + 2%|▏ | 12/681 [00:30<28:45, 2.58s/it] 2%|▏ | 13/681 [00:33<29:05, 2.61s/it] {'loss': 1.3874, 'grad_norm': 39.63324737548828, 'learning_rate': 8.695652173913042e-08, 'fcm_dpo/beta': 0.05000000074505806, 'fcm_dpo/q_t': 0.5002469420433044, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': -0.019751250743865967, 'margin_dpo/margin_mean': -0.01975110173225403, 'margin_dpo/margin_std': 0.3618961572647095, 'logps/chosen': -64.26197814941406, 'logps/rejected': -87.18495178222656, 'logps/ref_chosen': -64.2603530883789, 'logps/ref_rejected': -87.20307922363281, 'KL/chosen_KL_mean': -0.0016231536865234375, 'KL/rejected_KL_mean': 0.018131256103515625, 'KL/mean': 0.008255481719970703, 'KL/std': 0.2522842288017273, 'logits/chosen': -0.44067633152008057, 'logits/rejected': -0.409400999546051, 'epoch': 0.02} + 2%|▏ | 13/681 [00:33<29:05, 2.61s/it] 2%|▏ | 14/681 [00:35<28:43, 2.58s/it] {'loss': 1.3834, 'grad_norm': 42.595924377441406, 'learning_rate': 9.420289855072464e-08, 'fcm_dpo/beta': 0.05000000074505806, 'fcm_dpo/q_t': 0.49926379323005676, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 0.0589028000831604, 'margin_dpo/margin_mean': 0.05890271067619324, 'margin_dpo/margin_std': 0.36086153984069824, 'logps/chosen': -58.09579086303711, 'logps/rejected': -104.09156036376953, 'logps/ref_chosen': -58.11021041870117, 'logps/ref_rejected': -104.04708099365234, 'KL/chosen_KL_mean': 0.0144195556640625, 'KL/rejected_KL_mean': -0.0444793701171875, 'KL/mean': -0.015029460191726685, 'KL/std': 0.254509299993515, 'logits/chosen': -0.4786085784435272, 'logits/rejected': -0.43931445479393005, 'epoch': 0.02} + 2%|▏ | 14/681 [00:35<28:43, 2.58s/it] 2%|▏ | 15/681 [00:38<28:38, 2.58s/it] {'loss': 1.3856, 'grad_norm': 32.06040573120117, 'learning_rate': 1.0144927536231885e-07, 'fcm_dpo/beta': 0.05000000074505806, 'fcm_dpo/q_t': 0.49981045722961426, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 0.01515999436378479, 'margin_dpo/margin_mean': 0.015159964561462402, 'margin_dpo/margin_std': 0.37245649099349976, 'logps/chosen': -56.99365997314453, 'logps/rejected': -80.85054779052734, 'logps/ref_chosen': -56.96691131591797, 'logps/ref_rejected': -80.80863952636719, 'KL/chosen_KL_mean': -0.0267486572265625, 'KL/rejected_KL_mean': -0.041904449462890625, 'KL/mean': -0.03432337939739227, 'KL/std': 0.24552780389785767, 'logits/chosen': -0.501712441444397, 'logits/rejected': -0.4834703207015991, 'epoch': 0.02} + 2%|▏ | 15/681 [00:38<28:38, 2.58s/it] 2%|▏ | 16/681 [00:40<28:17, 2.55s/it] {'loss': 1.3852, 'grad_norm': 42.0484619140625, 'learning_rate': 1.0869565217391303e-07, 'fcm_dpo/beta': 0.05000000074505806, 'fcm_dpo/q_t': 0.49969929456710815, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 0.02406895160675049, 'margin_dpo/margin_mean': 0.024068236351013184, 'margin_dpo/margin_std': 0.3988404870033264, 'logps/chosen': -61.74287414550781, 'logps/rejected': -84.39653015136719, 'logps/ref_chosen': -61.739891052246094, 'logps/ref_rejected': -84.36947631835938, 'KL/chosen_KL_mean': -0.00298309326171875, 'KL/rejected_KL_mean': -0.0270538330078125, 'KL/mean': -0.015021562576293945, 'KL/std': 0.269855260848999, 'logits/chosen': -0.4899655878543854, 'logits/rejected': -0.4453532099723816, 'epoch': 0.02} + 2%|▏ | 16/681 [00:40<28:17, 2.55s/it] 2%|▏ | 17/681 [00:43<28:05, 2.54s/it] {'loss': 1.3808, 'grad_norm': 39.231082916259766, 'learning_rate': 1.1594202898550725e-07, 'fcm_dpo/beta': 0.05000000074505806, 'fcm_dpo/q_t': 0.4986092150211334, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 0.11126986145973206, 'margin_dpo/margin_mean': 0.1112699806690216, 'margin_dpo/margin_std': 0.33091142773628235, 'logps/chosen': -67.65924072265625, 'logps/rejected': -85.43882751464844, 'logps/ref_chosen': -67.71033477783203, 'logps/ref_rejected': -85.37865447998047, 'KL/chosen_KL_mean': 0.05109405517578125, 'KL/rejected_KL_mean': -0.06017303466796875, 'KL/mean': -0.004536911845207214, 'KL/std': 0.2485760748386383, 'logits/chosen': -0.5107743740081787, 'logits/rejected': -0.47472792863845825, 'epoch': 0.02} + 2%|▏ | 17/681 [00:43<28:05, 2.54s/it] 3%|▎ | 18/681 [00:45<27:55, 2.53s/it] {'loss': 1.3859, 'grad_norm': 41.25909423828125, 'learning_rate': 1.2318840579710146e-07, 'fcm_dpo/beta': 0.05000000074505806, 'fcm_dpo/q_t': 0.4998858571052551, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 0.009129971265792847, 'margin_dpo/margin_mean': 0.009129911661148071, 'margin_dpo/margin_std': 0.3549841642379761, 'logps/chosen': -47.746822357177734, 'logps/rejected': -75.4887466430664, 'logps/ref_chosen': -47.7394905090332, 'logps/ref_rejected': -75.4722900390625, 'KL/chosen_KL_mean': -0.00733184814453125, 'KL/rejected_KL_mean': -0.016460418701171875, 'KL/mean': -0.011897072196006775, 'KL/std': 0.2411435842514038, 'logits/chosen': -0.4928959012031555, 'logits/rejected': -0.43723440170288086, 'epoch': 0.03} + 3%|▎ | 18/681 [00:45<27:55, 2.53s/it] 3%|▎ | 19/681 [00:48<28:04, 2.54s/it] {'loss': 1.3814, 'grad_norm': 36.71526336669922, 'learning_rate': 1.3043478260869563e-07, 'fcm_dpo/beta': 0.05000000074505806, 'fcm_dpo/q_t': 0.4987553358078003, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 0.09958422183990479, 'margin_dpo/margin_mean': 0.09958454966545105, 'margin_dpo/margin_std': 0.33764326572418213, 'logps/chosen': -70.17350769042969, 'logps/rejected': -89.82533264160156, 'logps/ref_chosen': -70.20536041259766, 'logps/ref_rejected': -89.7575912475586, 'KL/chosen_KL_mean': 0.03185272216796875, 'KL/rejected_KL_mean': -0.06773757934570312, 'KL/mean': -0.017944127321243286, 'KL/std': 0.23240481317043304, 'logits/chosen': -0.47301602363586426, 'logits/rejected': -0.42177867889404297, 'epoch': 0.03} + 3%|▎ | 19/681 [00:48<28:04, 2.54s/it] 3%|▎ | 20/681 [00:50<28:04, 2.55s/it] {'loss': 1.3828, 'grad_norm': 37.05985641479492, 'learning_rate': 1.3768115942028986e-07, 'fcm_dpo/beta': 0.05000000074505806, 'fcm_dpo/q_t': 0.49909985065460205, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 0.07201334834098816, 'margin_dpo/margin_mean': 0.07201322913169861, 'margin_dpo/margin_std': 0.3497501015663147, 'logps/chosen': -50.786705017089844, 'logps/rejected': -78.87882232666016, 'logps/ref_chosen': -50.80324172973633, 'logps/ref_rejected': -78.82334899902344, 'KL/chosen_KL_mean': 0.016534805297851562, 'KL/rejected_KL_mean': -0.05547332763671875, 'KL/mean': -0.01947064697742462, 'KL/std': 0.24900861084461212, 'logits/chosen': -0.5658631324768066, 'logits/rejected': -0.510959267616272, 'epoch': 0.03} + 3%|▎ | 20/681 [00:50<28:04, 2.55s/it] 3%|▎ | 21/681 [00:53<27:49, 2.53s/it] {'loss': 1.3807, 'grad_norm': 38.864524841308594, 'learning_rate': 1.4492753623188405e-07, 'fcm_dpo/beta': 0.05000000074505806, 'fcm_dpo/q_t': 0.49858027696609497, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 0.11359718441963196, 'margin_dpo/margin_mean': 0.1135970950126648, 'margin_dpo/margin_std': 0.3924105763435364, 'logps/chosen': -50.05282974243164, 'logps/rejected': -77.97219848632812, 'logps/ref_chosen': -50.063018798828125, 'logps/ref_rejected': -77.86878967285156, 'KL/chosen_KL_mean': 0.010187149047851562, 'KL/rejected_KL_mean': -0.1034088134765625, 'KL/mean': -0.04661019146442413, 'KL/std': 0.2971247434616089, 'logits/chosen': -0.4945378303527832, 'logits/rejected': -0.47060784697532654, 'epoch': 0.03} + 3%|▎ | 21/681 [00:53<27:49, 2.53s/it] 3%|▎ | 22/681 [00:55<27:50, 2.53s/it] {'loss': 1.3784, 'grad_norm': 42.72419357299805, 'learning_rate': 1.5217391304347825e-07, 'fcm_dpo/beta': 0.05000000074505806, 'fcm_dpo/q_t': 0.4979976713657379, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 0.16020318865776062, 'margin_dpo/margin_mean': 0.1602029800415039, 'margin_dpo/margin_std': 0.4115052819252014, 'logps/chosen': -59.031890869140625, 'logps/rejected': -97.63912963867188, 'logps/ref_chosen': -59.05763626098633, 'logps/ref_rejected': -97.50466918945312, 'KL/chosen_KL_mean': 0.025745391845703125, 'KL/rejected_KL_mean': -0.13445281982421875, 'KL/mean': -0.054353222250938416, 'KL/std': 0.2926764190196991, 'logits/chosen': -0.4607342481613159, 'logits/rejected': -0.4157930612564087, 'epoch': 0.03} + 3%|▎ | 22/681 [00:55<27:50, 2.53s/it] 3%|▎ | 23/681 [00:58<29:00, 2.65s/it] {'loss': 1.3753, 'grad_norm': 40.804290771484375, 'learning_rate': 1.5942028985507245e-07, 'fcm_dpo/beta': 0.05000000074505806, 'fcm_dpo/q_t': 0.49721741676330566, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 0.22266075015068054, 'margin_dpo/margin_mean': 0.2226608693599701, 'margin_dpo/margin_std': 0.4632441997528076, 'logps/chosen': -60.02766418457031, 'logps/rejected': -81.31217956542969, 'logps/ref_chosen': -60.07769775390625, 'logps/ref_rejected': -81.13955688476562, 'KL/chosen_KL_mean': 0.0500335693359375, 'KL/rejected_KL_mean': -0.17262649536132812, 'KL/mean': -0.061298683285713196, 'KL/std': 0.3359306752681732, 'logits/chosen': -0.49854540824890137, 'logits/rejected': -0.47655850648880005, 'epoch': 0.03} + 3%|▎ | 23/681 [00:58<29:00, 2.65s/it] 4%|▎ | 24/681 [01:01<29:00, 2.65s/it] {'loss': 1.3768, 'grad_norm': 46.37499237060547, 'learning_rate': 1.6666666666666665e-07, 'fcm_dpo/beta': 0.05000000074505806, 'fcm_dpo/q_t': 0.4976065456867218, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 0.19149301946163177, 'margin_dpo/margin_mean': 0.19149288535118103, 'margin_dpo/margin_std': 0.36473649740219116, 'logps/chosen': -44.28029251098633, 'logps/rejected': -99.30596923828125, 'logps/ref_chosen': -44.29103469848633, 'logps/ref_rejected': -99.12521362304688, 'KL/chosen_KL_mean': 0.0107421875, 'KL/rejected_KL_mean': -0.18075180053710938, 'KL/mean': -0.08500338345766068, 'KL/std': 0.29481637477874756, 'logits/chosen': -0.5174161195755005, 'logits/rejected': -0.5007544159889221, 'epoch': 0.04} + 4%|▎ | 24/681 [01:01<29:00, 2.65s/it] 4%|▎ | 25/681 [01:04<28:58, 2.65s/it] {'loss': 1.378, 'grad_norm': 37.40980529785156, 'learning_rate': 1.7391304347826085e-07, 'fcm_dpo/beta': 0.05000000074505806, 'fcm_dpo/q_t': 0.4978786110877991, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 0.16974028944969177, 'margin_dpo/margin_mean': 0.16974005103111267, 'margin_dpo/margin_std': 0.45375657081604004, 'logps/chosen': -52.57656478881836, 'logps/rejected': -89.55143737792969, 'logps/ref_chosen': -52.537052154541016, 'logps/ref_rejected': -89.34219360351562, 'KL/chosen_KL_mean': -0.03951263427734375, 'KL/rejected_KL_mean': -0.20925140380859375, 'KL/mean': -0.12438388168811798, 'KL/std': 0.3535291850566864, 'logits/chosen': -0.45742011070251465, 'logits/rejected': -0.42607590556144714, 'epoch': 0.04} + 4%|▎ | 25/681 [01:04<28:58, 2.65s/it] 4%|▍ | 26/681 [01:06<27:40, 2.54s/it] {'loss': 1.3678, 'grad_norm': 44.840396881103516, 'learning_rate': 1.8115942028985507e-07, 'fcm_dpo/beta': 0.05000000074505806, 'fcm_dpo/q_t': 0.4953131675720215, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 0.375026136636734, 'margin_dpo/margin_mean': 0.3750268816947937, 'margin_dpo/margin_std': 0.46361756324768066, 'logps/chosen': -53.86589431762695, 'logps/rejected': -103.67782592773438, 'logps/ref_chosen': -53.92280578613281, 'logps/ref_rejected': -103.35971069335938, 'KL/chosen_KL_mean': 0.05691337585449219, 'KL/rejected_KL_mean': -0.31810760498046875, 'KL/mean': -0.13059790432453156, 'KL/std': 0.4097515344619751, 'logits/chosen': -0.5315337181091309, 'logits/rejected': -0.5001455545425415, 'epoch': 0.04} + 4%|▍ | 26/681 [01:06<27:40, 2.54s/it] 4%|▍ | 27/681 [01:08<27:29, 2.52s/it] {'loss': 1.3622, 'grad_norm': 47.54275131225586, 'learning_rate': 1.8840579710144927e-07, 'fcm_dpo/beta': 0.05000000074505806, 'fcm_dpo/q_t': 0.4939061999320984, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 0.4876362979412079, 'margin_dpo/margin_mean': 0.4876362681388855, 'margin_dpo/margin_std': 0.5126945972442627, 'logps/chosen': -42.780418395996094, 'logps/rejected': -99.09371948242188, 'logps/ref_chosen': -42.898529052734375, 'logps/ref_rejected': -98.72419738769531, 'KL/chosen_KL_mean': 0.11811256408691406, 'KL/rejected_KL_mean': -0.3695220947265625, 'KL/mean': -0.12570391595363617, 'KL/std': 0.4389367699623108, 'logits/chosen': -0.531154990196228, 'logits/rejected': -0.4949020743370056, 'epoch': 0.04} + 4%|▍ | 27/681 [01:08<27:29, 2.52s/it] 4%|▍ | 28/681 [01:11<27:34, 2.53s/it] {'loss': 1.3706, 'grad_norm': 38.3453254699707, 'learning_rate': 1.9565217391304347e-07, 'fcm_dpo/beta': 0.05000000074505806, 'fcm_dpo/q_t': 0.49602949619293213, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 0.3176928162574768, 'margin_dpo/margin_mean': 0.31769293546676636, 'margin_dpo/margin_std': 0.5036317110061646, 'logps/chosen': -60.5284538269043, 'logps/rejected': -91.69076538085938, 'logps/ref_chosen': -60.55650329589844, 'logps/ref_rejected': -91.40111541748047, 'KL/chosen_KL_mean': 0.028047561645507812, 'KL/rejected_KL_mean': -0.2896461486816406, 'KL/mean': -0.13079789280891418, 'KL/std': 0.37808555364608765, 'logits/chosen': -0.516849160194397, 'logits/rejected': -0.46265852451324463, 'epoch': 0.04} + 4%|▍ | 28/681 [01:11<27:34, 2.53s/it] 4%|▍ | 29/681 [01:13<26:32, 2.44s/it] {'loss': 1.3611, 'grad_norm': 46.29709243774414, 'learning_rate': 2.028985507246377e-07, 'fcm_dpo/beta': 0.05000000074505806, 'fcm_dpo/q_t': 0.4936005473136902, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 0.512132465839386, 'margin_dpo/margin_mean': 0.5121327638626099, 'margin_dpo/margin_std': 0.6046355366706848, 'logps/chosen': -57.73973083496094, 'logps/rejected': -97.83842468261719, 'logps/ref_chosen': -57.80778503417969, 'logps/ref_rejected': -97.39434814453125, 'KL/chosen_KL_mean': 0.06805419921875, 'KL/rejected_KL_mean': -0.4440765380859375, 'KL/mean': -0.18801212310791016, 'KL/std': 0.4913862943649292, 'logits/chosen': -0.5535327196121216, 'logits/rejected': -0.5072311758995056, 'epoch': 0.04} + 4%|▍ | 29/681 [01:13<26:32, 2.44s/it] 4%|▍ | 30/681 [01:16<27:11, 2.51s/it] {'loss': 1.3522, 'grad_norm': 45.00627899169922, 'learning_rate': 2.1014492753623187e-07, 'fcm_dpo/beta': 0.05000000074505806, 'fcm_dpo/q_t': 0.4913354218006134, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 0.6934210658073425, 'margin_dpo/margin_mean': 0.6934208869934082, 'margin_dpo/margin_std': 0.5992348194122314, 'logps/chosen': -52.41510009765625, 'logps/rejected': -99.02035522460938, 'logps/ref_chosen': -52.577369689941406, 'logps/ref_rejected': -98.48920440673828, 'KL/chosen_KL_mean': 0.16227149963378906, 'KL/rejected_KL_mean': -0.5311508178710938, 'KL/mean': -0.18443751335144043, 'KL/std': 0.5754395723342896, 'logits/chosen': -0.5093830823898315, 'logits/rejected': -0.47936421632766724, 'epoch': 0.04} + 4%|▍ | 30/681 [01:16<27:11, 2.51s/it] 5%|▍ | 31/681 [01:19<27:42, 2.56s/it] {'loss': 1.3609, 'grad_norm': 34.56964874267578, 'learning_rate': 2.1739130434782607e-07, 'fcm_dpo/beta': 0.05000000074505806, 'fcm_dpo/q_t': 0.4935287833213806, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 0.5179520845413208, 'margin_dpo/margin_mean': 0.5179519653320312, 'margin_dpo/margin_std': 0.6779955625534058, 'logps/chosen': -63.69575119018555, 'logps/rejected': -73.30078887939453, 'logps/ref_chosen': -63.806922912597656, 'logps/ref_rejected': -72.89400482177734, 'KL/chosen_KL_mean': 0.11116981506347656, 'KL/rejected_KL_mean': -0.4067840576171875, 'KL/mean': -0.14780662953853607, 'KL/std': 0.56305992603302, 'logits/chosen': -0.5045328140258789, 'logits/rejected': -0.4596520662307739, 'epoch': 0.05} + 5%|▍ | 31/681 [01:19<27:42, 2.56s/it] 5%|▍ | 32/681 [01:21<28:16, 2.61s/it] {'loss': 1.3491, 'grad_norm': 43.04856872558594, 'learning_rate': 2.2463768115942027e-07, 'fcm_dpo/beta': 0.05000000074505806, 'fcm_dpo/q_t': 0.4905046820640564, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 0.760206937789917, 'margin_dpo/margin_mean': 0.760206937789917, 'margin_dpo/margin_std': 0.8800061941146851, 'logps/chosen': -62.524940490722656, 'logps/rejected': -89.86312866210938, 'logps/ref_chosen': -62.739524841308594, 'logps/ref_rejected': -89.3175048828125, 'KL/chosen_KL_mean': 0.2145843505859375, 'KL/rejected_KL_mean': -0.545623779296875, 'KL/mean': -0.16551779210567474, 'KL/std': 0.735187828540802, 'logits/chosen': -0.5065813064575195, 'logits/rejected': -0.46486425399780273, 'epoch': 0.05} + 5%|▍ | 32/681 [01:21<28:16, 2.61s/it] 5%|▍ | 33/681 [01:24<27:30, 2.55s/it] {'loss': 1.3587, 'grad_norm': 37.19485092163086, 'learning_rate': 2.318840579710145e-07, 'fcm_dpo/beta': 0.05000000074505806, 'fcm_dpo/q_t': 0.492986798286438, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 0.5612262487411499, 'margin_dpo/margin_mean': 0.561225950717926, 'margin_dpo/margin_std': 0.6160410642623901, 'logps/chosen': -53.164703369140625, 'logps/rejected': -88.35009765625, 'logps/ref_chosen': -53.26097106933594, 'logps/ref_rejected': -87.8851318359375, 'KL/chosen_KL_mean': 0.09627151489257812, 'KL/rejected_KL_mean': -0.46495819091796875, 'KL/mean': -0.18434438109397888, 'KL/std': 0.5440672636032104, 'logits/chosen': -0.4748949110507965, 'logits/rejected': -0.4482702612876892, 'epoch': 0.05} + 5%|▍ | 33/681 [01:24<27:30, 2.55s/it] 5%|▍ | 34/681 [01:26<27:43, 2.57s/it] {'loss': 1.3456, 'grad_norm': 40.41777801513672, 'learning_rate': 2.391304347826087e-07, 'fcm_dpo/beta': 0.05000000074505806, 'fcm_dpo/q_t': 0.48959898948669434, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 0.8326817750930786, 'margin_dpo/margin_mean': 0.8326810002326965, 'margin_dpo/margin_std': 0.8663803339004517, 'logps/chosen': -50.72563552856445, 'logps/rejected': -102.662841796875, 'logps/ref_chosen': -50.81732940673828, 'logps/ref_rejected': -101.92184448242188, 'KL/chosen_KL_mean': 0.09169197082519531, 'KL/rejected_KL_mean': -0.7409934997558594, 'KL/mean': -0.32464924454689026, 'KL/std': 0.7199804782867432, 'logits/chosen': -0.48803627490997314, 'logits/rejected': -0.47066670656204224, 'epoch': 0.05} + 5%|▍ | 34/681 [01:26<27:43, 2.57s/it] 5%|▌ | 35/681 [01:29<28:02, 2.61s/it] {'loss': 1.3309, 'grad_norm': 43.86104965209961, 'learning_rate': 2.463768115942029e-07, 'fcm_dpo/beta': 0.05000000074505806, 'fcm_dpo/q_t': 0.4857790470123291, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 1.1397216320037842, 'margin_dpo/margin_mean': 1.1397206783294678, 'margin_dpo/margin_std': 1.1088385581970215, 'logps/chosen': -50.898338317871094, 'logps/rejected': -107.83799743652344, 'logps/ref_chosen': -51.02449035644531, 'logps/ref_rejected': -106.82443237304688, 'KL/chosen_KL_mean': 0.12615013122558594, 'KL/rejected_KL_mean': -1.0135726928710938, 'KL/mean': -0.4437118172645569, 'KL/std': 0.9808096885681152, 'logits/chosen': -0.538067638874054, 'logits/rejected': -0.5010450482368469, 'epoch': 0.05} + 5%|▌ | 35/681 [01:29<28:02, 2.61s/it] 5%|▌ | 36/681 [01:32<28:07, 2.62s/it] {'loss': 1.3314, 'grad_norm': 38.84652328491211, 'learning_rate': 2.536231884057971e-07, 'fcm_dpo/beta': 0.05000000074505806, 'fcm_dpo/q_t': 0.48587337136268616, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 1.1315288543701172, 'margin_dpo/margin_mean': 1.1315281391143799, 'margin_dpo/margin_std': 1.1600990295410156, 'logps/chosen': -51.955078125, 'logps/rejected': -87.1357421875, 'logps/ref_chosen': -51.991493225097656, 'logps/ref_rejected': -86.0406265258789, 'KL/chosen_KL_mean': 0.036411285400390625, 'KL/rejected_KL_mean': -1.0951156616210938, 'KL/mean': -0.529353678226471, 'KL/std': 1.0766912698745728, 'logits/chosen': -0.570672869682312, 'logits/rejected': -0.5350126028060913, 'epoch': 0.05} + 5%|▌ | 36/681 [01:32<28:07, 2.62s/it] 5%|▌ | 37/681 [01:34<28:09, 2.62s/it] {'loss': 1.3351, 'grad_norm': 32.67951202392578, 'learning_rate': 2.6086956521739126e-07, 'fcm_dpo/beta': 0.05000000074505806, 'fcm_dpo/q_t': 0.4867693781852722, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 1.0608052015304565, 'margin_dpo/margin_mean': 1.0608049631118774, 'margin_dpo/margin_std': 1.3338418006896973, 'logps/chosen': -62.767662048339844, 'logps/rejected': -78.91642761230469, 'logps/ref_chosen': -62.807106018066406, 'logps/ref_rejected': -77.89507293701172, 'KL/chosen_KL_mean': 0.039447784423828125, 'KL/rejected_KL_mean': -1.0213546752929688, 'KL/mean': -0.4909515976905823, 'KL/std': 1.065048336982727, 'logits/chosen': -0.4992326498031616, 'logits/rejected': -0.45456790924072266, 'epoch': 0.05} + 5%|▌ | 37/681 [01:34<28:09, 2.62s/it] 6%|▌ | 38/681 [01:36<26:50, 2.50s/it] {'loss': 1.3182, 'grad_norm': 37.384422302246094, 'learning_rate': 2.681159420289855e-07, 'fcm_dpo/beta': 0.05000000074505806, 'fcm_dpo/q_t': 0.4822547733783722, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 1.4247064590454102, 'margin_dpo/margin_mean': 1.4247064590454102, 'margin_dpo/margin_std': 1.7057501077651978, 'logps/chosen': -48.21266555786133, 'logps/rejected': -99.15929412841797, 'logps/ref_chosen': -48.39051818847656, 'logps/ref_rejected': -97.91244506835938, 'KL/chosen_KL_mean': 0.1778545379638672, 'KL/rejected_KL_mean': -1.2468528747558594, 'KL/mean': -0.534498929977417, 'KL/std': 1.3990492820739746, 'logits/chosen': -0.5284711122512817, 'logits/rejected': -0.4957655370235443, 'epoch': 0.06} + 6%|▌ | 38/681 [01:37<26:50, 2.50s/it] 6%|▌ | 39/681 [01:39<26:52, 2.51s/it] {'loss': 1.307, 'grad_norm': 39.39072799682617, 'learning_rate': 2.753623188405797e-07, 'fcm_dpo/beta': 0.05000000074505806, 'fcm_dpo/q_t': 0.47944512963294983, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 1.6478345394134521, 'margin_dpo/margin_mean': 1.6478347778320312, 'margin_dpo/margin_std': 1.4638022184371948, 'logps/chosen': -50.66071319580078, 'logps/rejected': -80.12759399414062, 'logps/ref_chosen': -50.75047302246094, 'logps/ref_rejected': -78.56951141357422, 'KL/chosen_KL_mean': 0.08975982666015625, 'KL/rejected_KL_mean': -1.5580787658691406, 'KL/mean': -0.7341597080230713, 'KL/std': 1.322374939918518, 'logits/chosen': -0.5558722019195557, 'logits/rejected': -0.5158903002738953, 'epoch': 0.06} + 6%|▌ | 39/681 [01:39<26:52, 2.51s/it] 6%|▌ | 40/681 [01:42<27:27, 2.57s/it] {'loss': 1.3112, 'grad_norm': 32.67512512207031, 'learning_rate': 2.8260869565217386e-07, 'fcm_dpo/beta': 0.05000000074505806, 'fcm_dpo/q_t': 0.48045170307159424, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 1.568869948387146, 'margin_dpo/margin_mean': 1.5688700675964355, 'margin_dpo/margin_std': 1.6431810855865479, 'logps/chosen': -57.798038482666016, 'logps/rejected': -75.68191528320312, 'logps/ref_chosen': -57.985069274902344, 'logps/ref_rejected': -74.3000717163086, 'KL/chosen_KL_mean': 0.1870288848876953, 'KL/rejected_KL_mean': -1.3818397521972656, 'KL/mean': -0.5974045395851135, 'KL/std': 1.3903248310089111, 'logits/chosen': -0.531123161315918, 'logits/rejected': -0.5019083619117737, 'epoch': 0.06} + 6%|▌ | 40/681 [01:42<27:27, 2.57s/it] 6%|▌ | 41/681 [01:44<27:25, 2.57s/it] {'loss': 1.2953, 'grad_norm': 37.314823150634766, 'learning_rate': 2.898550724637681e-07, 'fcm_dpo/beta': 0.05000000074505806, 'fcm_dpo/q_t': 0.47608882188796997, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 1.921630620956421, 'margin_dpo/margin_mean': 1.9216312170028687, 'margin_dpo/margin_std': 2.0707690715789795, 'logps/chosen': -62.66911315917969, 'logps/rejected': -98.91845703125, 'logps/ref_chosen': -62.69581604003906, 'logps/ref_rejected': -97.02352905273438, 'KL/chosen_KL_mean': 0.026700973510742188, 'KL/rejected_KL_mean': -1.894927978515625, 'KL/mean': -0.9341164827346802, 'KL/std': 1.8969902992248535, 'logits/chosen': -0.549726128578186, 'logits/rejected': -0.5137777328491211, 'epoch': 0.06} + 6%|▌ | 41/681 [01:44<27:25, 2.57s/it] 6%|▌ | 42/681 [01:47<27:18, 2.56s/it] {'loss': 1.2632, 'grad_norm': 44.921146392822266, 'learning_rate': 2.971014492753623e-07, 'fcm_dpo/beta': 0.05000000074505806, 'fcm_dpo/q_t': 0.46733659505844116, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 2.6327667236328125, 'margin_dpo/margin_mean': 2.6327667236328125, 'margin_dpo/margin_std': 2.5917067527770996, 'logps/chosen': -58.759559631347656, 'logps/rejected': -112.33427429199219, 'logps/ref_chosen': -58.966426849365234, 'logps/ref_rejected': -109.90837097167969, 'KL/chosen_KL_mean': 0.20686912536621094, 'KL/rejected_KL_mean': -2.4258995056152344, 'KL/mean': -1.109514832496643, 'KL/std': 2.2611937522888184, 'logits/chosen': -0.5433107614517212, 'logits/rejected': -0.49691134691238403, 'epoch': 0.06} + 6%|▌ | 42/681 [01:47<27:18, 2.56s/it] 6%|▋ | 43/681 [01:49<27:18, 2.57s/it] {'loss': 1.2648, 'grad_norm': 39.96173858642578, 'learning_rate': 3.043478260869565e-07, 'fcm_dpo/beta': 0.05000000074505806, 'fcm_dpo/q_t': 0.46813303232192993, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 2.5607895851135254, 'margin_dpo/margin_mean': 2.5607893466949463, 'margin_dpo/margin_std': 1.9669482707977295, 'logps/chosen': -53.58796691894531, 'logps/rejected': -98.47295379638672, 'logps/ref_chosen': -54.15599822998047, 'logps/ref_rejected': -96.48019409179688, 'KL/chosen_KL_mean': 0.5680294036865234, 'KL/rejected_KL_mean': -1.9927635192871094, 'KL/mean': -0.7123653888702393, 'KL/std': 1.8695602416992188, 'logits/chosen': -0.529514729976654, 'logits/rejected': -0.50406813621521, 'epoch': 0.06} + 6%|▋ | 43/681 [01:49<27:18, 2.57s/it] 6%|▋ | 44/681 [01:52<27:17, 2.57s/it] {'loss': 1.2528, 'grad_norm': 44.83015823364258, 'learning_rate': 3.115942028985507e-07, 'fcm_dpo/beta': 0.05000000074505806, 'fcm_dpo/q_t': 0.46478694677352905, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 2.832730293273926, 'margin_dpo/margin_mean': 2.832730293273926, 'margin_dpo/margin_std': 2.242119789123535, 'logps/chosen': -49.84926986694336, 'logps/rejected': -111.38726806640625, 'logps/ref_chosen': -50.07849884033203, 'logps/ref_rejected': -108.78376007080078, 'KL/chosen_KL_mean': 0.22922897338867188, 'KL/rejected_KL_mean': -2.603504180908203, 'KL/mean': -1.1871364116668701, 'KL/std': 2.2904388904571533, 'logits/chosen': -0.4886033236980438, 'logits/rejected': -0.4682733416557312, 'epoch': 0.06} + 6%|▋ | 44/681 [01:52<27:17, 2.57s/it] 7%|▋ | 45/681 [01:55<27:27, 2.59s/it] {'loss': 1.2787, 'grad_norm': 35.4964485168457, 'learning_rate': 3.188405797101449e-07, 'fcm_dpo/beta': 0.05000000074505806, 'fcm_dpo/q_t': 0.4713747501373291, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 2.307036876678467, 'margin_dpo/margin_mean': 2.307036876678467, 'margin_dpo/margin_std': 2.619992733001709, 'logps/chosen': -48.26304626464844, 'logps/rejected': -80.09158325195312, 'logps/ref_chosen': -48.4149284362793, 'logps/ref_rejected': -77.93643188476562, 'KL/chosen_KL_mean': 0.15188217163085938, 'KL/rejected_KL_mean': -2.1551513671875, 'KL/mean': -1.0016334056854248, 'KL/std': 2.107585906982422, 'logits/chosen': -0.49817246198654175, 'logits/rejected': -0.48587897419929504, 'epoch': 0.07} + 7%|▋ | 45/681 [01:55<27:27, 2.59s/it] 7%|▋ | 46/681 [01:57<27:41, 2.62s/it] {'loss': 1.2474, 'grad_norm': 40.982444763183594, 'learning_rate': 3.260869565217391e-07, 'fcm_dpo/beta': 0.05000000074505806, 'fcm_dpo/q_t': 0.4626089632511139, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 3.0271382331848145, 'margin_dpo/margin_mean': 3.0271389484405518, 'margin_dpo/margin_std': 3.295480966567993, 'logps/chosen': -55.78034973144531, 'logps/rejected': -98.46064758300781, 'logps/ref_chosen': -55.999427795410156, 'logps/ref_rejected': -95.652587890625, 'KL/chosen_KL_mean': 0.21907806396484375, 'KL/rejected_KL_mean': -2.808063507080078, 'KL/mean': -1.2944903373718262, 'KL/std': 2.69203782081604, 'logits/chosen': -0.5164551138877869, 'logits/rejected': -0.464849591255188, 'epoch': 0.07} + 7%|▋ | 46/681 [01:57<27:41, 2.62s/it] 7%|▋ | 47/681 [02:00<27:43, 2.62s/it] {'loss': 1.246, 'grad_norm': 37.45136642456055, 'learning_rate': 3.333333333333333e-07, 'fcm_dpo/beta': 0.05000000074505806, 'fcm_dpo/q_t': 0.46258771419525146, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 3.0167040824890137, 'margin_dpo/margin_mean': 3.016704559326172, 'margin_dpo/margin_std': 2.752382516860962, 'logps/chosen': -57.53368377685547, 'logps/rejected': -97.30352020263672, 'logps/ref_chosen': -57.92607879638672, 'logps/ref_rejected': -94.67920684814453, 'KL/chosen_KL_mean': 0.3923931121826172, 'KL/rejected_KL_mean': -2.6243133544921875, 'KL/mean': -1.1159597635269165, 'KL/std': 2.6395797729492188, 'logits/chosen': -0.5594693422317505, 'logits/rejected': -0.5059822797775269, 'epoch': 0.07} + 7%|▋ | 47/681 [02:00<27:43, 2.62s/it] 7%|▋ | 48/681 [02:03<28:03, 2.66s/it] {'loss': 1.2359, 'grad_norm': 42.72023391723633, 'learning_rate': 3.4057971014492755e-07, 'fcm_dpo/beta': 0.05000000074505806, 'fcm_dpo/q_t': 0.4594641327857971, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 3.2726051807403564, 'margin_dpo/margin_mean': 3.2726054191589355, 'margin_dpo/margin_std': 2.975618362426758, 'logps/chosen': -57.112030029296875, 'logps/rejected': -91.21316528320312, 'logps/ref_chosen': -57.188072204589844, 'logps/ref_rejected': -88.0166015625, 'KL/chosen_KL_mean': 0.07604217529296875, 'KL/rejected_KL_mean': -3.1965599060058594, 'KL/mean': -1.5602600574493408, 'KL/std': 2.765866756439209, 'logits/chosen': -0.5792273879051208, 'logits/rejected': -0.5201135277748108, 'epoch': 0.07} + 7%|▋ | 48/681 [02:03<28:03, 2.66s/it] 7%|▋ | 49/681 [02:05<27:44, 2.63s/it] {'loss': 1.2127, 'grad_norm': 37.98999786376953, 'learning_rate': 3.478260869565217e-07, 'fcm_dpo/beta': 0.05044425278902054, 'fcm_dpo/q_t': 0.45288553833961487, 'fcm_dpo/delta': 0.08806969970464706, 'fcm_dpo/margin': 3.8355047702789307, 'margin_dpo/margin_mean': 3.8355050086975098, 'margin_dpo/margin_std': 4.0506486892700195, 'logps/chosen': -61.403564453125, 'logps/rejected': -87.32127380371094, 'logps/ref_chosen': -61.685272216796875, 'logps/ref_rejected': -83.76747131347656, 'KL/chosen_KL_mean': 0.2817058563232422, 'KL/rejected_KL_mean': -3.5537986755371094, 'KL/mean': -1.6360485553741455, 'KL/std': 3.4877753257751465, 'logits/chosen': -0.5384161472320557, 'logits/rejected': -0.4786253571510315, 'epoch': 0.07} + 7%|▋ | 49/681 [02:05<27:44, 2.63s/it] 7%|▋ | 50/681 [02:08<27:51, 2.65s/it] {'loss': 1.1922, 'grad_norm': 38.957035064697266, 'learning_rate': 3.5507246376811595e-07, 'fcm_dpo/beta': 0.052318423986434937, 'fcm_dpo/q_t': 0.4476335346698761, 'fcm_dpo/delta': 0.1870485544204712, 'fcm_dpo/margin': 4.148881435394287, 'margin_dpo/margin_mean': 4.148880958557129, 'margin_dpo/margin_std': 4.06521463394165, 'logps/chosen': -58.95936965942383, 'logps/rejected': -100.74226379394531, 'logps/ref_chosen': -58.72413635253906, 'logps/ref_rejected': -96.35814666748047, 'KL/chosen_KL_mean': -0.2352313995361328, 'KL/rejected_KL_mean': -4.384113311767578, 'KL/mean': -2.3096749782562256, 'KL/std': 3.5547854900360107, 'logits/chosen': -0.5327342748641968, 'logits/rejected': -0.49643486738204956, 'epoch': 0.07} + 7%|▋ | 50/681 [02:08<27:51, 2.65s/it] 7%|▋ | 51/681 [02:11<27:45, 2.64s/it] {'loss': 1.1977, 'grad_norm': 34.38425827026367, 'learning_rate': 3.6231884057971015e-07, 'fcm_dpo/beta': 0.05356086045503616, 'fcm_dpo/q_t': 0.447162002325058, 'fcm_dpo/delta': 0.06885935366153717, 'fcm_dpo/margin': 4.106563568115234, 'margin_dpo/margin_mean': 4.106563091278076, 'margin_dpo/margin_std': 5.20696496963501, 'logps/chosen': -61.6617431640625, 'logps/rejected': -80.39664459228516, 'logps/ref_chosen': -61.3736686706543, 'logps/ref_rejected': -76.00199890136719, 'KL/chosen_KL_mean': -0.2880744934082031, 'KL/rejected_KL_mean': -4.394641876220703, 'KL/mean': -2.3413543701171875, 'KL/std': 4.241177558898926, 'logits/chosen': -0.5253022313117981, 'logits/rejected': -0.4924160838127136, 'epoch': 0.07} + 7%|▋ | 51/681 [02:11<27:45, 2.64s/it] 8%|▊ | 52/681 [02:13<27:11, 2.59s/it] {'loss': 1.096, 'grad_norm': 41.69569396972656, 'learning_rate': 3.695652173913043e-07, 'fcm_dpo/beta': 0.05409781634807587, 'fcm_dpo/q_t': 0.4186936020851135, 'fcm_dpo/delta': 0.06422993540763855, 'fcm_dpo/margin': 6.246335983276367, 'margin_dpo/margin_mean': 6.246336936950684, 'margin_dpo/margin_std': 5.239194393157959, 'logps/chosen': -52.00030517578125, 'logps/rejected': -85.8832015991211, 'logps/ref_chosen': -52.33735656738281, 'logps/ref_rejected': -79.97391510009766, 'KL/chosen_KL_mean': 0.3370513916015625, 'KL/rejected_KL_mean': -5.9092864990234375, 'KL/mean': -2.786116600036621, 'KL/std': 4.868247985839844, 'logits/chosen': -0.5800528526306152, 'logits/rejected': -0.5262706875801086, 'epoch': 0.08} + 8%|▊ | 52/681 [02:13<27:11, 2.59s/it] 8%|▊ | 53/681 [02:16<27:02, 2.58s/it] {'loss': 1.1022, 'grad_norm': 41.55133056640625, 'learning_rate': 3.7681159420289855e-07, 'fcm_dpo/beta': 0.05494330823421478, 'fcm_dpo/q_t': 0.41828417778015137, 'fcm_dpo/delta': 0.05208485573530197, 'fcm_dpo/margin': 6.352412223815918, 'margin_dpo/margin_mean': 6.352412223815918, 'margin_dpo/margin_std': 6.420080184936523, 'logps/chosen': -53.55384826660156, 'logps/rejected': -98.37519836425781, 'logps/ref_chosen': -53.31465148925781, 'logps/ref_rejected': -91.78359985351562, 'KL/chosen_KL_mean': -0.2391986846923828, 'KL/rejected_KL_mean': -6.591606140136719, 'KL/mean': -3.4154043197631836, 'KL/std': 5.5702056884765625, 'logits/chosen': -0.6179283857345581, 'logits/rejected': -0.5963842272758484, 'epoch': 0.08} + 8%|▊ | 53/681 [02:16<27:02, 2.58s/it] 8%|▊ | 54/681 [02:18<26:29, 2.54s/it] {'loss': 1.1223, 'grad_norm': 38.000980377197266, 'learning_rate': 3.8405797101449274e-07, 'fcm_dpo/beta': 0.055665239691734314, 'fcm_dpo/q_t': 0.42631345987319946, 'fcm_dpo/delta': 0.09488870948553085, 'fcm_dpo/margin': 5.531402111053467, 'margin_dpo/margin_mean': 5.53140115737915, 'margin_dpo/margin_std': 5.259613990783691, 'logps/chosen': -51.15275573730469, 'logps/rejected': -97.71089172363281, 'logps/ref_chosen': -50.68865966796875, 'logps/ref_rejected': -91.71539306640625, 'KL/chosen_KL_mean': -0.4640941619873047, 'KL/rejected_KL_mean': -5.995494842529297, 'KL/mean': -3.2297964096069336, 'KL/std': 5.26720666885376, 'logits/chosen': -0.5974017381668091, 'logits/rejected': -0.5442031025886536, 'epoch': 0.08} + 8%|▊ | 54/681 [02:18<26:29, 2.54s/it] 8%|▊ | 55/681 [02:20<25:32, 2.45s/it] {'loss': 1.0959, 'grad_norm': 37.752960205078125, 'learning_rate': 3.9130434782608694e-07, 'fcm_dpo/beta': 0.05629376322031021, 'fcm_dpo/q_t': 0.4138604402542114, 'fcm_dpo/delta': 0.03277287259697914, 'fcm_dpo/margin': 6.540935039520264, 'margin_dpo/margin_mean': 6.5409345626831055, 'margin_dpo/margin_std': 7.682841777801514, 'logps/chosen': -63.64301681518555, 'logps/rejected': -96.56222534179688, 'logps/ref_chosen': -62.615234375, 'logps/ref_rejected': -88.99349975585938, 'KL/chosen_KL_mean': -1.0277824401855469, 'KL/rejected_KL_mean': -7.568717956542969, 'KL/mean': -4.298252105712891, 'KL/std': 6.322789192199707, 'logits/chosen': -0.6345809698104858, 'logits/rejected': -0.5711982250213623, 'epoch': 0.08} + 8%|▊ | 55/681 [02:20<25:32, 2.45s/it] 8%|▊ | 56/681 [02:23<26:04, 2.50s/it] {'loss': 1.1059, 'grad_norm': 35.4822883605957, 'learning_rate': 3.9855072463768114e-07, 'fcm_dpo/beta': 0.05667191743850708, 'fcm_dpo/q_t': 0.41691917181015015, 'fcm_dpo/delta': 0.0462585911154747, 'fcm_dpo/margin': 6.270114421844482, 'margin_dpo/margin_mean': 6.270113945007324, 'margin_dpo/margin_std': 7.562032699584961, 'logps/chosen': -58.7577018737793, 'logps/rejected': -101.26953125, 'logps/ref_chosen': -57.9327278137207, 'logps/ref_rejected': -94.1744384765625, 'KL/chosen_KL_mean': -0.8249740600585938, 'KL/rejected_KL_mean': -7.095088958740234, 'KL/mean': -3.960031032562256, 'KL/std': 6.235048294067383, 'logits/chosen': -0.6079816818237305, 'logits/rejected': -0.5654845237731934, 'epoch': 0.08} + 8%|▊ | 56/681 [02:23<26:04, 2.50s/it] 8%|▊ | 57/681 [02:25<26:07, 2.51s/it] {'loss': 1.0592, 'grad_norm': 39.752960205078125, 'learning_rate': 4.057971014492754e-07, 'fcm_dpo/beta': 0.05690793693065643, 'fcm_dpo/q_t': 0.4039880931377411, 'fcm_dpo/delta': -0.004338288679718971, 'fcm_dpo/margin': 7.098737716674805, 'margin_dpo/margin_mean': 7.0987372398376465, 'margin_dpo/margin_std': 6.829120635986328, 'logps/chosen': -71.3287582397461, 'logps/rejected': -103.49767303466797, 'logps/ref_chosen': -70.49528503417969, 'logps/ref_rejected': -95.56546020507812, 'KL/chosen_KL_mean': -0.8334732055664062, 'KL/rejected_KL_mean': -7.932216644287109, 'KL/mean': -4.382845401763916, 'KL/std': 6.173903942108154, 'logits/chosen': -0.6026902198791504, 'logits/rejected': -0.5755797624588013, 'epoch': 0.08} + 8%|▊ | 57/681 [02:25<26:07, 2.51s/it] 9%|▊ | 58/681 [02:28<26:30, 2.55s/it] {'loss': 1.0414, 'grad_norm': 41.28611755371094, 'learning_rate': 4.1304347826086954e-07, 'fcm_dpo/beta': 0.05647977069020271, 'fcm_dpo/q_t': 0.3962929844856262, 'fcm_dpo/delta': -0.049127254635095596, 'fcm_dpo/margin': 7.912271499633789, 'margin_dpo/margin_mean': 7.912272930145264, 'margin_dpo/margin_std': 8.415109634399414, 'logps/chosen': -63.30101013183594, 'logps/rejected': -93.69764709472656, 'logps/ref_chosen': -62.13294219970703, 'logps/ref_rejected': -84.61729431152344, 'KL/chosen_KL_mean': -1.1680717468261719, 'KL/rejected_KL_mean': -9.08034896850586, 'KL/mean': -5.124210834503174, 'KL/std': 7.0405378341674805, 'logits/chosen': -0.5904037952423096, 'logits/rejected': -0.5139462947845459, 'epoch': 0.09} + 9%|▊ | 58/681 [02:28<26:30, 2.55s/it] 9%|▊ | 59/681 [02:31<26:31, 2.56s/it] {'loss': 1.0196, 'grad_norm': 41.06474304199219, 'learning_rate': 4.2028985507246374e-07, 'fcm_dpo/beta': 0.05512422323226929, 'fcm_dpo/q_t': 0.388535737991333, 'fcm_dpo/delta': -0.08732414245605469, 'fcm_dpo/margin': 8.744604110717773, 'margin_dpo/margin_mean': 8.744604110717773, 'margin_dpo/margin_std': 9.00253677368164, 'logps/chosen': -53.60710906982422, 'logps/rejected': -99.30438232421875, 'logps/ref_chosen': -51.932525634765625, 'logps/ref_rejected': -88.88520050048828, 'KL/chosen_KL_mean': -1.6745834350585938, 'KL/rejected_KL_mean': -10.419181823730469, 'KL/mean': -6.046883583068848, 'KL/std': 7.833342552185059, 'logits/chosen': -0.6156203746795654, 'logits/rejected': -0.5749033689498901, 'epoch': 0.09} + 9%|▊ | 59/681 [02:31<26:31, 2.56s/it] 9%|▉ | 60/681 [02:33<26:22, 2.55s/it] {'loss': 1.0887, 'grad_norm': 41.82515335083008, 'learning_rate': 4.2753623188405794e-07, 'fcm_dpo/beta': 0.055202968418598175, 'fcm_dpo/q_t': 0.4095836579799652, 'fcm_dpo/delta': 0.01902601681649685, 'fcm_dpo/margin': 6.913043975830078, 'margin_dpo/margin_mean': 6.913043975830078, 'margin_dpo/margin_std': 8.055152893066406, 'logps/chosen': -63.95713424682617, 'logps/rejected': -95.32139587402344, 'logps/ref_chosen': -60.94218826293945, 'logps/ref_rejected': -85.39340209960938, 'KL/chosen_KL_mean': -3.0149459838867188, 'KL/rejected_KL_mean': -9.927997589111328, 'KL/mean': -6.47147274017334, 'KL/std': 7.12081241607666, 'logits/chosen': -0.6264551877975464, 'logits/rejected': -0.5676676034927368, 'epoch': 0.09} + 9%|▉ | 60/681 [02:33<26:22, 2.55s/it] 9%|▉ | 61/681 [02:36<26:34, 2.57s/it] {'loss': 1.0562, 'grad_norm': 36.854209899902344, 'learning_rate': 4.3478260869565214e-07, 'fcm_dpo/beta': 0.054592475295066833, 'fcm_dpo/q_t': 0.3965580463409424, 'fcm_dpo/delta': -0.07944516837596893, 'fcm_dpo/margin': 8.706413269042969, 'margin_dpo/margin_mean': 8.706413269042969, 'margin_dpo/margin_std': 12.064128875732422, 'logps/chosen': -62.41876220703125, 'logps/rejected': -100.34414672851562, 'logps/ref_chosen': -60.633522033691406, 'logps/ref_rejected': -89.85249328613281, 'KL/chosen_KL_mean': -1.785238265991211, 'KL/rejected_KL_mean': -10.491649627685547, 'KL/mean': -6.138444900512695, 'KL/std': 9.666519165039062, 'logits/chosen': -0.6180921196937561, 'logits/rejected': -0.5846239328384399, 'epoch': 0.09} + 9%|▉ | 61/681 [02:36<26:34, 2.57s/it] 9%|▉ | 62/681 [02:39<27:02, 2.62s/it] {'loss': 1.1131, 'grad_norm': 35.16862487792969, 'learning_rate': 4.420289855072464e-07, 'fcm_dpo/beta': 0.05502016097307205, 'fcm_dpo/q_t': 0.416960746049881, 'fcm_dpo/delta': 0.041067853569984436, 'fcm_dpo/margin': 6.545290470123291, 'margin_dpo/margin_mean': 6.545290946960449, 'margin_dpo/margin_std': 8.706681251525879, 'logps/chosen': -58.066001892089844, 'logps/rejected': -84.02671813964844, 'logps/ref_chosen': -56.15077209472656, 'logps/ref_rejected': -75.56619262695312, 'KL/chosen_KL_mean': -1.9152297973632812, 'KL/rejected_KL_mean': -8.460521697998047, 'KL/mean': -5.1878767013549805, 'KL/std': 7.09406852722168, 'logits/chosen': -0.6116189956665039, 'logits/rejected': -0.5774843692779541, 'epoch': 0.09} + 9%|▉ | 62/681 [02:39<27:02, 2.62s/it] 9%|▉ | 63/681 [02:41<26:41, 2.59s/it] {'loss': 1.0306, 'grad_norm': 39.14129638671875, 'learning_rate': 4.4927536231884053e-07, 'fcm_dpo/beta': 0.05407857149839401, 'fcm_dpo/q_t': 0.38956284523010254, 'fcm_dpo/delta': -0.07995294779539108, 'fcm_dpo/margin': 8.791152000427246, 'margin_dpo/margin_mean': 8.791152000427246, 'margin_dpo/margin_std': 9.617431640625, 'logps/chosen': -76.36060333251953, 'logps/rejected': -109.61442565917969, 'logps/ref_chosen': -73.14739227294922, 'logps/ref_rejected': -97.61006164550781, 'KL/chosen_KL_mean': -3.2132091522216797, 'KL/rejected_KL_mean': -12.004364013671875, 'KL/mean': -7.6087846755981445, 'KL/std': 9.102246284484863, 'logits/chosen': -0.6333186626434326, 'logits/rejected': -0.5910948514938354, 'epoch': 0.09} + 9%|▉ | 63/681 [02:41<26:41, 2.59s/it] 9%|▉ | 64/681 [02:44<26:14, 2.55s/it] {'loss': 0.9991, 'grad_norm': 35.86033248901367, 'learning_rate': 4.5652173913043473e-07, 'fcm_dpo/beta': 0.05238521099090576, 'fcm_dpo/q_t': 0.37639370560646057, 'fcm_dpo/delta': -0.16296426951885223, 'fcm_dpo/margin': 10.527724266052246, 'margin_dpo/margin_mean': 10.527724266052246, 'margin_dpo/margin_std': 12.154861450195312, 'logps/chosen': -55.36868667602539, 'logps/rejected': -105.42801666259766, 'logps/ref_chosen': -53.998600006103516, 'logps/ref_rejected': -93.53019714355469, 'KL/chosen_KL_mean': -1.370086669921875, 'KL/rejected_KL_mean': -11.897815704345703, 'KL/mean': -6.633951187133789, 'KL/std': 9.861505508422852, 'logits/chosen': -0.5971044898033142, 'logits/rejected': -0.5653672218322754, 'epoch': 0.09} + 9%|▉ | 64/681 [02:44<26:14, 2.55s/it] 10%|▉ | 65/681 [02:46<26:23, 2.57s/it] {'loss': 1.0021, 'grad_norm': 36.674705505371094, 'learning_rate': 4.63768115942029e-07, 'fcm_dpo/beta': 0.05090530961751938, 'fcm_dpo/q_t': 0.3747457265853882, 'fcm_dpo/delta': -0.15866145491600037, 'fcm_dpo/margin': 10.78107738494873, 'margin_dpo/margin_mean': 10.781078338623047, 'margin_dpo/margin_std': 12.379752159118652, 'logps/chosen': -68.69613647460938, 'logps/rejected': -124.58767700195312, 'logps/ref_chosen': -64.83599853515625, 'logps/ref_rejected': -109.94645690917969, 'KL/chosen_KL_mean': -3.860137939453125, 'KL/rejected_KL_mean': -14.641212463378906, 'KL/mean': -9.250676155090332, 'KL/std': 11.2495698928833, 'logits/chosen': -0.6921563148498535, 'logits/rejected': -0.6807618737220764, 'epoch': 0.1} + 10%|▉ | 65/681 [02:46<26:23, 2.57s/it] 10%|▉ | 66/681 [02:49<26:32, 2.59s/it] {'loss': 1.0403, 'grad_norm': 32.98362350463867, 'learning_rate': 4.7101449275362313e-07, 'fcm_dpo/beta': 0.04975783824920654, 'fcm_dpo/q_t': 0.39193886518478394, 'fcm_dpo/delta': -0.08998537063598633, 'fcm_dpo/margin': 9.735556602478027, 'margin_dpo/margin_mean': 9.735556602478027, 'margin_dpo/margin_std': 12.185860633850098, 'logps/chosen': -55.02747344970703, 'logps/rejected': -88.955810546875, 'logps/ref_chosen': -51.44352722167969, 'logps/ref_rejected': -75.63629913330078, 'KL/chosen_KL_mean': -3.5839481353759766, 'KL/rejected_KL_mean': -13.319507598876953, 'KL/mean': -8.451730728149414, 'KL/std': 10.042497634887695, 'logits/chosen': -0.6278406381607056, 'logits/rejected': -0.5947624444961548, 'epoch': 0.1} + 10%|▉ | 66/681 [02:49<26:32, 2.59s/it] 10%|▉ | 67/681 [02:51<25:31, 2.49s/it] {'loss': 1.031, 'grad_norm': 33.585758209228516, 'learning_rate': 4.782608695652174e-07, 'fcm_dpo/beta': 0.049183670431375504, 'fcm_dpo/q_t': 0.38883906602859497, 'fcm_dpo/delta': -0.10368002951145172, 'fcm_dpo/margin': 10.138212203979492, 'margin_dpo/margin_mean': 10.138212203979492, 'margin_dpo/margin_std': 12.514627456665039, 'logps/chosen': -62.645179748535156, 'logps/rejected': -86.22987365722656, 'logps/ref_chosen': -59.34080505371094, 'logps/ref_rejected': -72.78728485107422, 'KL/chosen_KL_mean': -3.3043766021728516, 'KL/rejected_KL_mean': -13.44259262084961, 'KL/mean': -8.373483657836914, 'KL/std': 10.349268913269043, 'logits/chosen': -0.6145930886268616, 'logits/rejected': -0.5722061395645142, 'epoch': 0.1} + 10%|▉ | 67/681 [02:51<25:31, 2.49s/it] 10%|▉ | 68/681 [02:53<25:14, 2.47s/it] {'loss': 1.0309, 'grad_norm': 33.56498336791992, 'learning_rate': 4.855072463768116e-07, 'fcm_dpo/beta': 0.04838772863149643, 'fcm_dpo/q_t': 0.39209192991256714, 'fcm_dpo/delta': -0.0719866156578064, 'fcm_dpo/margin': 9.685721397399902, 'margin_dpo/margin_mean': 9.685721397399902, 'margin_dpo/margin_std': 10.507135391235352, 'logps/chosen': -69.44406127929688, 'logps/rejected': -91.1312026977539, 'logps/ref_chosen': -65.2058334350586, 'logps/ref_rejected': -77.20724487304688, 'KL/chosen_KL_mean': -4.238227844238281, 'KL/rejected_KL_mean': -13.923954010009766, 'KL/mean': -9.08108901977539, 'KL/std': 9.300742149353027, 'logits/chosen': -0.6377149820327759, 'logits/rejected': -0.5744598507881165, 'epoch': 0.1} + 10%|▉ | 68/681 [02:53<25:14, 2.47s/it] 10%|█ | 69/681 [02:56<26:02, 2.55s/it] {'loss': 0.9606, 'grad_norm': 35.174522399902344, 'learning_rate': 4.927536231884058e-07, 'fcm_dpo/beta': 0.04644005745649338, 'fcm_dpo/q_t': 0.36728864908218384, 'fcm_dpo/delta': -0.20860767364501953, 'fcm_dpo/margin': 12.812248229980469, 'margin_dpo/margin_mean': 12.812247276306152, 'margin_dpo/margin_std': 13.367576599121094, 'logps/chosen': -65.06889343261719, 'logps/rejected': -121.45076751708984, 'logps/ref_chosen': -59.81924057006836, 'logps/ref_rejected': -103.38886260986328, 'KL/chosen_KL_mean': -5.249656677246094, 'KL/rejected_KL_mean': -18.061904907226562, 'KL/mean': -11.655780792236328, 'KL/std': 11.926582336425781, 'logits/chosen': -0.6080530285835266, 'logits/rejected': -0.5861386060714722, 'epoch': 0.1} + 10%|█ | 69/681 [02:56<26:02, 2.55s/it] 10%|█ | 70/681 [02:59<25:42, 2.53s/it] {'loss': 0.9698, 'grad_norm': 36.163818359375, 'learning_rate': 5e-07, 'fcm_dpo/beta': 0.04471848905086517, 'fcm_dpo/q_t': 0.3647538721561432, 'fcm_dpo/delta': -0.2335546314716339, 'fcm_dpo/margin': 13.855405807495117, 'margin_dpo/margin_mean': 13.855405807495117, 'margin_dpo/margin_std': 15.959779739379883, 'logps/chosen': -69.48574829101562, 'logps/rejected': -112.47129821777344, 'logps/ref_chosen': -61.930641174316406, 'logps/ref_rejected': -91.06078338623047, 'KL/chosen_KL_mean': -7.555110931396484, 'KL/rejected_KL_mean': -21.41051483154297, 'KL/mean': -14.482812881469727, 'KL/std': 15.016265869140625, 'logits/chosen': -0.6315578818321228, 'logits/rejected': -0.5984231233596802, 'epoch': 0.1} + 10%|█ | 70/681 [02:59<25:42, 2.53s/it] 10%|█ | 71/681 [03:01<25:42, 2.53s/it] {'loss': 0.9201, 'grad_norm': 33.68754196166992, 'learning_rate': 4.999967061337492e-07, 'fcm_dpo/beta': 0.04178931191563606, 'fcm_dpo/q_t': 0.35157865285873413, 'fcm_dpo/delta': -0.32131147384643555, 'fcm_dpo/margin': 16.68012237548828, 'margin_dpo/margin_mean': 16.68012237548828, 'margin_dpo/margin_std': 17.86024284362793, 'logps/chosen': -69.49346923828125, 'logps/rejected': -121.75987243652344, 'logps/ref_chosen': -61.750335693359375, 'logps/ref_rejected': -97.33662414550781, 'KL/chosen_KL_mean': -7.743133544921875, 'KL/rejected_KL_mean': -24.423255920410156, 'KL/mean': -16.083194732666016, 'KL/std': 16.247331619262695, 'logits/chosen': -0.6753981113433838, 'logits/rejected': -0.63995361328125, 'epoch': 0.1} + 10%|█ | 71/681 [03:01<25:42, 2.53s/it] 11%|█ | 72/681 [03:04<25:59, 2.56s/it] {'loss': 0.9598, 'grad_norm': 35.54236602783203, 'learning_rate': 4.999868246217933e-07, 'fcm_dpo/beta': 0.03948363661766052, 'fcm_dpo/q_t': 0.35484981536865234, 'fcm_dpo/delta': -0.290458619594574, 'fcm_dpo/margin': 16.97983169555664, 'margin_dpo/margin_mean': 16.97983169555664, 'margin_dpo/margin_std': 20.594505310058594, 'logps/chosen': -74.88831329345703, 'logps/rejected': -121.10172271728516, 'logps/ref_chosen': -66.05341339111328, 'logps/ref_rejected': -95.2869873046875, 'KL/chosen_KL_mean': -8.834901809692383, 'KL/rejected_KL_mean': -25.814735412597656, 'KL/mean': -17.32482147216797, 'KL/std': 17.12436866760254, 'logits/chosen': -0.6629385352134705, 'logits/rejected': -0.6264936327934265, 'epoch': 0.11} + 11%|█ | 72/681 [03:04<25:59, 2.56s/it] 11%|█ | 73/681 [03:07<26:27, 2.61s/it] {'loss': 1.0587, 'grad_norm': 35.56188201904297, 'learning_rate': 4.999703557245192e-07, 'fcm_dpo/beta': 0.03733060508966446, 'fcm_dpo/q_t': 0.37481170892715454, 'fcm_dpo/delta': -0.25653302669525146, 'fcm_dpo/margin': 17.105716705322266, 'margin_dpo/margin_mean': 17.105716705322266, 'margin_dpo/margin_std': 27.76058006286621, 'logps/chosen': -77.06851196289062, 'logps/rejected': -118.37409210205078, 'logps/ref_chosen': -66.25627136230469, 'logps/ref_rejected': -90.45613098144531, 'KL/chosen_KL_mean': -10.812238693237305, 'KL/rejected_KL_mean': -27.917957305908203, 'KL/mean': -19.365093231201172, 'KL/std': 21.935474395751953, 'logits/chosen': -0.6949942708015442, 'logits/rejected': -0.6558982729911804, 'epoch': 0.11} + 11%|█ | 73/681 [03:07<26:27, 2.61s/it] 11%|█ | 74/681 [03:09<26:09, 2.59s/it] {'loss': 1.0082, 'grad_norm': 35.718910217285156, 'learning_rate': 4.999472998758977e-07, 'fcm_dpo/beta': 0.03536809980869293, 'fcm_dpo/q_t': 0.36699697375297546, 'fcm_dpo/delta': -0.29220515489578247, 'fcm_dpo/margin': 18.996326446533203, 'margin_dpo/margin_mean': 18.996326446533203, 'margin_dpo/margin_std': 30.294841766357422, 'logps/chosen': -64.44507598876953, 'logps/rejected': -125.96345520019531, 'logps/ref_chosen': -53.42488098144531, 'logps/ref_rejected': -95.94693756103516, 'KL/chosen_KL_mean': -11.020193099975586, 'KL/rejected_KL_mean': -30.016517639160156, 'KL/mean': -20.51835823059082, 'KL/std': 24.099422454833984, 'logits/chosen': -0.6316280364990234, 'logits/rejected': -0.6258025169372559, 'epoch': 0.11} + 11%|█ | 74/681 [03:09<26:09, 2.59s/it] 11%|█ | 75/681 [03:12<26:20, 2.61s/it] {'loss': 0.862, 'grad_norm': 30.176677703857422, 'learning_rate': 4.999176576834721e-07, 'fcm_dpo/beta': 0.03265610337257385, 'fcm_dpo/q_t': 0.3248700201511383, 'fcm_dpo/delta': -0.5023984909057617, 'fcm_dpo/margin': 26.377384185791016, 'margin_dpo/margin_mean': 26.377382278442383, 'margin_dpo/margin_std': 27.90032196044922, 'logps/chosen': -62.67523193359375, 'logps/rejected': -148.44493103027344, 'logps/ref_chosen': -51.861663818359375, 'logps/ref_rejected': -111.25398254394531, 'KL/chosen_KL_mean': -10.813570022583008, 'KL/rejected_KL_mean': -37.190948486328125, 'KL/mean': -24.00226402282715, 'KL/std': 24.315690994262695, 'logits/chosen': -0.6893298625946045, 'logits/rejected': -0.6836451292037964, 'epoch': 0.11} + 11%|█ | 75/681 [03:12<26:20, 2.61s/it] 11%|█ | 76/681 [03:14<26:08, 2.59s/it] {'loss': 1.0435, 'grad_norm': 31.088647842407227, 'learning_rate': 4.998814299283415e-07, 'fcm_dpo/beta': 0.031097372993826866, 'fcm_dpo/q_t': 0.3852229416370392, 'fcm_dpo/delta': -0.10961665213108063, 'fcm_dpo/margin': 16.212825775146484, 'margin_dpo/margin_mean': 16.21282386779785, 'margin_dpo/margin_std': 21.476770401000977, 'logps/chosen': -66.13577270507812, 'logps/rejected': -107.29917907714844, 'logps/ref_chosen': -53.26603698730469, 'logps/ref_rejected': -78.21662902832031, 'KL/chosen_KL_mean': -12.86973762512207, 'KL/rejected_KL_mean': -29.082550048828125, 'KL/mean': -20.976146697998047, 'KL/std': 19.501949310302734, 'logits/chosen': -0.7124214172363281, 'logits/rejected': -0.6733113527297974, 'epoch': 0.11} + 11%|█ | 76/681 [03:14<26:08, 2.59s/it] 11%|█▏ | 77/681 [03:17<25:08, 2.50s/it] {'loss': 0.9116, 'grad_norm': 32.505531311035156, 'learning_rate': 4.998386175651409e-07, 'fcm_dpo/beta': 0.028904041275382042, 'fcm_dpo/q_t': 0.3336452543735504, 'fcm_dpo/delta': -0.40436428785324097, 'fcm_dpo/margin': 26.634933471679688, 'margin_dpo/margin_mean': 26.634933471679688, 'margin_dpo/margin_std': 29.75762176513672, 'logps/chosen': -69.680908203125, 'logps/rejected': -131.99278259277344, 'logps/ref_chosen': -58.0966796875, 'logps/ref_rejected': -93.77361297607422, 'KL/chosen_KL_mean': -11.584232330322266, 'KL/rejected_KL_mean': -38.21916580200195, 'KL/mean': -24.901695251464844, 'KL/std': 26.45264434814453, 'logits/chosen': -0.678729236125946, 'logits/rejected': -0.6420924663543701, 'epoch': 0.11} + 11%|█▏ | 77/681 [03:17<25:08, 2.50s/it] 11%|█▏ | 78/681 [03:19<25:33, 2.54s/it] {'loss': 1.0166, 'grad_norm': 28.661237716674805, 'learning_rate': 4.997892217220159e-07, 'fcm_dpo/beta': 0.027905140072107315, 'fcm_dpo/q_t': 0.3796628713607788, 'fcm_dpo/delta': -0.15760990977287292, 'fcm_dpo/margin': 19.68130111694336, 'margin_dpo/margin_mean': 19.68130111694336, 'margin_dpo/margin_std': 24.757617950439453, 'logps/chosen': -67.01849365234375, 'logps/rejected': -116.02037811279297, 'logps/ref_chosen': -55.61378479003906, 'logps/ref_rejected': -84.93436431884766, 'KL/chosen_KL_mean': -11.404712677001953, 'KL/rejected_KL_mean': -31.086013793945312, 'KL/mean': -21.245365142822266, 'KL/std': 21.289440155029297, 'logits/chosen': -0.6651836037635803, 'logits/rejected': -0.6431748270988464, 'epoch': 0.11} + 11%|█▏ | 78/681 [03:19<25:33, 2.54s/it] 12%|█▏ | 79/681 [03:22<25:50, 2.58s/it] {'loss': 1.0255, 'grad_norm': 25.604280471801758, 'learning_rate': 4.997332437005931e-07, 'fcm_dpo/beta': 0.02685295045375824, 'fcm_dpo/q_t': 0.3792785704135895, 'fcm_dpo/delta': -0.18282675743103027, 'fcm_dpo/margin': 21.310894012451172, 'margin_dpo/margin_mean': 21.310894012451172, 'margin_dpo/margin_std': 29.21087074279785, 'logps/chosen': -67.3143310546875, 'logps/rejected': -120.82229614257812, 'logps/ref_chosen': -55.45048522949219, 'logps/ref_rejected': -87.64756774902344, 'KL/chosen_KL_mean': -11.86384391784668, 'KL/rejected_KL_mean': -33.17473602294922, 'KL/mean': -22.519290924072266, 'KL/std': 23.79071044921875, 'logits/chosen': -0.6872971057891846, 'logits/rejected': -0.6611636281013489, 'epoch': 0.12} + 12%|█▏ | 79/681 [03:22<25:50, 2.58s/it] 12%|█▏ | 80/681 [03:24<25:43, 2.57s/it] {'loss': 1.0581, 'grad_norm': 27.309263229370117, 'learning_rate': 4.996706849759452e-07, 'fcm_dpo/beta': 0.02588074468076229, 'fcm_dpo/q_t': 0.39049336314201355, 'fcm_dpo/delta': -0.13631115853786469, 'fcm_dpo/margin': 20.36130142211914, 'margin_dpo/margin_mean': 20.36130142211914, 'margin_dpo/margin_std': 29.580612182617188, 'logps/chosen': -73.51201629638672, 'logps/rejected': -122.90153503417969, 'logps/ref_chosen': -58.519290924072266, 'logps/ref_rejected': -87.54750061035156, 'KL/chosen_KL_mean': -14.992725372314453, 'KL/rejected_KL_mean': -35.354034423828125, 'KL/mean': -25.173377990722656, 'KL/std': 25.4627742767334, 'logits/chosen': -0.7352666854858398, 'logits/rejected': -0.69718337059021, 'epoch': 0.12} + 12%|█▏ | 80/681 [03:24<25:43, 2.57s/it] 12%|█▏ | 81/681 [03:27<25:58, 2.60s/it] {'loss': 0.9671, 'grad_norm': 27.911373138427734, 'learning_rate': 4.996015471965529e-07, 'fcm_dpo/beta': 0.024616166949272156, 'fcm_dpo/q_t': 0.35815176367759705, 'fcm_dpo/delta': -0.30468764901161194, 'fcm_dpo/margin': 27.69186782836914, 'margin_dpo/margin_mean': 27.69186782836914, 'margin_dpo/margin_std': 35.64756393432617, 'logps/chosen': -80.48961639404297, 'logps/rejected': -171.39532470703125, 'logps/ref_chosen': -66.44886779785156, 'logps/ref_rejected': -129.66270446777344, 'KL/chosen_KL_mean': -14.040748596191406, 'KL/rejected_KL_mean': -41.73262023925781, 'KL/mean': -27.886680603027344, 'KL/std': 29.870553970336914, 'logits/chosen': -0.6956121921539307, 'logits/rejected': -0.6685779094696045, 'epoch': 0.12} + 12%|█▏ | 81/681 [03:27<25:58, 2.60s/it] 12%|█▏ | 82/681 [03:30<25:24, 2.54s/it] {'loss': 1.1014, 'grad_norm': 31.045066833496094, 'learning_rate': 4.995258321842611e-07, 'fcm_dpo/beta': 0.024119626730680466, 'fcm_dpo/q_t': 0.389964759349823, 'fcm_dpo/delta': -0.1382198929786682, 'fcm_dpo/margin': 21.982955932617188, 'margin_dpo/margin_mean': 21.982955932617188, 'margin_dpo/margin_std': 37.50407028198242, 'logps/chosen': -68.57518005371094, 'logps/rejected': -129.06900024414062, 'logps/ref_chosen': -52.232383728027344, 'logps/ref_rejected': -90.74325561523438, 'KL/chosen_KL_mean': -16.342798233032227, 'KL/rejected_KL_mean': -38.32575607299805, 'KL/mean': -27.334274291992188, 'KL/std': 27.074535369873047, 'logits/chosen': -0.6336376070976257, 'logits/rejected': -0.6242895722389221, 'epoch': 0.12} + 12%|█▏ | 82/681 [03:30<25:24, 2.54s/it] 12%|█▏ | 83/681 [03:32<25:01, 2.51s/it] {'loss': 1.0016, 'grad_norm': 27.287622451782227, 'learning_rate': 4.994435419342304e-07, 'fcm_dpo/beta': 0.022889206185936928, 'fcm_dpo/q_t': 0.3712141811847687, 'fcm_dpo/delta': -0.21175748109817505, 'fcm_dpo/margin': 26.131641387939453, 'margin_dpo/margin_mean': 26.131643295288086, 'margin_dpo/margin_std': 33.42725372314453, 'logps/chosen': -72.53076171875, 'logps/rejected': -146.55091857910156, 'logps/ref_chosen': -55.82738494873047, 'logps/ref_rejected': -103.71589660644531, 'KL/chosen_KL_mean': -16.703378677368164, 'KL/rejected_KL_mean': -42.835018157958984, 'KL/mean': -29.769197463989258, 'KL/std': 28.040857315063477, 'logits/chosen': -0.6630829572677612, 'logits/rejected': -0.6318089962005615, 'epoch': 0.12} + 12%|█▏ | 83/681 [03:32<25:01, 2.51s/it] 12%|█▏ | 84/681 [03:35<25:42, 2.58s/it] {'loss': 1.0329, 'grad_norm': 24.89923858642578, 'learning_rate': 4.993546786148857e-07, 'fcm_dpo/beta': 0.022253597155213356, 'fcm_dpo/q_t': 0.3886079490184784, 'fcm_dpo/delta': -0.08681607246398926, 'fcm_dpo/margin': 21.604461669921875, 'margin_dpo/margin_mean': 21.604461669921875, 'margin_dpo/margin_std': 24.17331314086914, 'logps/chosen': -82.97685241699219, 'logps/rejected': -124.70375061035156, 'logps/ref_chosen': -67.1761703491211, 'logps/ref_rejected': -87.29859924316406, 'KL/chosen_KL_mean': -15.800683975219727, 'KL/rejected_KL_mean': -37.4051513671875, 'KL/mean': -26.602916717529297, 'KL/std': 23.81344223022461, 'logits/chosen': -0.6726013422012329, 'logits/rejected': -0.635522723197937, 'epoch': 0.12} + 12%|█▏ | 84/681 [03:35<25:42, 2.58s/it] 12%|█▏ | 85/681 [03:37<26:07, 2.63s/it] {'loss': 1.0486, 'grad_norm': 24.93961524963379, 'learning_rate': 4.992592445678582e-07, 'fcm_dpo/beta': 0.022155042737722397, 'fcm_dpo/q_t': 0.3894878029823303, 'fcm_dpo/delta': -0.10365713387727737, 'fcm_dpo/margin': 22.484390258789062, 'margin_dpo/margin_mean': 22.484390258789062, 'margin_dpo/margin_std': 29.117321014404297, 'logps/chosen': -74.23526000976562, 'logps/rejected': -116.95182800292969, 'logps/ref_chosen': -58.4066162109375, 'logps/ref_rejected': -78.63880157470703, 'KL/chosen_KL_mean': -15.828641891479492, 'KL/rejected_KL_mean': -38.31303024291992, 'KL/mean': -27.07083511352539, 'KL/std': 24.207460403442383, 'logits/chosen': -0.6551598310470581, 'logits/rejected': -0.623712420463562, 'epoch': 0.12} + 12%|█▏ | 85/681 [03:37<26:07, 2.63s/it] 13%|█▎ | 86/681 [03:40<26:19, 2.65s/it] {'loss': 1.1337, 'grad_norm': 31.930374145507812, 'learning_rate': 4.991572423079235e-07, 'fcm_dpo/beta': 0.0215867031365633, 'fcm_dpo/q_t': 0.40107935667037964, 'fcm_dpo/delta': -0.1179293692111969, 'fcm_dpo/margin': 23.718732833862305, 'margin_dpo/margin_mean': 23.718732833862305, 'margin_dpo/margin_std': 45.869529724121094, 'logps/chosen': -76.57453155517578, 'logps/rejected': -132.27743530273438, 'logps/ref_chosen': -56.13746643066406, 'logps/ref_rejected': -88.12165069580078, 'KL/chosen_KL_mean': -20.43706703186035, 'KL/rejected_KL_mean': -44.155792236328125, 'KL/mean': -32.29643249511719, 'KL/std': 33.11329650878906, 'logits/chosen': -0.6938769817352295, 'logits/rejected': -0.6838431358337402, 'epoch': 0.13} + 13%|█▎ | 86/681 [03:40<26:19, 2.65s/it] 13%|█▎ | 87/681 [03:43<26:11, 2.65s/it] {'loss': 1.0366, 'grad_norm': 25.189109802246094, 'learning_rate': 4.990486745229364e-07, 'fcm_dpo/beta': 0.020640596747398376, 'fcm_dpo/q_t': 0.3739252984523773, 'fcm_dpo/delta': -0.19566936790943146, 'fcm_dpo/margin': 28.232730865478516, 'margin_dpo/margin_mean': 28.232730865478516, 'margin_dpo/margin_std': 40.02729797363281, 'logps/chosen': -75.63643646240234, 'logps/rejected': -143.70065307617188, 'logps/ref_chosen': -55.63609313964844, 'logps/ref_rejected': -95.46757507324219, 'KL/chosen_KL_mean': -20.00033950805664, 'KL/rejected_KL_mean': -48.23307418823242, 'KL/mean': -34.11670684814453, 'KL/std': 32.86204528808594, 'logits/chosen': -0.7301120758056641, 'logits/rejected': -0.7094823122024536, 'epoch': 0.13} + 13%|█▎ | 87/681 [03:43<26:11, 2.65s/it] 13%|█▎ | 88/681 [03:45<26:08, 2.64s/it] {'loss': 1.1371, 'grad_norm': 26.31541633605957, 'learning_rate': 4.989335440737586e-07, 'fcm_dpo/beta': 0.020320210605859756, 'fcm_dpo/q_t': 0.40818527340888977, 'fcm_dpo/delta': -0.03590092435479164, 'fcm_dpo/margin': 21.33620262145996, 'margin_dpo/margin_mean': 21.33620262145996, 'margin_dpo/margin_std': 36.64844512939453, 'logps/chosen': -98.9044189453125, 'logps/rejected': -153.27796936035156, 'logps/ref_chosen': -73.67115020751953, 'logps/ref_rejected': -106.70849609375, 'KL/chosen_KL_mean': -25.23326873779297, 'KL/rejected_KL_mean': -46.5694694519043, 'KL/mean': -35.901371002197266, 'KL/std': 34.48196792602539, 'logits/chosen': -0.6587375402450562, 'logits/rejected': -0.6542388796806335, 'epoch': 0.13} + 13%|█▎ | 88/681 [03:45<26:08, 2.64s/it] 13%|█▎ | 89/681 [03:48<25:31, 2.59s/it] {'loss': 1.0599, 'grad_norm': 23.98124122619629, 'learning_rate': 4.988118539941847e-07, 'fcm_dpo/beta': 0.02023715153336525, 'fcm_dpo/q_t': 0.3936477303504944, 'fcm_dpo/delta': -0.08208386600017548, 'fcm_dpo/margin': 23.62615966796875, 'margin_dpo/margin_mean': 23.626155853271484, 'margin_dpo/margin_std': 32.83501434326172, 'logps/chosen': -76.07754516601562, 'logps/rejected': -121.16233825683594, 'logps/ref_chosen': -60.624916076660156, 'logps/ref_rejected': -82.08354949951172, 'KL/chosen_KL_mean': -15.452627182006836, 'KL/rejected_KL_mean': -39.078792572021484, 'KL/mean': -27.265708923339844, 'KL/std': 28.668766021728516, 'logits/chosen': -0.7175350189208984, 'logits/rejected': -0.6864731311798096, 'epoch': 0.13} + 13%|█▎ | 89/681 [03:48<25:31, 2.59s/it] 13%|█▎ | 90/681 [03:50<25:04, 2.55s/it] {'loss': 1.0349, 'grad_norm': 24.72134780883789, 'learning_rate': 4.986836074908615e-07, 'fcm_dpo/beta': 0.01951216161251068, 'fcm_dpo/q_t': 0.37370768189430237, 'fcm_dpo/delta': -0.2572743892669678, 'fcm_dpo/margin': 32.86386489868164, 'margin_dpo/margin_mean': 32.863868713378906, 'margin_dpo/margin_std': 49.08679962158203, 'logps/chosen': -72.82841491699219, 'logps/rejected': -163.95169067382812, 'logps/ref_chosen': -53.285308837890625, 'logps/ref_rejected': -111.54470825195312, 'KL/chosen_KL_mean': -19.54310417175293, 'KL/rejected_KL_mean': -52.406978607177734, 'KL/mean': -35.97503662109375, 'KL/std': 39.376708984375, 'logits/chosen': -0.636969268321991, 'logits/rejected': -0.6531749963760376, 'epoch': 0.13} + 13%|█▎ | 90/681 [03:50<25:04, 2.55s/it] 13%|█▎ | 91/681 [03:53<25:15, 2.57s/it] {'loss': 1.0839, 'grad_norm': 24.04185676574707, 'learning_rate': 4.985488079432037e-07, 'fcm_dpo/beta': 0.018853671848773956, 'fcm_dpo/q_t': 0.3956088721752167, 'fcm_dpo/delta': -0.0963558554649353, 'fcm_dpo/margin': 26.079620361328125, 'margin_dpo/margin_mean': 26.079620361328125, 'margin_dpo/margin_std': 40.8889274597168, 'logps/chosen': -82.43911743164062, 'logps/rejected': -134.5897216796875, 'logps/ref_chosen': -61.802955627441406, 'logps/ref_rejected': -87.87395477294922, 'KL/chosen_KL_mean': -20.63615608215332, 'KL/rejected_KL_mean': -46.71577453613281, 'KL/mean': -33.67596435546875, 'KL/std': 31.137435913085938, 'logits/chosen': -0.6773035526275635, 'logits/rejected': -0.645221471786499, 'epoch': 0.13} + 13%|█▎ | 91/681 [03:53<25:15, 2.57s/it] 14%|█▎ | 92/681 [03:55<24:55, 2.54s/it] {'loss': 1.0646, 'grad_norm': 22.194721221923828, 'learning_rate': 4.984074589033043e-07, 'fcm_dpo/beta': 0.01851240172982216, 'fcm_dpo/q_t': 0.3927502930164337, 'fcm_dpo/delta': -0.10138699412345886, 'fcm_dpo/margin': 26.81001091003418, 'margin_dpo/margin_mean': 26.81001091003418, 'margin_dpo/margin_std': 38.659828186035156, 'logps/chosen': -69.8050537109375, 'logps/rejected': -122.85547637939453, 'logps/ref_chosen': -51.640769958496094, 'logps/ref_rejected': -77.88117980957031, 'KL/chosen_KL_mean': -18.164283752441406, 'KL/rejected_KL_mean': -44.97429275512695, 'KL/mean': -31.569286346435547, 'KL/std': 31.935270309448242, 'logits/chosen': -0.6895343065261841, 'logits/rejected': -0.6665633916854858, 'epoch': 0.14} + 14%|█▎ | 92/681 [03:55<24:55, 2.54s/it] 14%|█▎ | 93/681 [03:58<23:41, 2.42s/it] {'loss': 1.0447, 'grad_norm': 23.166257858276367, 'learning_rate': 4.982595640958425e-07, 'fcm_dpo/beta': 0.018106218427419662, 'fcm_dpo/q_t': 0.39256197214126587, 'fcm_dpo/delta': -0.08807133883237839, 'fcm_dpo/margin': 26.72509002685547, 'margin_dpo/margin_mean': 26.72509002685547, 'margin_dpo/margin_std': 34.69382095336914, 'logps/chosen': -73.01956176757812, 'logps/rejected': -124.37615966796875, 'logps/ref_chosen': -52.529239654541016, 'logps/ref_rejected': -77.16075134277344, 'KL/chosen_KL_mean': -20.49032211303711, 'KL/rejected_KL_mean': -47.21541213989258, 'KL/mean': -33.852867126464844, 'KL/std': 29.824676513671875, 'logits/chosen': -0.7384850978851318, 'logits/rejected': -0.6940040588378906, 'epoch': 0.14} + 14%|█▎ | 93/681 [03:58<23:41, 2.42s/it] 14%|█▍ | 94/681 [04:00<24:33, 2.51s/it] {'loss': 1.0039, 'grad_norm': 22.59712028503418, 'learning_rate': 4.98105127417984e-07, 'fcm_dpo/beta': 0.017487093806266785, 'fcm_dpo/q_t': 0.378325879573822, 'fcm_dpo/delta': -0.15506845712661743, 'fcm_dpo/margin': 31.172901153564453, 'margin_dpo/margin_mean': 31.172901153564453, 'margin_dpo/margin_std': 36.12760925292969, 'logps/chosen': -83.21534729003906, 'logps/rejected': -152.76466369628906, 'logps/ref_chosen': -61.22261047363281, 'logps/ref_rejected': -99.59902954101562, 'KL/chosen_KL_mean': -21.992738723754883, 'KL/rejected_KL_mean': -53.16563034057617, 'KL/mean': -37.579185485839844, 'KL/std': 34.558380126953125, 'logits/chosen': -0.6630722880363464, 'logits/rejected': -0.6506177186965942, 'epoch': 0.14} + 14%|█▍ | 94/681 [04:00<24:33, 2.51s/it] 14%|█▍ | 95/681 [04:03<24:22, 2.50s/it] {'loss': 1.0881, 'grad_norm': 21.476041793823242, 'learning_rate': 4.979441529392784e-07, 'fcm_dpo/beta': 0.01729883998632431, 'fcm_dpo/q_t': 0.4017173647880554, 'fcm_dpo/delta': -0.031661614775657654, 'fcm_dpo/margin': 24.85043716430664, 'margin_dpo/margin_mean': 24.850439071655273, 'margin_dpo/margin_std': 35.13576889038086, 'logps/chosen': -73.71357727050781, 'logps/rejected': -121.92072296142578, 'logps/ref_chosen': -52.523643493652344, 'logps/ref_rejected': -75.8803482055664, 'KL/chosen_KL_mean': -21.189931869506836, 'KL/rejected_KL_mean': -46.040374755859375, 'KL/mean': -33.615150451660156, 'KL/std': 33.43156433105469, 'logits/chosen': -0.6821004152297974, 'logits/rejected': -0.655462920665741, 'epoch': 0.14} + 14%|█▍ | 95/681 [04:03<24:22, 2.50s/it] 14%|█▍ | 96/681 [04:05<24:28, 2.51s/it] {'loss': 0.9975, 'grad_norm': 21.628402709960938, 'learning_rate': 4.977766449015534e-07, 'fcm_dpo/beta': 0.016751719638705254, 'fcm_dpo/q_t': 0.3761478066444397, 'fcm_dpo/delta': -0.18152689933776855, 'fcm_dpo/margin': 33.95726013183594, 'margin_dpo/margin_mean': 33.95726013183594, 'margin_dpo/margin_std': 41.307655334472656, 'logps/chosen': -82.80143737792969, 'logps/rejected': -151.19773864746094, 'logps/ref_chosen': -62.15697479248047, 'logps/ref_rejected': -96.59601593017578, 'KL/chosen_KL_mean': -20.64446449279785, 'KL/rejected_KL_mean': -54.601722717285156, 'KL/mean': -37.62309646606445, 'KL/std': 37.295902252197266, 'logits/chosen': -0.7095851898193359, 'logits/rejected': -0.6844866275787354, 'epoch': 0.14} + 14%|█▍ | 96/681 [04:05<24:28, 2.51s/it] 14%|█▍ | 97/681 [04:08<24:35, 2.53s/it] {'loss': 1.0676, 'grad_norm': 22.661230087280273, 'learning_rate': 4.976026077188012e-07, 'fcm_dpo/beta': 0.0167661365121603, 'fcm_dpo/q_t': 0.3992045521736145, 'fcm_dpo/delta': -0.0386638417840004, 'fcm_dpo/margin': 25.998580932617188, 'margin_dpo/margin_mean': 25.998584747314453, 'margin_dpo/margin_std': 31.176647186279297, 'logps/chosen': -76.47802734375, 'logps/rejected': -124.79499816894531, 'logps/ref_chosen': -54.646366119384766, 'logps/ref_rejected': -76.96475219726562, 'KL/chosen_KL_mean': -21.8316650390625, 'KL/rejected_KL_mean': -47.83024215698242, 'KL/mean': -34.830955505371094, 'KL/std': 31.186649322509766, 'logits/chosen': -0.6369616389274597, 'logits/rejected': -0.5970015525817871, 'epoch': 0.14} + 14%|█▍ | 97/681 [04:08<24:35, 2.53s/it] 14%|█▍ | 98/681 [04:10<24:31, 2.52s/it] {'loss': 1.074, 'grad_norm': 24.021251678466797, 'learning_rate': 4.974220459770639e-07, 'fcm_dpo/beta': 0.01634235680103302, 'fcm_dpo/q_t': 0.38946154713630676, 'fcm_dpo/delta': -0.08897878974676132, 'fcm_dpo/margin': 29.64310073852539, 'margin_dpo/margin_mean': 29.64310073852539, 'margin_dpo/margin_std': 42.92453384399414, 'logps/chosen': -92.54368591308594, 'logps/rejected': -153.45565795898438, 'logps/ref_chosen': -65.25862884521484, 'logps/ref_rejected': -96.5274887084961, 'KL/chosen_KL_mean': -27.28506088256836, 'KL/rejected_KL_mean': -56.92816162109375, 'KL/mean': -42.106605529785156, 'KL/std': 35.70084762573242, 'logits/chosen': -0.6834473013877869, 'logits/rejected': -0.669763445854187, 'epoch': 0.14} + 14%|█▍ | 98/681 [04:10<24:31, 2.52s/it] 15%|█▍ | 99/681 [04:13<23:33, 2.43s/it] {'loss': 1.0012, 'grad_norm': 20.537532806396484, 'learning_rate': 4.972349644343108e-07, 'fcm_dpo/beta': 0.015781186521053314, 'fcm_dpo/q_t': 0.3759078085422516, 'fcm_dpo/delta': -0.19561892747879028, 'fcm_dpo/margin': 36.968345642089844, 'margin_dpo/margin_mean': 36.968345642089844, 'margin_dpo/margin_std': 47.97369384765625, 'logps/chosen': -66.72130584716797, 'logps/rejected': -144.48910522460938, 'logps/ref_chosen': -45.638484954833984, 'logps/ref_rejected': -86.43793487548828, 'KL/chosen_KL_mean': -21.082822799682617, 'KL/rejected_KL_mean': -58.05116271972656, 'KL/mean': -39.566993713378906, 'KL/std': 38.398990631103516, 'logits/chosen': -0.637118935585022, 'logits/rejected': -0.639615535736084, 'epoch': 0.15} + 15%|█▍ | 99/681 [04:13<23:33, 2.43s/it] 15%|█▍ | 100/681 [04:15<24:04, 2.49s/it] {'loss': 1.1847, 'grad_norm': 23.365909576416016, 'learning_rate': 4.970413680203148e-07, 'fcm_dpo/beta': 0.015916183590888977, 'fcm_dpo/q_t': 0.4260128140449524, 'fcm_dpo/delta': 0.07390052080154419, 'fcm_dpo/margin': 20.597900390625, 'margin_dpo/margin_mean': 20.597902297973633, 'margin_dpo/margin_std': 39.958717346191406, 'logps/chosen': -81.49853515625, 'logps/rejected': -118.56266784667969, 'logps/ref_chosen': -57.59397888183594, 'logps/ref_rejected': -74.06021118164062, 'KL/chosen_KL_mean': -23.904550552368164, 'KL/rejected_KL_mean': -44.5024528503418, 'KL/mean': -34.20349884033203, 'KL/std': 30.948299407958984, 'logits/chosen': -0.6662120819091797, 'logits/rejected': -0.6264818906784058, 'epoch': 0.15} + 15%|█▍ | 100/681 [04:15<24:04, 2.49s/it] 15%|█▍ | 101/681 [04:18<23:47, 2.46s/it] {'loss': 1.1411, 'grad_norm': 23.029918670654297, 'learning_rate': 4.968412618365215e-07, 'fcm_dpo/beta': 0.015804601833224297, 'fcm_dpo/q_t': 0.41378289461135864, 'fcm_dpo/delta': -0.006234418600797653, 'fcm_dpo/margin': 25.647552490234375, 'margin_dpo/margin_mean': 25.647552490234375, 'margin_dpo/margin_std': 45.90587615966797, 'logps/chosen': -90.95987701416016, 'logps/rejected': -138.1482696533203, 'logps/ref_chosen': -61.64885330200195, 'logps/ref_rejected': -83.18968200683594, 'KL/chosen_KL_mean': -29.311023712158203, 'KL/rejected_KL_mean': -54.958587646484375, 'KL/mean': -42.13480758666992, 'KL/std': 38.1388053894043, 'logits/chosen': -0.6485938429832458, 'logits/rejected': -0.6187626123428345, 'epoch': 0.15} + 15%|█▍ | 101/681 [04:18<23:47, 2.46s/it] 15%|█▍ | 102/681 [04:20<23:33, 2.44s/it] {'loss': 1.222, 'grad_norm': 26.079177856445312, 'learning_rate': 4.966346511559149e-07, 'fcm_dpo/beta': 0.015855927020311356, 'fcm_dpo/q_t': 0.43402665853500366, 'fcm_dpo/delta': -0.019583335146307945, 'fcm_dpo/margin': 18.64190673828125, 'margin_dpo/margin_mean': 18.641904830932617, 'margin_dpo/margin_std': 42.348793029785156, 'logps/chosen': -95.34840393066406, 'logps/rejected': -118.09850311279297, 'logps/ref_chosen': -64.0788803100586, 'logps/ref_rejected': -68.18707275390625, 'KL/chosen_KL_mean': -31.269521713256836, 'KL/rejected_KL_mean': -49.91142654418945, 'KL/mean': -40.59046936035156, 'KL/std': 35.46381378173828, 'logits/chosen': -0.6906998157501221, 'logits/rejected': -0.6490976810455322, 'epoch': 0.15} + 15%|█▍ | 102/681 [04:20<23:33, 2.44s/it] 15%|█▌ | 103/681 [04:22<23:13, 2.41s/it] {'loss': 1.0005, 'grad_norm': 22.30409812927246, 'learning_rate': 4.964215414228785e-07, 'fcm_dpo/beta': 0.015347588807344437, 'fcm_dpo/q_t': 0.37476682662963867, 'fcm_dpo/delta': -0.19285300374031067, 'fcm_dpo/margin': 37.86910629272461, 'margin_dpo/margin_mean': 37.869102478027344, 'margin_dpo/margin_std': 48.369873046875, 'logps/chosen': -86.39921569824219, 'logps/rejected': -156.54176330566406, 'logps/ref_chosen': -61.299278259277344, 'logps/ref_rejected': -93.57270812988281, 'KL/chosen_KL_mean': -25.099937438964844, 'KL/rejected_KL_mean': -62.969051361083984, 'KL/mean': -44.034488677978516, 'KL/std': 39.63392639160156, 'logits/chosen': -0.6355269551277161, 'logits/rejected': -0.6004114151000977, 'epoch': 0.15} + 15%|█▌ | 103/681 [04:22<23:13, 2.41s/it] 15%|█▌ | 104/681 [04:25<22:39, 2.36s/it] {'loss': 1.0535, 'grad_norm': 21.63632583618164, 'learning_rate': 4.96201938253052e-07, 'fcm_dpo/beta': 0.014920437708497047, 'fcm_dpo/q_t': 0.38805246353149414, 'fcm_dpo/delta': -0.14584705233573914, 'fcm_dpo/margin': 36.067962646484375, 'margin_dpo/margin_mean': 36.06795883178711, 'margin_dpo/margin_std': 53.349693298339844, 'logps/chosen': -81.35029602050781, 'logps/rejected': -152.61019897460938, 'logps/ref_chosen': -54.372772216796875, 'logps/ref_rejected': -89.5647201538086, 'KL/chosen_KL_mean': -26.97751808166504, 'KL/rejected_KL_mean': -63.04547119140625, 'KL/mean': -45.011497497558594, 'KL/std': 43.440818786621094, 'logits/chosen': -0.6813393831253052, 'logits/rejected': -0.6534780859947205, 'epoch': 0.15} + 15%|█▌ | 104/681 [04:25<22:39, 2.36s/it] 15%|█▌ | 105/681 [04:27<23:21, 2.43s/it] {'loss': 0.8843, 'grad_norm': 21.752716064453125, 'learning_rate': 4.959758474331832e-07, 'fcm_dpo/beta': 0.014073311351239681, 'fcm_dpo/q_t': 0.3382055163383484, 'fcm_dpo/delta': -0.34200507402420044, 'fcm_dpo/margin': 50.99995422363281, 'margin_dpo/margin_mean': 50.99995803833008, 'margin_dpo/margin_std': 46.56443405151367, 'logps/chosen': -79.9150390625, 'logps/rejected': -174.24957275390625, 'logps/ref_chosen': -54.638946533203125, 'logps/ref_rejected': -97.97351837158203, 'KL/chosen_KL_mean': -25.276098251342773, 'KL/rejected_KL_mean': -76.27605438232422, 'KL/mean': -50.77607727050781, 'KL/std': 41.882408142089844, 'logits/chosen': -0.6410149931907654, 'logits/rejected': -0.6186502575874329, 'epoch': 0.15} + 15%|█▌ | 105/681 [04:27<23:21, 2.43s/it] 16%|█▌ | 106/681 [04:30<23:27, 2.45s/it] {'loss': 1.0661, 'grad_norm': 20.311628341674805, 'learning_rate': 4.957432749209755e-07, 'fcm_dpo/beta': 0.013703379780054092, 'fcm_dpo/q_t': 0.3993247449398041, 'fcm_dpo/delta': -0.03964092954993248, 'fcm_dpo/margin': 31.945636749267578, 'margin_dpo/margin_mean': 31.94563865661621, 'margin_dpo/margin_std': 39.89073944091797, 'logps/chosen': -83.21609497070312, 'logps/rejected': -145.553466796875, 'logps/ref_chosen': -54.83289337158203, 'logps/ref_rejected': -85.22461700439453, 'KL/chosen_KL_mean': -28.383203506469727, 'KL/rejected_KL_mean': -60.32884216308594, 'KL/mean': -44.35602569580078, 'KL/std': 36.69441223144531, 'logits/chosen': -0.6390097141265869, 'logits/rejected': -0.6126164197921753, 'epoch': 0.16} + 16%|█▌ | 106/681 [04:30<23:27, 2.45s/it] 16%|█▌ | 107/681 [04:32<23:57, 2.50s/it] {'loss': 1.0566, 'grad_norm': 20.68709373474121, 'learning_rate': 4.955042268449307e-07, 'fcm_dpo/beta': 0.013466178439557552, 'fcm_dpo/q_t': 0.3910368084907532, 'fcm_dpo/delta': -0.087033212184906, 'fcm_dpo/margin': 35.80072021484375, 'margin_dpo/margin_mean': 35.800716400146484, 'margin_dpo/margin_std': 47.552371978759766, 'logps/chosen': -103.6465072631836, 'logps/rejected': -164.47891235351562, 'logps/ref_chosen': -69.70780944824219, 'logps/ref_rejected': -94.73950958251953, 'KL/chosen_KL_mean': -33.93869400024414, 'KL/rejected_KL_mean': -69.7394027709961, 'KL/mean': -51.83905029296875, 'KL/std': 44.899288177490234, 'logits/chosen': -0.6427664756774902, 'logits/rejected': -0.5967296361923218, 'epoch': 0.16} + 16%|█▌ | 107/681 [04:32<23:57, 2.50s/it] 16%|█▌ | 108/681 [04:35<23:33, 2.47s/it] {'loss': 1.0418, 'grad_norm': 20.52142906188965, 'learning_rate': 4.952587095041881e-07, 'fcm_dpo/beta': 0.013104308396577835, 'fcm_dpo/q_t': 0.3812934160232544, 'fcm_dpo/delta': -0.18593883514404297, 'fcm_dpo/margin': 43.90226745605469, 'margin_dpo/margin_mean': 43.90226745605469, 'margin_dpo/margin_std': 64.3550033569336, 'logps/chosen': -85.9057846069336, 'logps/rejected': -169.59417724609375, 'logps/ref_chosen': -56.0098876953125, 'logps/ref_rejected': -95.79601287841797, 'KL/chosen_KL_mean': -29.895898818969727, 'KL/rejected_KL_mean': -73.79816436767578, 'KL/mean': -51.84703063964844, 'KL/std': 49.70708465576172, 'logits/chosen': -0.6195969581604004, 'logits/rejected': -0.5994083881378174, 'epoch': 0.16} + 16%|█▌ | 108/681 [04:35<23:33, 2.47s/it] 16%|█▌ | 109/681 [04:38<24:48, 2.60s/it] {'loss': 1.0005, 'grad_norm': 21.661203384399414, 'learning_rate': 4.95006729368358e-07, 'fcm_dpo/beta': 0.012563558295369148, 'fcm_dpo/q_t': 0.37124601006507874, 'fcm_dpo/delta': -0.19107185304164886, 'fcm_dpo/margin': 46.09014892578125, 'margin_dpo/margin_mean': 46.090152740478516, 'margin_dpo/margin_std': 56.48835754394531, 'logps/chosen': -90.84333801269531, 'logps/rejected': -172.73373413085938, 'logps/ref_chosen': -62.88549041748047, 'logps/ref_rejected': -98.68573760986328, 'KL/chosen_KL_mean': -27.957853317260742, 'KL/rejected_KL_mean': -74.04800415039062, 'KL/mean': -51.0029296875, 'KL/std': 47.15357971191406, 'logits/chosen': -0.5873284339904785, 'logits/rejected': -0.5662369132041931, 'epoch': 0.16} + 16%|█▌ | 109/681 [04:38<24:48, 2.60s/it] 16%|█▌ | 110/681 [04:40<24:54, 2.62s/it] {'loss': 1.0618, 'grad_norm': 18.499637603759766, 'learning_rate': 4.947482930773511e-07, 'fcm_dpo/beta': 0.012130336835980415, 'fcm_dpo/q_t': 0.3895985782146454, 'fcm_dpo/delta': -0.11256064474582672, 'fcm_dpo/margin': 41.43762969970703, 'margin_dpo/margin_mean': 41.43762969970703, 'margin_dpo/margin_std': 56.833656311035156, 'logps/chosen': -87.68887329101562, 'logps/rejected': -150.1228485107422, 'logps/ref_chosen': -58.753684997558594, 'logps/ref_rejected': -79.75001525878906, 'KL/chosen_KL_mean': -28.93518829345703, 'KL/rejected_KL_mean': -70.37283325195312, 'KL/mean': -49.65400695800781, 'KL/std': 47.03770065307617, 'logits/chosen': -0.5971484780311584, 'logits/rejected': -0.5627081394195557, 'epoch': 0.16} + 16%|█▌ | 110/681 [04:40<24:54, 2.62s/it] 16%|█▋ | 111/681 [04:43<24:36, 2.59s/it] {'loss': 1.0405, 'grad_norm': 21.36251449584961, 'learning_rate': 4.944834074412042e-07, 'fcm_dpo/beta': 0.011841144412755966, 'fcm_dpo/q_t': 0.37844541668891907, 'fcm_dpo/delta': -0.1690835952758789, 'fcm_dpo/margin': 47.13909149169922, 'margin_dpo/margin_mean': 47.13909149169922, 'margin_dpo/margin_std': 65.69253540039062, 'logps/chosen': -101.31826782226562, 'logps/rejected': -178.26210021972656, 'logps/ref_chosen': -68.62410736083984, 'logps/ref_rejected': -98.42886352539062, 'KL/chosen_KL_mean': -32.69416046142578, 'KL/rejected_KL_mean': -79.83323669433594, 'KL/mean': -56.263702392578125, 'KL/std': 52.9500732421875, 'logits/chosen': -0.6595109701156616, 'logits/rejected': -0.6392641067504883, 'epoch': 0.16} + 16%|█▋ | 111/681 [04:43<24:36, 2.59s/it] 16%|█▋ | 112/681 [04:45<23:35, 2.49s/it] {'loss': 1.1323, 'grad_norm': 19.11754035949707, 'learning_rate': 4.942120794399002e-07, 'fcm_dpo/beta': 0.011842923238873482, 'fcm_dpo/q_t': 0.41945815086364746, 'fcm_dpo/delta': 0.050649721175432205, 'fcm_dpo/margin': 29.636615753173828, 'margin_dpo/margin_mean': 29.636615753173828, 'margin_dpo/margin_std': 44.190711975097656, 'logps/chosen': -80.24394226074219, 'logps/rejected': -124.40534973144531, 'logps/ref_chosen': -50.24964141845703, 'logps/ref_rejected': -64.77442932128906, 'KL/chosen_KL_mean': -29.994304656982422, 'KL/rejected_KL_mean': -59.63092041015625, 'KL/mean': -44.81261444091797, 'KL/std': 36.23802947998047, 'logits/chosen': -0.6208142042160034, 'logits/rejected': -0.5849310159683228, 'epoch': 0.16} + 16%|█▋ | 112/681 [04:45<23:35, 2.49s/it] 17%|█▋ | 113/681 [04:48<23:55, 2.53s/it] {'loss': 1.1054, 'grad_norm': 19.75728988647461, 'learning_rate': 4.939343162231841e-07, 'fcm_dpo/beta': 0.011969354934990406, 'fcm_dpo/q_t': 0.41338014602661133, 'fcm_dpo/delta': 0.024007823318243027, 'fcm_dpo/margin': 31.487873077392578, 'margin_dpo/margin_mean': 31.487873077392578, 'margin_dpo/margin_std': 42.64485549926758, 'logps/chosen': -104.65503692626953, 'logps/rejected': -147.39865112304688, 'logps/ref_chosen': -66.71295166015625, 'logps/ref_rejected': -77.96870422363281, 'KL/chosen_KL_mean': -37.94208526611328, 'KL/rejected_KL_mean': -69.4299545288086, 'KL/mean': -53.6860237121582, 'KL/std': 38.465965270996094, 'logits/chosen': -0.5755143165588379, 'logits/rejected': -0.5319409370422363, 'epoch': 0.17} + 17%|█▋ | 113/681 [04:48<23:55, 2.53s/it] 17%|█▋ | 114/681 [04:50<23:46, 2.52s/it] {'loss': 1.0109, 'grad_norm': 21.318517684936523, 'learning_rate': 4.936501251103751e-07, 'fcm_dpo/beta': 0.011564414948225021, 'fcm_dpo/q_t': 0.3780399262905121, 'fcm_dpo/delta': -0.19607561826705933, 'fcm_dpo/margin': 50.390052795410156, 'margin_dpo/margin_mean': 50.390052795410156, 'margin_dpo/margin_std': 69.09920501708984, 'logps/chosen': -92.37635803222656, 'logps/rejected': -172.09097290039062, 'logps/ref_chosen': -57.78507995605469, 'logps/ref_rejected': -87.10966491699219, 'KL/chosen_KL_mean': -34.591270446777344, 'KL/rejected_KL_mean': -84.9813232421875, 'KL/mean': -59.78630065917969, 'KL/std': 56.91395568847656, 'logits/chosen': -0.5932904481887817, 'logits/rejected': -0.562382698059082, 'epoch': 0.17} + 17%|█▋ | 114/681 [04:50<23:46, 2.52s/it] 17%|█▋ | 115/681 [04:53<24:11, 2.56s/it] {'loss': 1.1766, 'grad_norm': 26.597360610961914, 'learning_rate': 4.933595135901732e-07, 'fcm_dpo/beta': 0.011562837287783623, 'fcm_dpo/q_t': 0.41715824604034424, 'fcm_dpo/delta': 0.004423616454005241, 'fcm_dpo/margin': 34.20935821533203, 'margin_dpo/margin_mean': 34.20935821533203, 'margin_dpo/margin_std': 71.29178619384766, 'logps/chosen': -110.321044921875, 'logps/rejected': -177.51327514648438, 'logps/ref_chosen': -65.5826416015625, 'logps/ref_rejected': -98.56552124023438, 'KL/chosen_KL_mean': -44.7384033203125, 'KL/rejected_KL_mean': -78.94776153564453, 'KL/mean': -61.84308624267578, 'KL/std': 54.04515838623047, 'logits/chosen': -0.619019627571106, 'logits/rejected': -0.6011543273925781, 'epoch': 0.17} + 17%|█▋ | 115/681 [04:53<24:11, 2.56s/it] 17%|█▋ | 116/681 [04:55<23:28, 2.49s/it] {'loss': 1.0662, 'grad_norm': 21.224323272705078, 'learning_rate': 4.930624893204624e-07, 'fcm_dpo/beta': 0.011507030576467514, 'fcm_dpo/q_t': 0.40007728338241577, 'fcm_dpo/delta': -0.04481929540634155, 'fcm_dpo/margin': 38.464725494384766, 'margin_dpo/margin_mean': 38.464725494384766, 'margin_dpo/margin_std': 49.343894958496094, 'logps/chosen': -84.45022583007812, 'logps/rejected': -152.0364990234375, 'logps/ref_chosen': -51.40031433105469, 'logps/ref_rejected': -80.5218505859375, 'KL/chosen_KL_mean': -33.04991912841797, 'KL/rejected_KL_mean': -71.5146484375, 'KL/mean': -52.28227996826172, 'KL/std': 45.42361831665039, 'logits/chosen': -0.5934985876083374, 'logits/rejected': -0.5850518345832825, 'epoch': 0.17} + 17%|█▋ | 116/681 [04:55<23:28, 2.49s/it] 17%|█▋ | 117/681 [04:58<23:19, 2.48s/it] {'loss': 1.1488, 'grad_norm': 25.782733917236328, 'learning_rate': 4.927590601281083e-07, 'fcm_dpo/beta': 0.011499254032969475, 'fcm_dpo/q_t': 0.41948583722114563, 'fcm_dpo/delta': 0.041788313537836075, 'fcm_dpo/margin': 31.281452178955078, 'margin_dpo/margin_mean': 31.281452178955078, 'margin_dpo/margin_std': 54.715789794921875, 'logps/chosen': -111.63506317138672, 'logps/rejected': -140.20208740234375, 'logps/ref_chosen': -69.29840850830078, 'logps/ref_rejected': -66.583984375, 'KL/chosen_KL_mean': -42.33665466308594, 'KL/rejected_KL_mean': -73.61810302734375, 'KL/mean': -57.977378845214844, 'KL/std': 48.001014709472656, 'logits/chosen': -0.5640000104904175, 'logits/rejected': -0.526547372341156, 'epoch': 0.17} + 17%|█▋ | 117/681 [04:58<23:19, 2.48s/it] 17%|█▋ | 118/681 [05:00<23:22, 2.49s/it] {'loss': 1.072, 'grad_norm': 20.143999099731445, 'learning_rate': 4.924492340087524e-07, 'fcm_dpo/beta': 0.011471563950181007, 'fcm_dpo/q_t': 0.40212157368659973, 'fcm_dpo/delta': -0.03593885153532028, 'fcm_dpo/margin': 37.86691665649414, 'margin_dpo/margin_mean': 37.86691665649414, 'margin_dpo/margin_std': 49.769737243652344, 'logps/chosen': -88.96061706542969, 'logps/rejected': -146.85560607910156, 'logps/ref_chosen': -55.6409797668457, 'logps/ref_rejected': -75.66905975341797, 'KL/chosen_KL_mean': -33.31963348388672, 'KL/rejected_KL_mean': -71.1865463256836, 'KL/mean': -52.253089904785156, 'KL/std': 43.34165954589844, 'logits/chosen': -0.6188483238220215, 'logits/rejected': -0.6008873581886292, 'epoch': 0.17} + 17%|█▋ | 118/681 [05:00<23:22, 2.49s/it] 17%|█▋ | 119/681 [05:03<23:55, 2.55s/it] {'loss': 1.1113, 'grad_norm': 22.411529541015625, 'learning_rate': 4.92133019126601e-07, 'fcm_dpo/beta': 0.011316780932247639, 'fcm_dpo/q_t': 0.407728374004364, 'fcm_dpo/delta': -0.02757979929447174, 'fcm_dpo/margin': 37.58320236206055, 'margin_dpo/margin_mean': 37.58320236206055, 'margin_dpo/margin_std': 60.125755310058594, 'logps/chosen': -119.32390594482422, 'logps/rejected': -186.37420654296875, 'logps/ref_chosen': -73.51019287109375, 'logps/ref_rejected': -102.977294921875, 'KL/chosen_KL_mean': -45.8137092590332, 'KL/rejected_KL_mean': -83.39691162109375, 'KL/mean': -64.60531616210938, 'KL/std': 48.94361877441406, 'logits/chosen': -0.6017279624938965, 'logits/rejected': -0.5897752046585083, 'epoch': 0.17} + 17%|█▋ | 119/681 [05:03<23:55, 2.55s/it] 18%|█▊ | 120/681 [05:05<24:17, 2.60s/it] {'loss': 0.9987, 'grad_norm': 21.354141235351562, 'learning_rate': 4.918104238142103e-07, 'fcm_dpo/beta': 0.011053888127207756, 'fcm_dpo/q_t': 0.37308794260025024, 'fcm_dpo/delta': -0.1890048086643219, 'fcm_dpo/margin': 52.28166961669922, 'margin_dpo/margin_mean': 52.281673431396484, 'margin_dpo/margin_std': 64.23561096191406, 'logps/chosen': -123.29493713378906, 'logps/rejected': -206.81951904296875, 'logps/ref_chosen': -76.78083801269531, 'logps/ref_rejected': -108.02374267578125, 'KL/chosen_KL_mean': -46.51409912109375, 'KL/rejected_KL_mean': -98.7957763671875, 'KL/mean': -72.65493774414062, 'KL/std': 60.961395263671875, 'logits/chosen': -0.6113423109054565, 'logits/rejected': -0.5809808969497681, 'epoch': 0.18} + 18%|█▊ | 120/681 [05:06<24:17, 2.60s/it] 18%|█▊ | 121/681 [05:08<23:58, 2.57s/it] {'loss': 0.9989, 'grad_norm': 23.214689254760742, 'learning_rate': 4.91481456572267e-07, 'fcm_dpo/beta': 0.010542536154389381, 'fcm_dpo/q_t': 0.36856669187545776, 'fcm_dpo/delta': -0.22549036145210266, 'fcm_dpo/margin': 57.878684997558594, 'margin_dpo/margin_mean': 57.878684997558594, 'margin_dpo/margin_std': 74.57313537597656, 'logps/chosen': -107.00697326660156, 'logps/rejected': -213.09033203125, 'logps/ref_chosen': -61.789894104003906, 'logps/ref_rejected': -109.99456787109375, 'KL/chosen_KL_mean': -45.217079162597656, 'KL/rejected_KL_mean': -103.09576416015625, 'KL/mean': -74.15641784667969, 'KL/std': 59.655128479003906, 'logits/chosen': -0.5403860807418823, 'logits/rejected': -0.5361485481262207, 'epoch': 0.18} + 18%|█▊ | 121/681 [05:08<23:58, 2.57s/it] 18%|█▊ | 122/681 [05:10<23:14, 2.50s/it] {'loss': 0.9029, 'grad_norm': 22.87774658203125, 'learning_rate': 4.911461260693638e-07, 'fcm_dpo/beta': 0.009974541142582893, 'fcm_dpo/q_t': 0.343948096036911, 'fcm_dpo/delta': -0.33350038528442383, 'fcm_dpo/margin': 71.17928314208984, 'margin_dpo/margin_mean': 71.17927551269531, 'margin_dpo/margin_std': 70.96698760986328, 'logps/chosen': -88.1077880859375, 'logps/rejected': -219.09906005859375, 'logps/ref_chosen': -46.9022102355957, 'logps/ref_rejected': -106.71418762207031, 'KL/chosen_KL_mean': -41.20557403564453, 'KL/rejected_KL_mean': -112.38485717773438, 'KL/mean': -76.79521179199219, 'KL/std': 67.50093078613281, 'logits/chosen': -0.5265074968338013, 'logits/rejected': -0.5429497957229614, 'epoch': 0.18} + 18%|█▊ | 122/681 [05:10<23:14, 2.50s/it] 18%|█▊ | 123/681 [05:13<23:50, 2.56s/it] {'loss': 1.1135, 'grad_norm': 20.789918899536133, 'learning_rate': 4.908044411417711e-07, 'fcm_dpo/beta': 0.00966709479689598, 'fcm_dpo/q_t': 0.4034884572029114, 'fcm_dpo/delta': -0.051273368299007416, 'fcm_dpo/margin': 46.348785400390625, 'margin_dpo/margin_mean': 46.34878158569336, 'margin_dpo/margin_std': 77.05763244628906, 'logps/chosen': -106.72337341308594, 'logps/rejected': -179.5089111328125, 'logps/ref_chosen': -61.33863830566406, 'logps/ref_rejected': -87.775390625, 'KL/chosen_KL_mean': -45.384735107421875, 'KL/rejected_KL_mean': -91.7335205078125, 'KL/mean': -68.55912780761719, 'KL/std': 57.45310592651367, 'logits/chosen': -0.5363984107971191, 'logits/rejected': -0.5196830034255981, 'epoch': 0.18} + 18%|█▊ | 123/681 [05:13<23:50, 2.56s/it] 18%|█▊ | 124/681 [05:16<23:51, 2.57s/it] {'loss': 1.0166, 'grad_norm': 24.75668716430664, 'learning_rate': 4.904564107932048e-07, 'fcm_dpo/beta': 0.009301427751779556, 'fcm_dpo/q_t': 0.36982783675193787, 'fcm_dpo/delta': -0.26134854555130005, 'fcm_dpo/margin': 69.19841766357422, 'margin_dpo/margin_mean': 69.19841766357422, 'margin_dpo/margin_std': 98.82803344726562, 'logps/chosen': -123.4361572265625, 'logps/rejected': -238.76681518554688, 'logps/ref_chosen': -71.44833374023438, 'logps/ref_rejected': -117.58056640625, 'KL/chosen_KL_mean': -51.98781967163086, 'KL/rejected_KL_mean': -121.18624114990234, 'KL/mean': -86.58702087402344, 'KL/std': 78.8485107421875, 'logits/chosen': -0.5215315222740173, 'logits/rejected': -0.5242322683334351, 'epoch': 0.18} + 18%|█▊ | 124/681 [05:16<23:51, 2.57s/it] 18%|█▊ | 125/681 [05:18<23:36, 2.55s/it] {'loss': 1.034, 'grad_norm': 19.192096710205078, 'learning_rate': 4.90102044194588e-07, 'fcm_dpo/beta': 0.00900559313595295, 'fcm_dpo/q_t': 0.38217341899871826, 'fcm_dpo/delta': -0.1572001874446869, 'fcm_dpo/margin': 60.93578338623047, 'margin_dpo/margin_mean': 60.93578338623047, 'margin_dpo/margin_std': 83.12398529052734, 'logps/chosen': -92.94500732421875, 'logps/rejected': -187.7324676513672, 'logps/ref_chosen': -50.136940002441406, 'logps/ref_rejected': -83.98861694335938, 'KL/chosen_KL_mean': -42.808067321777344, 'KL/rejected_KL_mean': -103.74385070800781, 'KL/mean': -73.27595520019531, 'KL/std': 66.60159301757812, 'logits/chosen': -0.4833022356033325, 'logits/rejected': -0.4855707287788391, 'epoch': 0.18} + 18%|█▊ | 125/681 [05:18<23:36, 2.55s/it] 19%|█▊ | 126/681 [05:21<23:48, 2.57s/it] {'loss': 1.048, 'grad_norm': 20.248746871948242, 'learning_rate': 4.897413506838102e-07, 'fcm_dpo/beta': 0.008764306083321571, 'fcm_dpo/q_t': 0.39040666818618774, 'fcm_dpo/delta': -0.10474735498428345, 'fcm_dpo/margin': 56.966033935546875, 'margin_dpo/margin_mean': 56.966033935546875, 'margin_dpo/margin_std': 76.7184066772461, 'logps/chosen': -101.90567779541016, 'logps/rejected': -201.33441162109375, 'logps/ref_chosen': -55.66706848144531, 'logps/ref_rejected': -98.1297607421875, 'KL/chosen_KL_mean': -46.238609313964844, 'KL/rejected_KL_mean': -103.20464324951172, 'KL/mean': -74.72161865234375, 'KL/std': 60.32928466796875, 'logits/chosen': -0.5261760354042053, 'logits/rejected': -0.5226148366928101, 'epoch': 0.19} + 19%|█▊ | 126/681 [05:21<23:48, 2.57s/it] 19%|█▊ | 127/681 [05:23<24:00, 2.60s/it] {'loss': 1.136, 'grad_norm': 20.812768936157227, 'learning_rate': 4.89374339765481e-07, 'fcm_dpo/beta': 0.008787820115685463, 'fcm_dpo/q_t': 0.4162459075450897, 'fcm_dpo/delta': 0.03892592340707779, 'fcm_dpo/margin': 41.24645233154297, 'margin_dpo/margin_mean': 41.24645233154297, 'margin_dpo/margin_std': 66.14584350585938, 'logps/chosen': -101.34181213378906, 'logps/rejected': -162.82937622070312, 'logps/ref_chosen': -56.55467987060547, 'logps/ref_rejected': -76.7957763671875, 'KL/chosen_KL_mean': -44.78712844848633, 'KL/rejected_KL_mean': -86.03358459472656, 'KL/mean': -65.41035461425781, 'KL/std': 52.55406951904297, 'logits/chosen': -0.5199460983276367, 'logits/rejected': -0.5012995004653931, 'epoch': 0.19} + 19%|█▊ | 127/681 [05:23<24:00, 2.60s/it] 19%|█▉ | 128/681 [05:26<24:12, 2.63s/it] {'loss': 1.1451, 'grad_norm': 27.369966506958008, 'learning_rate': 4.890010211106795e-07, 'fcm_dpo/beta': 0.008857084438204765, 'fcm_dpo/q_t': 0.4142889976501465, 'fcm_dpo/delta': 0.014133721590042114, 'fcm_dpo/margin': 43.582611083984375, 'margin_dpo/margin_mean': 43.58261489868164, 'margin_dpo/margin_std': 76.90882873535156, 'logps/chosen': -105.71485900878906, 'logps/rejected': -167.615478515625, 'logps/ref_chosen': -58.12095642089844, 'logps/ref_rejected': -76.43896484375, 'KL/chosen_KL_mean': -47.59389877319336, 'KL/rejected_KL_mean': -91.176513671875, 'KL/mean': -69.38520812988281, 'KL/std': 61.367488861083984, 'logits/chosen': -0.49865514039993286, 'logits/rejected': -0.47880756855010986, 'epoch': 0.19} + 19%|█▉ | 128/681 [05:26<24:12, 2.63s/it] 19%|█▉ | 129/681 [05:29<24:00, 2.61s/it] {'loss': 1.1535, 'grad_norm': 20.145679473876953, 'learning_rate': 4.88621404556699e-07, 'fcm_dpo/beta': 0.008825141936540604, 'fcm_dpo/q_t': 0.4152906835079193, 'fcm_dpo/delta': -0.0057245357893407345, 'fcm_dpo/margin': 45.94677734375, 'margin_dpo/margin_mean': 45.94677734375, 'margin_dpo/margin_std': 89.10274505615234, 'logps/chosen': -124.58363342285156, 'logps/rejected': -200.25625610351562, 'logps/ref_chosen': -66.91637420654297, 'logps/ref_rejected': -96.6422119140625, 'KL/chosen_KL_mean': -57.667259216308594, 'KL/rejected_KL_mean': -103.61404418945312, 'KL/mean': -80.6406478881836, 'KL/std': 68.38490295410156, 'logits/chosen': -0.5370001196861267, 'logits/rejected': -0.5274189710617065, 'epoch': 0.19} + 19%|█▉ | 129/681 [05:29<24:00, 2.61s/it] 19%|█▉ | 130/681 [05:31<23:23, 2.55s/it] {'loss': 1.0019, 'grad_norm': 19.886871337890625, 'learning_rate': 4.882355001067891e-07, 'fcm_dpo/beta': 0.008637124672532082, 'fcm_dpo/q_t': 0.37180206179618835, 'fcm_dpo/delta': -0.2089286893606186, 'fcm_dpo/margin': 69.05598449707031, 'margin_dpo/margin_mean': 69.05598449707031, 'margin_dpo/margin_std': 84.9914779663086, 'logps/chosen': -87.31417846679688, 'logps/rejected': -194.48497009277344, 'logps/ref_chosen': -44.66685104370117, 'logps/ref_rejected': -82.78165435791016, 'KL/chosen_KL_mean': -42.64732360839844, 'KL/rejected_KL_mean': -111.70331573486328, 'KL/mean': -77.17532348632812, 'KL/std': 69.53358459472656, 'logits/chosen': -0.5179574489593506, 'logits/rejected': -0.513495147228241, 'epoch': 0.19} + 19%|█▉ | 130/681 [05:31<23:23, 2.55s/it] 19%|█▉ | 131/681 [05:34<23:13, 2.53s/it] {'loss': 0.9848, 'grad_norm': 27.72515869140625, 'learning_rate': 4.878433179298909e-07, 'fcm_dpo/beta': 0.008221091702580452, 'fcm_dpo/q_t': 0.3706004023551941, 'fcm_dpo/delta': -0.1789543628692627, 'fcm_dpo/margin': 69.14730834960938, 'margin_dpo/margin_mean': 69.14730834960938, 'margin_dpo/margin_std': 75.87403869628906, 'logps/chosen': -83.11874389648438, 'logps/rejected': -195.78549194335938, 'logps/ref_chosen': -44.924591064453125, 'logps/ref_rejected': -88.44401550292969, 'KL/chosen_KL_mean': -38.19416046142578, 'KL/rejected_KL_mean': -107.34147644042969, 'KL/mean': -72.767822265625, 'KL/std': 69.4886474609375, 'logits/chosen': -0.4905538558959961, 'logits/rejected': -0.49695295095443726, 'epoch': 0.19} + 19%|█▉ | 131/681 [05:34<23:13, 2.53s/it] 19%|█▉ | 132/681 [05:36<23:19, 2.55s/it] {'loss': 1.0917, 'grad_norm': 19.825626373291016, 'learning_rate': 4.874448683603694e-07, 'fcm_dpo/beta': 0.008070580661296844, 'fcm_dpo/q_t': 0.40248197317123413, 'fcm_dpo/delta': -0.05931827053427696, 'fcm_dpo/margin': 56.55363082885742, 'margin_dpo/margin_mean': 56.55363082885742, 'margin_dpo/margin_std': 89.22288513183594, 'logps/chosen': -109.51251220703125, 'logps/rejected': -194.95721435546875, 'logps/ref_chosen': -59.00108337402344, 'logps/ref_rejected': -87.89215087890625, 'KL/chosen_KL_mean': -50.51142120361328, 'KL/rejected_KL_mean': -107.0650634765625, 'KL/mean': -78.78823852539062, 'KL/std': 68.71534729003906, 'logits/chosen': -0.5076676607131958, 'logits/rejected': -0.5060294270515442, 'epoch': 0.19} + 19%|█▉ | 132/681 [05:36<23:19, 2.55s/it] 20%|█▉ | 133/681 [05:39<23:32, 2.58s/it] {'loss': 1.115, 'grad_norm': 27.044200897216797, 'learning_rate': 4.870401618977415e-07, 'fcm_dpo/beta': 0.008044019341468811, 'fcm_dpo/q_t': 0.41164666414260864, 'fcm_dpo/delta': -0.002142667770385742, 'fcm_dpo/margin': 49.97242736816406, 'margin_dpo/margin_mean': 49.97242736816406, 'margin_dpo/margin_std': 79.255859375, 'logps/chosen': -126.82603454589844, 'logps/rejected': -206.52752685546875, 'logps/ref_chosen': -66.60449981689453, 'logps/ref_rejected': -96.33355712890625, 'KL/chosen_KL_mean': -60.22153854370117, 'KL/rejected_KL_mean': -110.1939697265625, 'KL/mean': -85.20774841308594, 'KL/std': 62.262081146240234, 'logits/chosen': -0.5105775594711304, 'logits/rejected': -0.49744895100593567, 'epoch': 0.2} + 20%|█▉ | 133/681 [05:39<23:32, 2.58s/it] 20%|█▉ | 134/681 [05:41<22:54, 2.51s/it] {'loss': 1.0719, 'grad_norm': 18.76936149597168, 'learning_rate': 4.866292092063986e-07, 'fcm_dpo/beta': 0.00804828479886055, 'fcm_dpo/q_t': 0.4034237563610077, 'fcm_dpo/delta': -0.03093547746539116, 'fcm_dpo/margin': 53.36433029174805, 'margin_dpo/margin_mean': 53.36433410644531, 'margin_dpo/margin_std': 69.71368408203125, 'logps/chosen': -99.26676940917969, 'logps/rejected': -188.21636962890625, 'logps/ref_chosen': -52.06925582885742, 'logps/ref_rejected': -87.6545181274414, 'KL/chosen_KL_mean': -47.19751739501953, 'KL/rejected_KL_mean': -100.56185150146484, 'KL/mean': -73.87967681884766, 'KL/std': 60.19834518432617, 'logits/chosen': -0.4705553650856018, 'logits/rejected': -0.45708662271499634, 'epoch': 0.2} + 20%|█▉ | 134/681 [05:41<22:54, 2.51s/it] 20%|█▉ | 135/681 [05:44<22:42, 2.49s/it] {'loss': 0.998, 'grad_norm': 23.092226028442383, 'learning_rate': 4.862120211153265e-07, 'fcm_dpo/beta': 0.007756436243653297, 'fcm_dpo/q_t': 0.37159401178359985, 'fcm_dpo/delta': -0.21430166065692902, 'fcm_dpo/margin': 77.52987670898438, 'margin_dpo/margin_mean': 77.52987670898438, 'margin_dpo/margin_std': 98.24815368652344, 'logps/chosen': -102.91890716552734, 'logps/rejected': -246.0746612548828, 'logps/ref_chosen': -50.353858947753906, 'logps/ref_rejected': -115.97975158691406, 'KL/chosen_KL_mean': -52.56504821777344, 'KL/rejected_KL_mean': -130.09490966796875, 'KL/mean': -91.32998657226562, 'KL/std': 81.02059936523438, 'logits/chosen': -0.4758632481098175, 'logits/rejected': -0.5096943974494934, 'epoch': 0.2} + 20%|█▉ | 135/681 [05:44<22:42, 2.49s/it] 20%|█▉ | 136/681 [05:46<23:13, 2.56s/it] {'loss': 1.1452, 'grad_norm': 20.256174087524414, 'learning_rate': 4.857886086178193e-07, 'fcm_dpo/beta': 0.00763201666995883, 'fcm_dpo/q_t': 0.41930729150772095, 'fcm_dpo/delta': 0.0035090260207653046, 'fcm_dpo/margin': 51.88257598876953, 'margin_dpo/margin_mean': 51.88257598876953, 'margin_dpo/margin_std': 95.30119323730469, 'logps/chosen': -127.79344940185547, 'logps/rejected': -210.92474365234375, 'logps/ref_chosen': -65.072509765625, 'logps/ref_rejected': -96.32122802734375, 'KL/chosen_KL_mean': -62.72093963623047, 'KL/rejected_KL_mean': -114.603515625, 'KL/mean': -88.66222381591797, 'KL/std': 74.2306137084961, 'logits/chosen': -0.4780592918395996, 'logits/rejected': -0.4707057476043701, 'epoch': 0.2} + 20%|█▉ | 136/681 [05:46<23:13, 2.56s/it] 20%|██ | 137/681 [05:49<23:08, 2.55s/it] {'loss': 1.0282, 'grad_norm': 17.79768180847168, 'learning_rate': 4.853589828711902e-07, 'fcm_dpo/beta': 0.007428483106195927, 'fcm_dpo/q_t': 0.3775022029876709, 'fcm_dpo/delta': -0.21530447900295258, 'fcm_dpo/margin': 81.04991912841797, 'margin_dpo/margin_mean': 81.0499267578125, 'margin_dpo/margin_std': 117.30170440673828, 'logps/chosen': -108.43620300292969, 'logps/rejected': -254.5907745361328, 'logps/ref_chosen': -48.759117126464844, 'logps/ref_rejected': -113.86376953125, 'KL/chosen_KL_mean': -59.67708206176758, 'KL/rejected_KL_mean': -140.7270050048828, 'KL/mean': -100.20204162597656, 'KL/std': 97.27854919433594, 'logits/chosen': -0.4339534640312195, 'logits/rejected': -0.46067190170288086, 'epoch': 0.2} + 20%|██ | 137/681 [05:49<23:08, 2.55s/it] 20%|██ | 138/681 [05:51<22:26, 2.48s/it] {'loss': 1.0574, 'grad_norm': 20.16287612915039, 'learning_rate': 4.849231551964771e-07, 'fcm_dpo/beta': 0.007311869412660599, 'fcm_dpo/q_t': 0.3972048759460449, 'fcm_dpo/delta': -0.051259323954582214, 'fcm_dpo/margin': 61.40470886230469, 'margin_dpo/margin_mean': 61.40470886230469, 'margin_dpo/margin_std': 75.6790771484375, 'logps/chosen': -122.79869842529297, 'logps/rejected': -216.88070678710938, 'logps/ref_chosen': -60.519649505615234, 'logps/ref_rejected': -93.19694519042969, 'KL/chosen_KL_mean': -62.279048919677734, 'KL/rejected_KL_mean': -123.68376159667969, 'KL/mean': -92.98139953613281, 'KL/std': 73.94004821777344, 'logits/chosen': -0.4445374608039856, 'logits/rejected': -0.43380314111709595, 'epoch': 0.2} + 20%|██ | 138/681 [05:51<22:26, 2.48s/it] 20%|██ | 139/681 [05:54<22:08, 2.45s/it] {'loss': 1.0226, 'grad_norm': 19.195222854614258, 'learning_rate': 4.844811370781446e-07, 'fcm_dpo/beta': 0.007158602587878704, 'fcm_dpo/q_t': 0.3843567967414856, 'fcm_dpo/delta': -0.12749908864498138, 'fcm_dpo/margin': 72.78312683105469, 'margin_dpo/margin_mean': 72.78312683105469, 'margin_dpo/margin_std': 90.51847839355469, 'logps/chosen': -98.74073028564453, 'logps/rejected': -204.36044311523438, 'logps/ref_chosen': -46.89138412475586, 'logps/ref_rejected': -79.72798156738281, 'KL/chosen_KL_mean': -51.84934616088867, 'KL/rejected_KL_mean': -124.6324691772461, 'KL/mean': -88.24090576171875, 'KL/std': 69.78694152832031, 'logits/chosen': -0.43394410610198975, 'logits/rejected': -0.4245094060897827, 'epoch': 0.2} + 20%|██ | 139/681 [05:54<22:08, 2.45s/it] 21%|██ | 140/681 [05:56<21:53, 2.43s/it] {'loss': 1.0714, 'grad_norm': 19.862146377563477, 'learning_rate': 4.840329401637809e-07, 'fcm_dpo/beta': 0.007023262791335583, 'fcm_dpo/q_t': 0.39678555727005005, 'fcm_dpo/delta': -0.06672540307044983, 'fcm_dpo/margin': 66.00922393798828, 'margin_dpo/margin_mean': 66.00922393798828, 'margin_dpo/margin_std': 93.16436767578125, 'logps/chosen': -121.70032501220703, 'logps/rejected': -212.01895141601562, 'logps/ref_chosen': -58.97471618652344, 'logps/ref_rejected': -83.28410339355469, 'KL/chosen_KL_mean': -62.725608825683594, 'KL/rejected_KL_mean': -128.73483276367188, 'KL/mean': -95.730224609375, 'KL/std': 77.60523986816406, 'logits/chosen': -0.38486558198928833, 'logits/rejected': -0.36864161491394043, 'epoch': 0.21} + 21%|██ | 140/681 [05:56<21:53, 2.43s/it] 21%|██ | 141/681 [05:59<22:19, 2.48s/it] {'loss': 1.1045, 'grad_norm': 24.785545349121094, 'learning_rate': 4.83578576263792e-07, 'fcm_dpo/beta': 0.00697126192972064, 'fcm_dpo/q_t': 0.40241730213165283, 'fcm_dpo/delta': -0.0359983891248703, 'fcm_dpo/margin': 62.32007598876953, 'margin_dpo/margin_mean': 62.320072174072266, 'margin_dpo/margin_std': 98.26313781738281, 'logps/chosen': -145.74136352539062, 'logps/rejected': -231.17803955078125, 'logps/ref_chosen': -75.07566833496094, 'logps/ref_rejected': -98.1922607421875, 'KL/chosen_KL_mean': -70.66569519042969, 'KL/rejected_KL_mean': -132.9857635498047, 'KL/mean': -101.82574462890625, 'KL/std': 85.2192611694336, 'logits/chosen': -0.43056273460388184, 'logits/rejected': -0.4188095033168793, 'epoch': 0.21} + 21%|██ | 141/681 [05:59<22:19, 2.48s/it] 21%|██ | 142/681 [06:01<23:00, 2.56s/it] {'loss': 1.0855, 'grad_norm': 25.63075065612793, 'learning_rate': 4.83118057351089e-07, 'fcm_dpo/beta': 0.006883557885885239, 'fcm_dpo/q_t': 0.3939516842365265, 'fcm_dpo/delta': -0.0946403294801712, 'fcm_dpo/margin': 71.19023895263672, 'margin_dpo/margin_mean': 71.19023895263672, 'margin_dpo/margin_std': 109.05268859863281, 'logps/chosen': -129.47805786132812, 'logps/rejected': -237.22259521484375, 'logps/ref_chosen': -58.027931213378906, 'logps/ref_rejected': -94.58222961425781, 'KL/chosen_KL_mean': -71.45011901855469, 'KL/rejected_KL_mean': -142.64035034179688, 'KL/mean': -107.04524230957031, 'KL/std': 92.68605041503906, 'logits/chosen': -0.40616393089294434, 'logits/rejected': -0.40541693568229675, 'epoch': 0.21} + 21%|██ | 142/681 [06:01<23:00, 2.56s/it] 21%|██ | 143/681 [06:04<23:28, 2.62s/it] {'loss': 1.1959, 'grad_norm': 22.879791259765625, 'learning_rate': 4.826513955607734e-07, 'fcm_dpo/beta': 0.006882138084620237, 'fcm_dpo/q_t': 0.432314932346344, 'fcm_dpo/delta': 0.08677390962839127, 'fcm_dpo/margin': 45.912410736083984, 'margin_dpo/margin_mean': 45.91241455078125, 'margin_dpo/margin_std': 95.7051010131836, 'logps/chosen': -133.56011962890625, 'logps/rejected': -200.8756561279297, 'logps/ref_chosen': -57.59645080566406, 'logps/ref_rejected': -78.99957275390625, 'KL/chosen_KL_mean': -75.96367645263672, 'KL/rejected_KL_mean': -121.87608337402344, 'KL/mean': -98.91988372802734, 'KL/std': 81.95751953125, 'logits/chosen': -0.36676502227783203, 'logits/rejected': -0.35898709297180176, 'epoch': 0.21} + 21%|██ | 143/681 [06:04<23:28, 2.62s/it] 21%|██ | 144/681 [06:07<23:39, 2.64s/it] {'loss': 1.1116, 'grad_norm': 20.645727157592773, 'learning_rate': 4.821786031898176e-07, 'fcm_dpo/beta': 0.006947984918951988, 'fcm_dpo/q_t': 0.4123944938182831, 'fcm_dpo/delta': 0.012606319040060043, 'fcm_dpo/margin': 55.825904846191406, 'margin_dpo/margin_mean': 55.82590866088867, 'margin_dpo/margin_std': 82.17929077148438, 'logps/chosen': -127.86387634277344, 'logps/rejected': -205.78369140625, 'logps/ref_chosen': -59.90636444091797, 'logps/ref_rejected': -82.00025939941406, 'KL/chosen_KL_mean': -67.95751953125, 'KL/rejected_KL_mean': -123.78343200683594, 'KL/mean': -95.87046813964844, 'KL/std': 69.62263488769531, 'logits/chosen': -0.4141218066215515, 'logits/rejected': -0.40556472539901733, 'epoch': 0.21} + 21%|██ | 144/681 [06:07<23:39, 2.64s/it] 21%|██▏ | 145/681 [06:09<23:16, 2.61s/it] {'loss': 1.0903, 'grad_norm': 23.787511825561523, 'learning_rate': 4.816996926967401e-07, 'fcm_dpo/beta': 0.00693280715495348, 'fcm_dpo/q_t': 0.4049571752548218, 'fcm_dpo/delta': -0.02072506584227085, 'fcm_dpo/margin': 60.55757141113281, 'margin_dpo/margin_mean': 60.55756378173828, 'margin_dpo/margin_std': 85.34068298339844, 'logps/chosen': -122.16694641113281, 'logps/rejected': -203.9901580810547, 'logps/ref_chosen': -56.60066604614258, 'logps/ref_rejected': -77.86631774902344, 'KL/chosen_KL_mean': -65.5662841796875, 'KL/rejected_KL_mean': -126.12384033203125, 'KL/mean': -95.84506225585938, 'KL/std': 70.3823013305664, 'logits/chosen': -0.41279762983322144, 'logits/rejected': -0.39724746346473694, 'epoch': 0.21} + 21%|██▏ | 145/681 [06:09<23:16, 2.61s/it] 21%|██▏ | 146/681 [06:12<23:03, 2.59s/it] {'loss': 1.1846, 'grad_norm': 27.229778289794922, 'learning_rate': 4.812146767012779e-07, 'fcm_dpo/beta': 0.00698929512873292, 'fcm_dpo/q_t': 0.42611053586006165, 'fcm_dpo/delta': 0.0770314633846283, 'fcm_dpo/margin': 46.57452392578125, 'margin_dpo/margin_mean': 46.57452392578125, 'margin_dpo/margin_std': 90.82237243652344, 'logps/chosen': -154.61410522460938, 'logps/rejected': -216.89096069335938, 'logps/ref_chosen': -66.00045013427734, 'logps/ref_rejected': -81.70278930664062, 'KL/chosen_KL_mean': -88.61365509033203, 'KL/rejected_KL_mean': -135.18817138671875, 'KL/mean': -111.90090942382812, 'KL/std': 75.67164611816406, 'logits/chosen': -0.386644184589386, 'logits/rejected': -0.35976487398147583, 'epoch': 0.21} + 21%|██▏ | 146/681 [06:12<23:03, 2.59s/it] 22%|██▏ | 147/681 [06:14<23:03, 2.59s/it] {'loss': 1.0927, 'grad_norm': 20.30803108215332, 'learning_rate': 4.807235679840536e-07, 'fcm_dpo/beta': 0.006981690879911184, 'fcm_dpo/q_t': 0.4026370644569397, 'fcm_dpo/delta': -0.04241678863763809, 'fcm_dpo/margin': 63.08330535888672, 'margin_dpo/margin_mean': 63.083309173583984, 'margin_dpo/margin_std': 94.804443359375, 'logps/chosen': -118.41392517089844, 'logps/rejected': -199.48236083984375, 'logps/ref_chosen': -53.405487060546875, 'logps/ref_rejected': -71.39060974121094, 'KL/chosen_KL_mean': -65.0084457397461, 'KL/rejected_KL_mean': -128.09173583984375, 'KL/mean': -96.55009460449219, 'KL/std': 76.3470687866211, 'logits/chosen': -0.42511412501335144, 'logits/rejected': -0.4061092436313629, 'epoch': 0.22} + 22%|██▏ | 147/681 [06:14<23:03, 2.59s/it] 22%|██▏ | 148/681 [06:17<22:59, 2.59s/it] {'loss': 1.1243, 'grad_norm': 18.78589630126953, 'learning_rate': 4.802263794862384e-07, 'fcm_dpo/beta': 0.0069200447760522366, 'fcm_dpo/q_t': 0.41639888286590576, 'fcm_dpo/delta': -0.08388624340295792, 'fcm_dpo/margin': 54.566932678222656, 'margin_dpo/margin_mean': 54.566932678222656, 'margin_dpo/margin_std': 80.36856842041016, 'logps/chosen': -128.63906860351562, 'logps/rejected': -221.36276245117188, 'logps/ref_chosen': -64.93708038330078, 'logps/ref_rejected': -103.09384155273438, 'KL/chosen_KL_mean': -63.701988220214844, 'KL/rejected_KL_mean': -118.2689208984375, 'KL/mean': -90.98545837402344, 'KL/std': 76.31551361083984, 'logits/chosen': -0.4691488444805145, 'logits/rejected': -0.46223020553588867, 'epoch': 0.22} + 22%|██▏ | 148/681 [06:17<22:59, 2.59s/it] 22%|██▏ | 149/681 [06:20<23:12, 2.62s/it] {'loss': 1.057, 'grad_norm': 18.21072769165039, 'learning_rate': 4.797231243092118e-07, 'fcm_dpo/beta': 0.006747937761247158, 'fcm_dpo/q_t': 0.3960561752319336, 'fcm_dpo/delta': -0.05886346101760864, 'fcm_dpo/margin': 67.35143280029297, 'margin_dpo/margin_mean': 67.3514404296875, 'margin_dpo/margin_std': 81.17979431152344, 'logps/chosen': -119.41095733642578, 'logps/rejected': -227.60336303710938, 'logps/ref_chosen': -58.47376251220703, 'logps/ref_rejected': -99.31474304199219, 'KL/chosen_KL_mean': -60.93719482421875, 'KL/rejected_KL_mean': -128.2886199951172, 'KL/mean': -94.6129150390625, 'KL/std': 68.51680755615234, 'logits/chosen': -0.4674052298069, 'logits/rejected': -0.45243215560913086, 'epoch': 0.22} + 22%|██▏ | 149/681 [06:20<23:12, 2.62s/it] 22%|██▏ | 150/681 [06:22<23:04, 2.61s/it] {'loss': 1.081, 'grad_norm': 17.715530395507812, 'learning_rate': 4.792138157142157e-07, 'fcm_dpo/beta': 0.006690857000648975, 'fcm_dpo/q_t': 0.404508501291275, 'fcm_dpo/delta': -0.04441402480006218, 'fcm_dpo/margin': 65.94608306884766, 'margin_dpo/margin_mean': 65.94608306884766, 'margin_dpo/margin_std': 95.0557861328125, 'logps/chosen': -99.22233581542969, 'logps/rejected': -202.81021118164062, 'logps/ref_chosen': -45.705810546875, 'logps/ref_rejected': -83.34759521484375, 'KL/chosen_KL_mean': -53.51652526855469, 'KL/rejected_KL_mean': -119.46260833740234, 'KL/mean': -86.48956298828125, 'KL/std': 79.87619018554688, 'logits/chosen': -0.46521174907684326, 'logits/rejected': -0.46958300471305847, 'epoch': 0.22} + 22%|██▏ | 150/681 [06:22<23:04, 2.61s/it] 22%|██▏ | 151/681 [06:25<22:31, 2.55s/it] {'loss': 1.0611, 'grad_norm': 19.35520362854004, 'learning_rate': 4.786984671220053e-07, 'fcm_dpo/beta': 0.006673037074506283, 'fcm_dpo/q_t': 0.39813345670700073, 'fcm_dpo/delta': -0.046944983303546906, 'fcm_dpo/margin': 66.66267395019531, 'margin_dpo/margin_mean': 66.66267395019531, 'margin_dpo/margin_std': 83.35139465332031, 'logps/chosen': -137.59255981445312, 'logps/rejected': -234.14822387695312, 'logps/ref_chosen': -70.57083129882812, 'logps/ref_rejected': -100.46382141113281, 'KL/chosen_KL_mean': -67.021728515625, 'KL/rejected_KL_mean': -133.6844024658203, 'KL/mean': -100.35306549072266, 'KL/std': 76.07426452636719, 'logits/chosen': -0.5319645404815674, 'logits/rejected': -0.5065436363220215, 'epoch': 0.22} + 22%|██▏ | 151/681 [06:25<22:31, 2.55s/it] 22%|██▏ | 152/681 [06:27<22:45, 2.58s/it] {'loss': 1.0172, 'grad_norm': 20.457353591918945, 'learning_rate': 4.78177092112495e-07, 'fcm_dpo/beta': 0.0065421732142567635, 'fcm_dpo/q_t': 0.3823656737804413, 'fcm_dpo/delta': -0.13599231839179993, 'fcm_dpo/margin': 80.85972595214844, 'margin_dpo/margin_mean': 80.85972595214844, 'margin_dpo/margin_std': 96.03659057617188, 'logps/chosen': -118.93666076660156, 'logps/rejected': -245.7724609375, 'logps/ref_chosen': -60.16438674926758, 'logps/ref_rejected': -106.14045715332031, 'KL/chosen_KL_mean': -58.77227020263672, 'KL/rejected_KL_mean': -139.6320037841797, 'KL/mean': -99.20213317871094, 'KL/std': 78.30158996582031, 'logits/chosen': -0.46430838108062744, 'logits/rejected': -0.46231526136398315, 'epoch': 0.22} + 22%|██▏ | 152/681 [06:27<22:45, 2.58s/it] 22%|██▏ | 153/681 [06:30<22:42, 2.58s/it] {'loss': 1.091, 'grad_norm': 15.283037185668945, 'learning_rate': 4.776497044244016e-07, 'fcm_dpo/beta': 0.006446994375437498, 'fcm_dpo/q_t': 0.4036220908164978, 'fcm_dpo/delta': -0.04709509760141373, 'fcm_dpo/margin': 69.02679443359375, 'margin_dpo/margin_mean': 69.02678680419922, 'margin_dpo/margin_std': 105.63395690917969, 'logps/chosen': -116.34903717041016, 'logps/rejected': -214.7163848876953, 'logps/ref_chosen': -56.315277099609375, 'logps/ref_rejected': -85.65583801269531, 'KL/chosen_KL_mean': -60.03376007080078, 'KL/rejected_KL_mean': -129.060546875, 'KL/mean': -94.54715728759766, 'KL/std': 85.97286224365234, 'logits/chosen': -0.4555599093437195, 'logits/rejected': -0.4504218101501465, 'epoch': 0.22} + 22%|██▏ | 153/681 [06:30<22:42, 2.58s/it] 23%|██▎ | 154/681 [06:33<22:58, 2.62s/it] {'loss': 1.1225, 'grad_norm': 18.80577278137207, 'learning_rate': 4.771163179548808e-07, 'fcm_dpo/beta': 0.006422577425837517, 'fcm_dpo/q_t': 0.40717241168022156, 'fcm_dpo/delta': -0.023874616250395775, 'fcm_dpo/margin': 65.80543518066406, 'margin_dpo/margin_mean': 65.8054428100586, 'margin_dpo/margin_std': 109.62528991699219, 'logps/chosen': -134.3277130126953, 'logps/rejected': -241.63478088378906, 'logps/ref_chosen': -62.74256896972656, 'logps/ref_rejected': -104.24420166015625, 'KL/chosen_KL_mean': -71.58514404296875, 'KL/rejected_KL_mean': -137.3905792236328, 'KL/mean': -104.48786926269531, 'KL/std': 85.70448303222656, 'logits/chosen': -0.46241965889930725, 'logits/rejected': -0.4656856656074524, 'epoch': 0.23} + 23%|██▎ | 154/681 [06:33<22:58, 2.62s/it] 23%|██▎ | 155/681 [06:35<22:56, 2.62s/it] {'loss': 1.0951, 'grad_norm': 19.153099060058594, 'learning_rate': 4.7657694675916247e-07, 'fcm_dpo/beta': 0.006380689330399036, 'fcm_dpo/q_t': 0.4044456481933594, 'fcm_dpo/delta': -0.02670937031507492, 'fcm_dpo/margin': 66.6856689453125, 'margin_dpo/margin_mean': 66.6856689453125, 'margin_dpo/margin_std': 98.61933135986328, 'logps/chosen': -125.8794937133789, 'logps/rejected': -209.4041748046875, 'logps/ref_chosen': -60.65318298339844, 'logps/ref_rejected': -77.49220275878906, 'KL/chosen_KL_mean': -65.22631072998047, 'KL/rejected_KL_mean': -131.91197204589844, 'KL/mean': -98.56913757324219, 'KL/std': 79.4256591796875, 'logits/chosen': -0.4766504764556885, 'logits/rejected': -0.4583345055580139, 'epoch': 0.23} + 23%|██▎ | 155/681 [06:35<22:56, 2.62s/it] 23%|██▎ | 156/681 [06:38<22:58, 2.62s/it] {'loss': 1.2762, 'grad_norm': 28.152284622192383, 'learning_rate': 4.7603160505017893e-07, 'fcm_dpo/beta': 0.006421338301151991, 'fcm_dpo/q_t': 0.4446510076522827, 'fcm_dpo/delta': 0.05203431844711304, 'fcm_dpo/margin': 38.129791259765625, 'margin_dpo/margin_mean': 38.129791259765625, 'margin_dpo/margin_std': 111.29301452636719, 'logps/chosen': -161.18519592285156, 'logps/rejected': -206.99240112304688, 'logps/ref_chosen': -69.49188232421875, 'logps/ref_rejected': -77.16929626464844, 'KL/chosen_KL_mean': -91.69331359863281, 'KL/rejected_KL_mean': -129.82310485839844, 'KL/mean': -110.75820922851562, 'KL/std': 84.64684295654297, 'logits/chosen': -0.40159350633621216, 'logits/rejected': -0.3953508138656616, 'epoch': 0.23} + 23%|██▎ | 156/681 [06:38<22:58, 2.62s/it] 23%|██▎ | 157/681 [06:40<22:20, 2.56s/it] {'loss': 1.0271, 'grad_norm': 23.096948623657227, 'learning_rate': 4.7548030719819154e-07, 'fcm_dpo/beta': 0.006253876723349094, 'fcm_dpo/q_t': 0.3786003589630127, 'fcm_dpo/delta': -0.14177267253398895, 'fcm_dpo/margin': 85.21761322021484, 'margin_dpo/margin_mean': 85.21761322021484, 'margin_dpo/margin_std': 105.14042663574219, 'logps/chosen': -143.90939331054688, 'logps/rejected': -275.4049377441406, 'logps/ref_chosen': -61.368438720703125, 'logps/ref_rejected': -107.64636993408203, 'KL/chosen_KL_mean': -82.54095458984375, 'KL/rejected_KL_mean': -167.75857543945312, 'KL/mean': -125.1497573852539, 'KL/std': 92.44023895263672, 'logits/chosen': -0.3750728964805603, 'logits/rejected': -0.3818325996398926, 'epoch': 0.23} + 23%|██▎ | 157/681 [06:40<22:20, 2.56s/it] 23%|██▎ | 158/681 [06:43<22:26, 2.57s/it] {'loss': 1.0566, 'grad_norm': 19.036361694335938, 'learning_rate': 4.7492306773041136e-07, 'fcm_dpo/beta': 0.0060958778485655785, 'fcm_dpo/q_t': 0.38691407442092896, 'fcm_dpo/delta': -0.1604328155517578, 'fcm_dpo/margin': 90.4881591796875, 'margin_dpo/margin_mean': 90.4881591796875, 'margin_dpo/margin_std': 138.78392028808594, 'logps/chosen': -141.2285614013672, 'logps/rejected': -287.7984619140625, 'logps/ref_chosen': -57.612918853759766, 'logps/ref_rejected': -113.6946792602539, 'KL/chosen_KL_mean': -83.61564636230469, 'KL/rejected_KL_mean': -174.10379028320312, 'KL/mean': -128.85971069335938, 'KL/std': 114.70477294921875, 'logits/chosen': -0.37780940532684326, 'logits/rejected': -0.39571529626846313, 'epoch': 0.23} + 23%|██▎ | 158/681 [06:43<22:26, 2.57s/it] 23%|██▎ | 159/681 [06:46<22:35, 2.60s/it] {'loss': 1.1492, 'grad_norm': 25.451343536376953, 'learning_rate': 4.743599013306165e-07, 'fcm_dpo/beta': 0.006093316245824099, 'fcm_dpo/q_t': 0.4160599112510681, 'fcm_dpo/delta': 0.02347235381603241, 'fcm_dpo/margin': 61.85674285888672, 'margin_dpo/margin_mean': 61.85674285888672, 'margin_dpo/margin_std': 109.38902282714844, 'logps/chosen': -175.5362548828125, 'logps/rejected': -244.73135375976562, 'logps/ref_chosen': -81.56034851074219, 'logps/ref_rejected': -88.89871215820312, 'KL/chosen_KL_mean': -93.97590637207031, 'KL/rejected_KL_mean': -155.83265686035156, 'KL/mean': -124.90428161621094, 'KL/std': 100.59730529785156, 'logits/chosen': -0.39171531796455383, 'logits/rejected': -0.36167389154434204, 'epoch': 0.23} + 23%|██▎ | 159/681 [06:46<22:35, 2.60s/it] 23%|██▎ | 160/681 [06:48<22:20, 2.57s/it] {'loss': 1.0904, 'grad_norm': 22.877182006835938, 'learning_rate': 4.737908228387656e-07, 'fcm_dpo/beta': 0.0059524280950427055, 'fcm_dpo/q_t': 0.39600396156311035, 'fcm_dpo/delta': -0.10199404507875443, 'fcm_dpo/margin': 83.32546997070312, 'margin_dpo/margin_mean': 83.32546997070312, 'margin_dpo/margin_std': 134.5235595703125, 'logps/chosen': -161.77194213867188, 'logps/rejected': -276.5843505859375, 'logps/ref_chosen': -65.73088073730469, 'logps/ref_rejected': -97.21781921386719, 'KL/chosen_KL_mean': -96.04105377197266, 'KL/rejected_KL_mean': -179.3665313720703, 'KL/mean': -137.7037811279297, 'KL/std': 107.62297058105469, 'logits/chosen': -0.35929036140441895, 'logits/rejected': -0.35081833600997925, 'epoch': 0.23} + 23%|██▎ | 160/681 [06:48<22:20, 2.57s/it] 24%|██▎ | 161/681 [06:50<21:26, 2.47s/it] {'loss': 1.0923, 'grad_norm': 21.35667610168457, 'learning_rate': 4.7321584725060594e-07, 'fcm_dpo/beta': 0.005920952185988426, 'fcm_dpo/q_t': 0.40439367294311523, 'fcm_dpo/delta': -0.033098410815000534, 'fcm_dpo/margin': 72.9051284790039, 'margin_dpo/margin_mean': 72.9051284790039, 'margin_dpo/margin_std': 107.10279846191406, 'logps/chosen': -134.23916625976562, 'logps/rejected': -238.13877868652344, 'logps/ref_chosen': -52.43647003173828, 'logps/ref_rejected': -83.43095397949219, 'KL/chosen_KL_mean': -81.80270385742188, 'KL/rejected_KL_mean': -154.70782470703125, 'KL/mean': -118.25526428222656, 'KL/std': 84.72265625, 'logits/chosen': -0.38000649213790894, 'logits/rejected': -0.38120192289352417, 'epoch': 0.24} + 24%|██▎ | 161/681 [06:50<21:26, 2.47s/it] 24%|██▍ | 162/681 [06:53<22:07, 2.56s/it] {'loss': 1.1092, 'grad_norm': 21.55710792541504, 'learning_rate': 4.7263498971727905e-07, 'fcm_dpo/beta': 0.0058417608961462975, 'fcm_dpo/q_t': 0.40738850831985474, 'fcm_dpo/delta': -0.022580057382583618, 'fcm_dpo/margin': 71.90220642089844, 'margin_dpo/margin_mean': 71.90221405029297, 'margin_dpo/margin_std': 110.78158569335938, 'logps/chosen': -141.6376190185547, 'logps/rejected': -240.31982421875, 'logps/ref_chosen': -62.6105842590332, 'logps/ref_rejected': -89.39057922363281, 'KL/chosen_KL_mean': -79.02703094482422, 'KL/rejected_KL_mean': -150.92922973632812, 'KL/mean': -114.97813415527344, 'KL/std': 93.30059814453125, 'logits/chosen': -0.4415048360824585, 'logits/rejected': -0.42762479186058044, 'epoch': 0.24} + 24%|██▍ | 162/681 [06:53<22:07, 2.56s/it] 24%|██▍ | 163/681 [06:56<22:00, 2.55s/it] {'loss': 1.1137, 'grad_norm': 19.775178909301758, 'learning_rate': 4.720482655449212e-07, 'fcm_dpo/beta': 0.00587341096252203, 'fcm_dpo/q_t': 0.4094018042087555, 'fcm_dpo/delta': -0.013822587206959724, 'fcm_dpo/margin': 70.35995483398438, 'margin_dpo/margin_mean': 70.35994720458984, 'margin_dpo/margin_std': 112.65255737304688, 'logps/chosen': -143.24710083007812, 'logps/rejected': -234.00363159179688, 'logps/ref_chosen': -55.021629333496094, 'logps/ref_rejected': -75.418212890625, 'KL/chosen_KL_mean': -88.2254638671875, 'KL/rejected_KL_mean': -158.58541870117188, 'KL/mean': -123.40544128417969, 'KL/std': 93.95071411132812, 'logits/chosen': -0.3558083772659302, 'logits/rejected': -0.33801817893981934, 'epoch': 0.24} + 24%|██▍ | 163/681 [06:56<22:00, 2.55s/it] 24%|██▍ | 164/681 [06:58<21:50, 2.54s/it] {'loss': 1.0381, 'grad_norm': 21.015703201293945, 'learning_rate': 4.714556901942599e-07, 'fcm_dpo/beta': 0.005744011141359806, 'fcm_dpo/q_t': 0.3890402913093567, 'fcm_dpo/delta': -0.09778200834989548, 'fcm_dpo/margin': 85.5523681640625, 'margin_dpo/margin_mean': 85.5523681640625, 'margin_dpo/margin_std': 105.12376403808594, 'logps/chosen': -136.0755157470703, 'logps/rejected': -245.65184020996094, 'logps/ref_chosen': -55.64066696166992, 'logps/ref_rejected': -79.66463470458984, 'KL/chosen_KL_mean': -80.43484497070312, 'KL/rejected_KL_mean': -165.98721313476562, 'KL/mean': -123.2110366821289, 'KL/std': 91.2215576171875, 'logits/chosen': -0.34407860040664673, 'logits/rejected': -0.3298642039299011, 'epoch': 0.24} + 24%|██▍ | 164/681 [06:58<21:50, 2.54s/it] 24%|██▍ | 165/681 [07:01<21:49, 2.54s/it] {'loss': 1.1741, 'grad_norm': 21.678955078125, 'learning_rate': 4.708572792802069e-07, 'fcm_dpo/beta': 0.0058072819374501705, 'fcm_dpo/q_t': 0.42748406529426575, 'fcm_dpo/delta': 0.07930518686771393, 'fcm_dpo/margin': 55.67503356933594, 'margin_dpo/margin_mean': 55.67503356933594, 'margin_dpo/margin_std': 103.26383972167969, 'logps/chosen': -148.45767211914062, 'logps/rejected': -216.49261474609375, 'logps/ref_chosen': -61.310691833496094, 'logps/ref_rejected': -73.67060852050781, 'KL/chosen_KL_mean': -87.14698028564453, 'KL/rejected_KL_mean': -142.822021484375, 'KL/mean': -114.9844970703125, 'KL/std': 77.58013916015625, 'logits/chosen': -0.3774159252643585, 'logits/rejected': -0.351327121257782, 'epoch': 0.24} + 24%|██▍ | 165/681 [07:01<21:49, 2.54s/it] 24%|██▍ | 166/681 [07:03<20:58, 2.44s/it] {'loss': 1.019, 'grad_norm': 17.48912239074707, 'learning_rate': 4.702530485714461e-07, 'fcm_dpo/beta': 0.00565761886537075, 'fcm_dpo/q_t': 0.3809058368206024, 'fcm_dpo/delta': -0.19167430698871613, 'fcm_dpo/margin': 102.40840148925781, 'margin_dpo/margin_mean': 102.40840148925781, 'margin_dpo/margin_std': 141.89622497558594, 'logps/chosen': -128.0039825439453, 'logps/rejected': -277.52392578125, 'logps/ref_chosen': -50.98360061645508, 'logps/ref_rejected': -98.09512329101562, 'KL/chosen_KL_mean': -77.0203857421875, 'KL/rejected_KL_mean': -179.4287872314453, 'KL/mean': -128.22457885742188, 'KL/std': 112.74888610839844, 'logits/chosen': -0.30309057235717773, 'logits/rejected': -0.31303203105926514, 'epoch': 0.24} + 24%|██▍ | 166/681 [07:03<20:58, 2.44s/it] 25%|██▍ | 167/681 [07:05<21:21, 2.49s/it] {'loss': 0.9757, 'grad_norm': 18.757923126220703, 'learning_rate': 4.6964301399001877e-07, 'fcm_dpo/beta': 0.005461276508867741, 'fcm_dpo/q_t': 0.368857204914093, 'fcm_dpo/delta': -0.19935590028762817, 'fcm_dpo/margin': 107.59046173095703, 'margin_dpo/margin_mean': 107.59046936035156, 'margin_dpo/margin_std': 118.7750015258789, 'logps/chosen': -128.72389221191406, 'logps/rejected': -281.92071533203125, 'logps/ref_chosen': -50.424095153808594, 'logps/ref_rejected': -96.03042602539062, 'KL/chosen_KL_mean': -78.29979705810547, 'KL/rejected_KL_mean': -185.89027404785156, 'KL/mean': -132.09503173828125, 'KL/std': 102.53227233886719, 'logits/chosen': -0.34970927238464355, 'logits/rejected': -0.35314348340034485, 'epoch': 0.25} + 25%|██▍ | 167/681 [07:05<21:21, 2.49s/it] 25%|██▍ | 168/681 [07:08<21:34, 2.52s/it] {'loss': 1.0804, 'grad_norm': 20.719741821289062, 'learning_rate': 4.690271916109034e-07, 'fcm_dpo/beta': 0.005374173633754253, 'fcm_dpo/q_t': 0.4044000506401062, 'fcm_dpo/delta': -0.02677498757839203, 'fcm_dpo/margin': 79.17437744140625, 'margin_dpo/margin_mean': 79.17437744140625, 'margin_dpo/margin_std': 107.18096923828125, 'logps/chosen': -133.9053497314453, 'logps/rejected': -238.92544555664062, 'logps/ref_chosen': -49.462825775146484, 'logps/ref_rejected': -75.30855560302734, 'KL/chosen_KL_mean': -84.44252014160156, 'KL/rejected_KL_mean': -163.61688232421875, 'KL/mean': -124.02970886230469, 'KL/std': 95.80686950683594, 'logits/chosen': -0.32083988189697266, 'logits/rejected': -0.3103061020374298, 'epoch': 0.25} + 25%|██▍ | 168/681 [07:08<21:34, 2.52s/it] 25%|██▍ | 169/681 [07:11<22:03, 2.58s/it] {'loss': 1.1631, 'grad_norm': 21.35027503967285, 'learning_rate': 4.6840559766159235e-07, 'fcm_dpo/beta': 0.005301401484757662, 'fcm_dpo/q_t': 0.4205383062362671, 'fcm_dpo/delta': -0.07176721096038818, 'fcm_dpo/margin': 69.26856231689453, 'margin_dpo/margin_mean': 69.26856994628906, 'margin_dpo/margin_std': 130.9052734375, 'logps/chosen': -146.69606018066406, 'logps/rejected': -239.50692749023438, 'logps/ref_chosen': -59.803443908691406, 'logps/ref_rejected': -83.34574890136719, 'KL/chosen_KL_mean': -86.89261627197266, 'KL/rejected_KL_mean': -156.1611785888672, 'KL/mean': -121.52689361572266, 'KL/std': 94.73361206054688, 'logits/chosen': -0.3565632700920105, 'logits/rejected': -0.34026655554771423, 'epoch': 0.25} + 25%|██▍ | 169/681 [07:11<22:03, 2.58s/it] 25%|██▍ | 170/681 [07:14<22:32, 2.65s/it] {'loss': 1.0812, 'grad_norm': 17.703929901123047, 'learning_rate': 4.6777824852166437e-07, 'fcm_dpo/beta': 0.0052484553307294846, 'fcm_dpo/q_t': 0.401960551738739, 'fcm_dpo/delta': -0.029036525636911392, 'fcm_dpo/margin': 81.35664367675781, 'margin_dpo/margin_mean': 81.35664367675781, 'margin_dpo/margin_std': 107.8404541015625, 'logps/chosen': -126.55712890625, 'logps/rejected': -234.3593292236328, 'logps/ref_chosen': -49.471771240234375, 'logps/ref_rejected': -75.91734313964844, 'KL/chosen_KL_mean': -77.08535766601562, 'KL/rejected_KL_mean': -158.44198608398438, 'KL/mean': -117.763671875, 'KL/std': 90.00228881835938, 'logits/chosen': -0.33288633823394775, 'logits/rejected': -0.3227166533470154, 'epoch': 0.25} + 25%|██▍ | 170/681 [07:14<22:32, 2.65s/it] 25%|██▌ | 171/681 [07:16<21:39, 2.55s/it] {'loss': 1.1718, 'grad_norm': 29.754013061523438, 'learning_rate': 4.6714516072235273e-07, 'fcm_dpo/beta': 0.005305338650941849, 'fcm_dpo/q_t': 0.42465952038764954, 'fcm_dpo/delta': 0.046089254319667816, 'fcm_dpo/margin': 67.01963806152344, 'margin_dpo/margin_mean': 67.01963806152344, 'margin_dpo/margin_std': 134.08868408203125, 'logps/chosen': -195.79473876953125, 'logps/rejected': -287.6971435546875, 'logps/ref_chosen': -84.49931335449219, 'logps/ref_rejected': -109.38209533691406, 'KL/chosen_KL_mean': -111.29542541503906, 'KL/rejected_KL_mean': -178.31504821777344, 'KL/mean': -144.80523681640625, 'KL/std': 104.81484985351562, 'logits/chosen': -0.35111328959465027, 'logits/rejected': -0.3345106542110443, 'epoch': 0.25} + 25%|██▌ | 171/681 [07:16<21:39, 2.55s/it] 25%|██▌ | 172/681 [07:18<21:23, 2.52s/it] {'loss': 1.1366, 'grad_norm': 22.75365447998047, 'learning_rate': 4.6650635094610966e-07, 'fcm_dpo/beta': 0.005340388976037502, 'fcm_dpo/q_t': 0.4173203706741333, 'fcm_dpo/delta': 0.033069491386413574, 'fcm_dpo/margin': 68.93881225585938, 'margin_dpo/margin_mean': 68.9388198852539, 'margin_dpo/margin_std': 114.0185775756836, 'logps/chosen': -167.39031982421875, 'logps/rejected': -253.1118927001953, 'logps/ref_chosen': -68.65391540527344, 'logps/ref_rejected': -85.43667602539062, 'KL/chosen_KL_mean': -98.73640441894531, 'KL/rejected_KL_mean': -167.67523193359375, 'KL/mean': -133.205810546875, 'KL/std': 101.24346160888672, 'logits/chosen': -0.38472980260849, 'logits/rejected': -0.3684314489364624, 'epoch': 0.25} + 25%|██▌ | 172/681 [07:18<21:23, 2.52s/it] 25%|██▌ | 173/681 [07:21<21:14, 2.51s/it] {'loss': 1.1124, 'grad_norm': 21.469446182250977, 'learning_rate': 4.6586183602616687e-07, 'fcm_dpo/beta': 0.0053945546969771385, 'fcm_dpo/q_t': 0.41492652893066406, 'fcm_dpo/delta': 0.026172153651714325, 'fcm_dpo/margin': 69.41828918457031, 'margin_dpo/margin_mean': 69.41828918457031, 'margin_dpo/margin_std': 98.99739074707031, 'logps/chosen': -152.96517944335938, 'logps/rejected': -238.01651000976562, 'logps/ref_chosen': -63.050880432128906, 'logps/ref_rejected': -78.68392181396484, 'KL/chosen_KL_mean': -89.91429901123047, 'KL/rejected_KL_mean': -159.3325958251953, 'KL/mean': -124.62344360351562, 'KL/std': 94.92538452148438, 'logits/chosen': -0.40895795822143555, 'logits/rejected': -0.3820996880531311, 'epoch': 0.25} + 25%|██▌ | 173/681 [07:21<21:14, 2.51s/it] 26%|██▌ | 174/681 [07:23<21:16, 2.52s/it] {'loss': 1.0841, 'grad_norm': 21.169090270996094, 'learning_rate': 4.652116329460919e-07, 'fcm_dpo/beta': 0.005375551991164684, 'fcm_dpo/q_t': 0.40161585807800293, 'fcm_dpo/delta': -0.04802219197154045, 'fcm_dpo/margin': 82.8753662109375, 'margin_dpo/margin_mean': 82.8753662109375, 'margin_dpo/margin_std': 118.40190887451172, 'logps/chosen': -138.75430297851562, 'logps/rejected': -270.17791748046875, 'logps/ref_chosen': -53.36296844482422, 'logps/ref_rejected': -101.91120910644531, 'KL/chosen_KL_mean': -85.3913345336914, 'KL/rejected_KL_mean': -168.26670837402344, 'KL/mean': -126.82902526855469, 'KL/std': 99.21536254882812, 'logits/chosen': -0.326399028301239, 'logits/rejected': -0.3440948724746704, 'epoch': 0.26} + 26%|██▌ | 174/681 [07:23<21:16, 2.52s/it] 26%|██▌ | 175/681 [07:26<21:28, 2.55s/it] {'loss': 0.9595, 'grad_norm': 27.521982192993164, 'learning_rate': 4.645557588393406e-07, 'fcm_dpo/beta': 0.005171348340809345, 'fcm_dpo/q_t': 0.3668813109397888, 'fcm_dpo/delta': -0.195995032787323, 'fcm_dpo/margin': 112.97673797607422, 'margin_dpo/margin_mean': 112.97673797607422, 'margin_dpo/margin_std': 112.159912109375, 'logps/chosen': -124.20813751220703, 'logps/rejected': -281.2729187011719, 'logps/ref_chosen': -45.417762756347656, 'logps/ref_rejected': -89.50579833984375, 'KL/chosen_KL_mean': -78.79037475585938, 'KL/rejected_KL_mean': -191.76712036132812, 'KL/mean': -135.27874755859375, 'KL/std': 107.09109497070312, 'logits/chosen': -0.3237273693084717, 'logits/rejected': -0.31100332736968994, 'epoch': 0.26} + 26%|██▌ | 175/681 [07:26<21:28, 2.55s/it] 26%|██▌ | 176/681 [07:28<20:45, 2.47s/it] {'loss': 1.0478, 'grad_norm': 18.892227172851562, 'learning_rate': 4.638942309888058e-07, 'fcm_dpo/beta': 0.005067367106676102, 'fcm_dpo/q_t': 0.3938441872596741, 'fcm_dpo/delta': -0.08898322284221649, 'fcm_dpo/margin': 95.66221618652344, 'margin_dpo/margin_mean': 95.66221618652344, 'margin_dpo/margin_std': 125.021484375, 'logps/chosen': -136.11935424804688, 'logps/rejected': -276.8876953125, 'logps/ref_chosen': -50.452842712402344, 'logps/ref_rejected': -95.5589599609375, 'KL/chosen_KL_mean': -85.66651916503906, 'KL/rejected_KL_mean': -181.3287353515625, 'KL/mean': -133.4976348876953, 'KL/std': 107.46882629394531, 'logits/chosen': -0.3252803385257721, 'logits/rejected': -0.34228193759918213, 'epoch': 0.26} + 26%|██▌ | 176/681 [07:28<20:45, 2.47s/it] 26%|██▌ | 177/681 [07:31<21:04, 2.51s/it] {'loss': 1.049, 'grad_norm': 26.08759117126465, 'learning_rate': 4.6322706682636137e-07, 'fcm_dpo/beta': 0.004990983754396439, 'fcm_dpo/q_t': 0.3944365382194519, 'fcm_dpo/delta': -0.07962613552808762, 'fcm_dpo/margin': 95.35098266601562, 'margin_dpo/margin_mean': 95.35098266601562, 'margin_dpo/margin_std': 123.23958587646484, 'logps/chosen': -160.30599975585938, 'logps/rejected': -290.33428955078125, 'logps/ref_chosen': -61.216468811035156, 'logps/ref_rejected': -95.89378356933594, 'KL/chosen_KL_mean': -99.08952331542969, 'KL/rejected_KL_mean': -194.4405059814453, 'KL/mean': -146.7650146484375, 'KL/std': 116.030517578125, 'logits/chosen': -0.36417263746261597, 'logits/rejected': -0.3568111062049866, 'epoch': 0.26} + 26%|██▌ | 177/681 [07:31<21:04, 2.51s/it] 26%|██▌ | 178/681 [07:33<20:56, 2.50s/it] {'loss': 1.0039, 'grad_norm': 27.715505599975586, 'learning_rate': 4.6255428393240354e-07, 'fcm_dpo/beta': 0.00480748200789094, 'fcm_dpo/q_t': 0.37657660245895386, 'fcm_dpo/delta': -0.18060356378555298, 'fcm_dpo/margin': 118.44398498535156, 'margin_dpo/margin_mean': 118.4439926147461, 'margin_dpo/margin_std': 148.56935119628906, 'logps/chosen': -167.51449584960938, 'logps/rejected': -333.05902099609375, 'logps/ref_chosen': -58.26478958129883, 'logps/ref_rejected': -105.3653335571289, 'KL/chosen_KL_mean': -109.24971008300781, 'KL/rejected_KL_mean': -227.69369506835938, 'KL/mean': -168.47171020507812, 'KL/std': 135.19757080078125, 'logits/chosen': -0.1987697333097458, 'logits/rejected': -0.18940778076648712, 'epoch': 0.26} + 26%|██▌ | 178/681 [07:33<20:56, 2.50s/it] 26%|██▋ | 179/681 [07:36<21:27, 2.56s/it] {'loss': 1.1442, 'grad_norm': 28.284343719482422, 'learning_rate': 4.6187590003538724e-07, 'fcm_dpo/beta': 0.00476008839905262, 'fcm_dpo/q_t': 0.41322624683380127, 'fcm_dpo/delta': 0.004362210631370544, 'fcm_dpo/margin': 83.05406188964844, 'margin_dpo/margin_mean': 83.05406188964844, 'margin_dpo/margin_std': 147.2235107421875, 'logps/chosen': -176.23965454101562, 'logps/rejected': -288.7632141113281, 'logps/ref_chosen': -61.05832290649414, 'logps/ref_rejected': -90.52782440185547, 'KL/chosen_KL_mean': -115.18132019042969, 'KL/rejected_KL_mean': -198.23538208007812, 'KL/mean': -156.70834350585938, 'KL/std': 117.613525390625, 'logits/chosen': -0.30069026350975037, 'logits/rejected': -0.3113616406917572, 'epoch': 0.26} + 26%|██▋ | 179/681 [07:36<21:27, 2.56s/it] 26%|██▋ | 180/681 [07:38<21:05, 2.53s/it] {'loss': 1.0301, 'grad_norm': 20.0328369140625, 'learning_rate': 4.611919330113591e-07, 'fcm_dpo/beta': 0.004700476303696632, 'fcm_dpo/q_t': 0.3854052722454071, 'fcm_dpo/delta': -0.11138296127319336, 'fcm_dpo/margin': 107.56156158447266, 'margin_dpo/margin_mean': 107.56156158447266, 'margin_dpo/margin_std': 131.99119567871094, 'logps/chosen': -153.74847412109375, 'logps/rejected': -305.1791687011719, 'logps/ref_chosen': -54.34272003173828, 'logps/ref_rejected': -98.21183776855469, 'KL/chosen_KL_mean': -99.40576171875, 'KL/rejected_KL_mean': -206.9673309326172, 'KL/mean': -153.18653869628906, 'KL/std': 105.46197509765625, 'logits/chosen': -0.26214757561683655, 'logits/rejected': -0.25771957635879517, 'epoch': 0.26} + 26%|██▋ | 180/681 [07:39<21:05, 2.53s/it] 27%|██▋ | 181/681 [07:41<21:18, 2.56s/it] {'loss': 1.174, 'grad_norm': 23.883167266845703, 'learning_rate': 4.605024008834863e-07, 'fcm_dpo/beta': 0.004746724851429462, 'fcm_dpo/q_t': 0.428183913230896, 'fcm_dpo/delta': 0.08582982420921326, 'fcm_dpo/margin': 66.72486877441406, 'margin_dpo/margin_mean': 66.72486877441406, 'margin_dpo/margin_std': 122.89712524414062, 'logps/chosen': -141.6181640625, 'logps/rejected': -214.99871826171875, 'logps/ref_chosen': -55.000457763671875, 'logps/ref_rejected': -61.656166076660156, 'KL/chosen_KL_mean': -86.6176986694336, 'KL/rejected_KL_mean': -153.34255981445312, 'KL/mean': -119.9801254272461, 'KL/std': 96.91038513183594, 'logits/chosen': -0.3121437132358551, 'logits/rejected': -0.2885586619377136, 'epoch': 0.27} + 27%|██▋ | 181/681 [07:41<21:18, 2.56s/it] 27%|██▋ | 182/681 [07:44<21:31, 2.59s/it] {'loss': 1.0089, 'grad_norm': 18.672292709350586, 'learning_rate': 4.598073218215817e-07, 'fcm_dpo/beta': 0.00462943222373724, 'fcm_dpo/q_t': 0.37539470195770264, 'fcm_dpo/delta': -0.16154250502586365, 'fcm_dpo/margin': 119.17942810058594, 'margin_dpo/margin_mean': 119.17942810058594, 'margin_dpo/margin_std': 142.9564971923828, 'logps/chosen': -123.673095703125, 'logps/rejected': -291.2662353515625, 'logps/ref_chosen': -41.107852935791016, 'logps/ref_rejected': -89.5215835571289, 'KL/chosen_KL_mean': -82.56523895263672, 'KL/rejected_KL_mean': -201.74465942382812, 'KL/mean': -142.1549530029297, 'KL/std': 118.71504211425781, 'logits/chosen': -0.2760277986526489, 'logits/rejected': -0.28654640913009644, 'epoch': 0.27} + 27%|██▋ | 182/681 [07:44<21:31, 2.59s/it] 27%|██▋ | 183/681 [07:46<20:52, 2.51s/it] {'loss': 1.1828, 'grad_norm': 21.404373168945312, 'learning_rate': 4.5910671414162484e-07, 'fcm_dpo/beta': 0.004568018019199371, 'fcm_dpo/q_t': 0.43251746892929077, 'fcm_dpo/delta': -0.04479080066084862, 'fcm_dpo/margin': 63.46442413330078, 'margin_dpo/margin_mean': 63.46442413330078, 'margin_dpo/margin_std': 107.15849304199219, 'logps/chosen': -177.8889923095703, 'logps/rejected': -259.8045654296875, 'logps/ref_chosen': -57.52456283569336, 'logps/ref_rejected': -75.97572326660156, 'KL/chosen_KL_mean': -120.36442565917969, 'KL/rejected_KL_mean': -183.828857421875, 'KL/mean': -152.0966339111328, 'KL/std': 97.2325210571289, 'logits/chosen': -0.29337215423583984, 'logits/rejected': -0.28445976972579956, 'epoch': 0.27} + 27%|██▋ | 183/681 [07:46<20:52, 2.51s/it] 27%|██▋ | 184/681 [07:49<21:20, 2.58s/it] {'loss': 1.1735, 'grad_norm': 19.19173240661621, 'learning_rate': 4.5840059630527985e-07, 'fcm_dpo/beta': 0.0045661963522434235, 'fcm_dpo/q_t': 0.4311205744743347, 'fcm_dpo/delta': -0.003989125601947308, 'fcm_dpo/margin': 66.39598083496094, 'margin_dpo/margin_mean': 66.39598083496094, 'margin_dpo/margin_std': 115.35989379882812, 'logps/chosen': -158.5340576171875, 'logps/rejected': -243.0191650390625, 'logps/ref_chosen': -58.544952392578125, 'logps/ref_rejected': -76.63406372070312, 'KL/chosen_KL_mean': -99.98910522460938, 'KL/rejected_KL_mean': -166.38510131835938, 'KL/mean': -133.18710327148438, 'KL/std': 91.612060546875, 'logits/chosen': -0.33634817600250244, 'logits/rejected': -0.32674121856689453, 'epoch': 0.27} + 27%|██▋ | 184/681 [07:49<21:20, 2.58s/it] 27%|██▋ | 185/681 [07:51<20:57, 2.54s/it] {'loss': 1.2331, 'grad_norm': 20.08793067932129, 'learning_rate': 4.5768898691940836e-07, 'fcm_dpo/beta': 0.004670283757150173, 'fcm_dpo/q_t': 0.4466909170150757, 'fcm_dpo/delta': 0.15583746135234833, 'fcm_dpo/margin': 53.11936950683594, 'margin_dpo/margin_mean': 53.1193733215332, 'margin_dpo/margin_std': 126.44635772705078, 'logps/chosen': -170.29129028320312, 'logps/rejected': -235.1474151611328, 'logps/ref_chosen': -62.025848388671875, 'logps/ref_rejected': -73.7625961303711, 'KL/chosen_KL_mean': -108.26544189453125, 'KL/rejected_KL_mean': -161.38482666015625, 'KL/mean': -134.82513427734375, 'KL/std': 105.25448608398438, 'logits/chosen': -0.28927063941955566, 'logits/rejected': -0.26542210578918457, 'epoch': 0.27} + 27%|██▋ | 185/681 [07:51<20:57, 2.54s/it] 27%|██▋ | 186/681 [07:54<20:51, 2.53s/it] {'loss': 1.0462, 'grad_norm': 22.96204948425293, 'learning_rate': 4.5697190473557947e-07, 'fcm_dpo/beta': 0.004654415883123875, 'fcm_dpo/q_t': 0.39436179399490356, 'fcm_dpo/delta': -0.06891189515590668, 'fcm_dpo/margin': 100.02880096435547, 'margin_dpo/margin_mean': 100.02880096435547, 'margin_dpo/margin_std': 121.30867767333984, 'logps/chosen': -168.51678466796875, 'logps/rejected': -287.2645568847656, 'logps/ref_chosen': -69.35346984863281, 'logps/ref_rejected': -88.07244873046875, 'KL/chosen_KL_mean': -99.16331481933594, 'KL/rejected_KL_mean': -199.19210815429688, 'KL/mean': -149.17770385742188, 'KL/std': 105.57518768310547, 'logits/chosen': -0.36928582191467285, 'logits/rejected': -0.3490529954433441, 'epoch': 0.27} + 27%|██▋ | 186/681 [07:54<20:51, 2.53s/it] 27%|██▋ | 187/681 [07:56<20:14, 2.46s/it] {'loss': 1.096, 'grad_norm': 21.904727935791016, 'learning_rate': 4.5624936864957555e-07, 'fcm_dpo/beta': 0.004670889116823673, 'fcm_dpo/q_t': 0.4105120003223419, 'fcm_dpo/delta': 0.01085655763745308, 'fcm_dpo/margin': 83.32472229003906, 'margin_dpo/margin_mean': 83.32472229003906, 'margin_dpo/margin_std': 108.60208129882812, 'logps/chosen': -144.84596252441406, 'logps/rejected': -257.3833312988281, 'logps/ref_chosen': -52.7564582824707, 'logps/ref_rejected': -81.96910095214844, 'KL/chosen_KL_mean': -92.08950805664062, 'KL/rejected_KL_mean': -175.4142303466797, 'KL/mean': -133.75186157226562, 'KL/std': 100.45945739746094, 'logits/chosen': -0.3443525433540344, 'logits/rejected': -0.3386707901954651, 'epoch': 0.27} + 27%|██▋ | 187/681 [07:56<20:14, 2.46s/it] 28%|██▊ | 188/681 [07:59<20:27, 2.49s/it] {'loss': 1.0498, 'grad_norm': 25.85726547241211, 'learning_rate': 4.5552139770089454e-07, 'fcm_dpo/beta': 0.004614308476448059, 'fcm_dpo/q_t': 0.3958631753921509, 'fcm_dpo/delta': -0.06334332376718521, 'fcm_dpo/margin': 99.78976440429688, 'margin_dpo/margin_mean': 99.7897720336914, 'margin_dpo/margin_std': 121.95850372314453, 'logps/chosen': -135.4721221923828, 'logps/rejected': -275.3868408203125, 'logps/ref_chosen': -49.415489196777344, 'logps/ref_rejected': -89.54043579101562, 'KL/chosen_KL_mean': -86.056640625, 'KL/rejected_KL_mean': -185.84640502929688, 'KL/mean': -135.95150756835938, 'KL/std': 111.05039978027344, 'logits/chosen': -0.3261992633342743, 'logits/rejected': -0.33161741495132446, 'epoch': 0.28} + 28%|██▊ | 188/681 [07:59<20:27, 2.49s/it] 28%|██▊ | 189/681 [08:01<20:10, 2.46s/it] {'loss': 1.1224, 'grad_norm': 20.663795471191406, 'learning_rate': 4.5478801107224794e-07, 'fcm_dpo/beta': 0.004608414135873318, 'fcm_dpo/q_t': 0.4146321415901184, 'fcm_dpo/delta': 0.011658096686005592, 'fcm_dpo/margin': 84.35992431640625, 'margin_dpo/margin_mean': 84.35991668701172, 'margin_dpo/margin_std': 137.48220825195312, 'logps/chosen': -151.84950256347656, 'logps/rejected': -255.97781372070312, 'logps/ref_chosen': -52.39896011352539, 'logps/ref_rejected': -72.16735076904297, 'KL/chosen_KL_mean': -99.45054626464844, 'KL/rejected_KL_mean': -183.81045532226562, 'KL/mean': -141.6304931640625, 'KL/std': 111.99412536621094, 'logits/chosen': -0.3318672776222229, 'logits/rejected': -0.31224292516708374, 'epoch': 0.28} + 28%|██▊ | 189/681 [08:01<20:10, 2.46s/it] 28%|██▊ | 190/681 [08:03<19:36, 2.40s/it] {'loss': 1.085, 'grad_norm': 17.597808837890625, 'learning_rate': 4.5404922808905543e-07, 'fcm_dpo/beta': 0.004610296338796616, 'fcm_dpo/q_t': 0.4002448320388794, 'fcm_dpo/delta': -0.05222197249531746, 'fcm_dpo/margin': 97.3665771484375, 'margin_dpo/margin_mean': 97.3665771484375, 'margin_dpo/margin_std': 137.46681213378906, 'logps/chosen': -171.3834686279297, 'logps/rejected': -306.61749267578125, 'logps/ref_chosen': -64.68305969238281, 'logps/ref_rejected': -102.55052185058594, 'KL/chosen_KL_mean': -106.70040893554688, 'KL/rejected_KL_mean': -204.06698608398438, 'KL/mean': -155.38369750976562, 'KL/std': 119.2169418334961, 'logits/chosen': -0.367323637008667, 'logits/rejected': -0.35587793588638306, 'epoch': 0.28} + 28%|██▊ | 190/681 [08:03<19:36, 2.40s/it] 28%|██▊ | 191/681 [08:06<20:24, 2.50s/it] {'loss': 0.9539, 'grad_norm': 18.488035202026367, 'learning_rate': 4.5330506821893565e-07, 'fcm_dpo/beta': 0.0043829334899783134, 'fcm_dpo/q_t': 0.3627857565879822, 'fcm_dpo/delta': -0.23674961924552917, 'fcm_dpo/margin': 141.63238525390625, 'margin_dpo/margin_mean': 141.63238525390625, 'margin_dpo/margin_std': 152.71173095703125, 'logps/chosen': -167.79779052734375, 'logps/rejected': -350.9109802246094, 'logps/ref_chosen': -68.65887451171875, 'logps/ref_rejected': -110.1396713256836, 'KL/chosen_KL_mean': -99.138916015625, 'KL/rejected_KL_mean': -240.77130126953125, 'KL/mean': -169.95510864257812, 'KL/std': 138.72772216796875, 'logits/chosen': -0.3747413754463196, 'logits/rejected': -0.35563361644744873, 'epoch': 0.28} + 28%|██▊ | 191/681 [08:06<20:24, 2.50s/it] 28%|██▊ | 192/681 [08:09<20:19, 2.49s/it] {'loss': 1.1169, 'grad_norm': 22.34331703186035, 'learning_rate': 4.5255555107119336e-07, 'fcm_dpo/beta': 0.0043565696105360985, 'fcm_dpo/q_t': 0.4108489751815796, 'fcm_dpo/delta': -0.004114950075745583, 'fcm_dpo/margin': 92.69398498535156, 'margin_dpo/margin_mean': 92.69398498535156, 'margin_dpo/margin_std': 148.20654296875, 'logps/chosen': -197.7608642578125, 'logps/rejected': -324.04931640625, 'logps/ref_chosen': -69.72691345214844, 'logps/ref_rejected': -103.32135009765625, 'KL/chosen_KL_mean': -128.03395080566406, 'KL/rejected_KL_mean': -220.72796630859375, 'KL/mean': -174.38095092773438, 'KL/std': 119.869873046875, 'logits/chosen': -0.3097224831581116, 'logits/rejected': -0.3084886074066162, 'epoch': 0.28} + 28%|██▊ | 192/681 [08:09<20:19, 2.49s/it] 28%|██▊ | 193/681 [08:11<20:01, 2.46s/it] {'loss': 1.2515, 'grad_norm': 26.714811325073242, 'learning_rate': 4.5180069639630236e-07, 'fcm_dpo/beta': 0.004361086059361696, 'fcm_dpo/q_t': 0.44291800260543823, 'fcm_dpo/delta': 0.038300659507513046, 'fcm_dpo/margin': 55.174468994140625, 'margin_dpo/margin_mean': 55.17446517944336, 'margin_dpo/margin_std': 142.05120849609375, 'logps/chosen': -188.6202392578125, 'logps/rejected': -260.01177978515625, 'logps/ref_chosen': -60.19049835205078, 'logps/ref_rejected': -76.40755462646484, 'KL/chosen_KL_mean': -128.42974853515625, 'KL/rejected_KL_mean': -183.60421752929688, 'KL/mean': -156.01699829101562, 'KL/std': 111.40166473388672, 'logits/chosen': -0.31071868538856506, 'logits/rejected': -0.2987961769104004, 'epoch': 0.28} + 28%|██▊ | 193/681 [08:11<20:01, 2.46s/it] 28%|██▊ | 194/681 [08:13<19:41, 2.43s/it] {'loss': 1.0846, 'grad_norm': 18.569744110107422, 'learning_rate': 4.510405240853854e-07, 'fcm_dpo/beta': 0.004372420255094767, 'fcm_dpo/q_t': 0.40928915143013, 'fcm_dpo/delta': 0.013335110619664192, 'fcm_dpo/margin': 88.51360321044922, 'margin_dpo/margin_mean': 88.51359558105469, 'margin_dpo/margin_std': 102.0927963256836, 'logps/chosen': -118.35442352294922, 'logps/rejected': -229.71243286132812, 'logps/ref_chosen': -37.84037399291992, 'logps/ref_rejected': -60.684783935546875, 'KL/chosen_KL_mean': -80.51405334472656, 'KL/rejected_KL_mean': -169.02764892578125, 'KL/mean': -124.77084350585938, 'KL/std': 93.25508117675781, 'logits/chosen': -0.22196577489376068, 'logits/rejected': -0.20398879051208496, 'epoch': 0.28} + 28%|██▊ | 194/681 [08:13<19:41, 2.43s/it] 29%|██▊ | 195/681 [08:16<20:15, 2.50s/it] {'loss': 1.0719, 'grad_norm': 21.247806549072266, 'learning_rate': 4.5027505416968985e-07, 'fcm_dpo/beta': 0.004376476630568504, 'fcm_dpo/q_t': 0.4035083055496216, 'fcm_dpo/delta': -0.019938159734010696, 'fcm_dpo/margin': 95.7625503540039, 'margin_dpo/margin_mean': 95.76254272460938, 'margin_dpo/margin_std': 117.87249755859375, 'logps/chosen': -180.79556274414062, 'logps/rejected': -318.4375, 'logps/ref_chosen': -54.891571044921875, 'logps/ref_rejected': -96.77095794677734, 'KL/chosen_KL_mean': -125.90398406982422, 'KL/rejected_KL_mean': -221.66653442382812, 'KL/mean': -173.78524780273438, 'KL/std': 114.66876220703125, 'logits/chosen': -0.28381267189979553, 'logits/rejected': -0.3019316792488098, 'epoch': 0.29} + 29%|██▊ | 195/681 [08:16<20:15, 2.50s/it] 29%|██▉ | 196/681 [08:18<20:19, 2.51s/it] {'loss': 1.0614, 'grad_norm': 16.711626052856445, 'learning_rate': 4.495043068200599e-07, 'fcm_dpo/beta': 0.004304712638258934, 'fcm_dpo/q_t': 0.3957948684692383, 'fcm_dpo/delta': -0.06879311800003052, 'fcm_dpo/margin': 107.95289611816406, 'margin_dpo/margin_mean': 107.95289611816406, 'margin_dpo/margin_std': 139.44515991210938, 'logps/chosen': -151.8568572998047, 'logps/rejected': -282.6174621582031, 'logps/ref_chosen': -53.245243072509766, 'logps/ref_rejected': -76.05294799804688, 'KL/chosen_KL_mean': -98.61161041259766, 'KL/rejected_KL_mean': -206.56451416015625, 'KL/mean': -152.5880584716797, 'KL/std': 116.51144409179688, 'logits/chosen': -0.31423407793045044, 'logits/rejected': -0.29917240142822266, 'epoch': 0.29} + 29%|██▉ | 196/681 [08:18<20:19, 2.51s/it] 29%|██▉ | 197/681 [08:21<20:29, 2.54s/it] {'loss': 1.116, 'grad_norm': 18.018680572509766, 'learning_rate': 4.4872830234640493e-07, 'fcm_dpo/beta': 0.004351750016212463, 'fcm_dpo/q_t': 0.4173157811164856, 'fcm_dpo/delta': 0.03937269002199173, 'fcm_dpo/margin': 83.12100982666016, 'margin_dpo/margin_mean': 83.12100982666016, 'margin_dpo/margin_std': 115.0757827758789, 'logps/chosen': -161.23431396484375, 'logps/rejected': -261.1438903808594, 'logps/ref_chosen': -60.42033386230469, 'logps/ref_rejected': -77.20890808105469, 'KL/chosen_KL_mean': -100.81398010253906, 'KL/rejected_KL_mean': -183.9349822998047, 'KL/mean': -142.37448120117188, 'KL/std': 100.89237976074219, 'logits/chosen': -0.303945392370224, 'logits/rejected': -0.2977169454097748, 'epoch': 0.29} + 29%|██▉ | 197/681 [08:21<20:29, 2.54s/it] 29%|██▉ | 198/681 [08:24<20:32, 2.55s/it] {'loss': 1.0595, 'grad_norm': 19.930042266845703, 'learning_rate': 4.479470611971645e-07, 'fcm_dpo/beta': 0.0043054306879639626, 'fcm_dpo/q_t': 0.3973381817340851, 'fcm_dpo/delta': -0.06737668812274933, 'fcm_dpo/margin': 107.83549499511719, 'margin_dpo/margin_mean': 107.83550262451172, 'margin_dpo/margin_std': 144.60723876953125, 'logps/chosen': -167.53515625, 'logps/rejected': -317.5777282714844, 'logps/ref_chosen': -55.03618621826172, 'logps/ref_rejected': -97.24325561523438, 'KL/chosen_KL_mean': -112.49896240234375, 'KL/rejected_KL_mean': -220.33447265625, 'KL/mean': -166.41671752929688, 'KL/std': 126.63810729980469, 'logits/chosen': -0.3347511887550354, 'logits/rejected': -0.3346249759197235, 'epoch': 0.29} + 29%|██▉ | 198/681 [08:24<20:32, 2.55s/it] 29%|██▉ | 199/681 [08:26<20:51, 2.60s/it] {'loss': 1.0729, 'grad_norm': 21.286582946777344, 'learning_rate': 4.471606039587695e-07, 'fcm_dpo/beta': 0.004234119318425655, 'fcm_dpo/q_t': 0.39938676357269287, 'fcm_dpo/delta': -0.047552645206451416, 'fcm_dpo/margin': 104.99636840820312, 'margin_dpo/margin_mean': 104.99636840820312, 'margin_dpo/margin_std': 139.47503662109375, 'logps/chosen': -165.24990844726562, 'logps/rejected': -298.065673828125, 'logps/ref_chosen': -56.828826904296875, 'logps/ref_rejected': -84.64820861816406, 'KL/chosen_KL_mean': -108.42107391357422, 'KL/rejected_KL_mean': -213.41744995117188, 'KL/mean': -160.91925048828125, 'KL/std': 113.93467712402344, 'logits/chosen': -0.3568047285079956, 'logits/rejected': -0.34048551321029663, 'epoch': 0.29} + 29%|██▉ | 199/681 [08:26<20:51, 2.60s/it] 29%|██▉ | 200/681 [08:29<20:52, 2.60s/it] {'loss': 1.0865, 'grad_norm': 19.893062591552734, 'learning_rate': 4.4636895135509966e-07, 'fcm_dpo/beta': 0.0041997479274868965, 'fcm_dpo/q_t': 0.4007049798965454, 'fcm_dpo/delta': -0.05252185836434364, 'fcm_dpo/margin': 107.0880126953125, 'margin_dpo/margin_mean': 107.0880126953125, 'margin_dpo/margin_std': 158.81661987304688, 'logps/chosen': -159.93264770507812, 'logps/rejected': -294.5620422363281, 'logps/ref_chosen': -53.06706237792969, 'logps/ref_rejected': -80.60843658447266, 'KL/chosen_KL_mean': -106.86558532714844, 'KL/rejected_KL_mean': -213.95359802246094, 'KL/mean': -160.4095916748047, 'KL/std': 124.127197265625, 'logits/chosen': -0.26880979537963867, 'logits/rejected': -0.24893805384635925, 'epoch': 0.29} + 29%|██▉ | 200/681 [08:29<20:52, 2.60s/it] 30%|██▉ | 201/681 [08:32<20:47, 2.60s/it] {'loss': 1.0782, 'grad_norm': 19.711732864379883, 'learning_rate': 4.455721242469372e-07, 'fcm_dpo/beta': 0.004188035614788532, 'fcm_dpo/q_t': 0.4008665084838867, 'fcm_dpo/delta': -0.04069505259394646, 'fcm_dpo/margin': 104.79566955566406, 'margin_dpo/margin_mean': 104.79566955566406, 'margin_dpo/margin_std': 143.77313232421875, 'logps/chosen': -186.86520385742188, 'logps/rejected': -331.06683349609375, 'logps/ref_chosen': -75.4022216796875, 'logps/ref_rejected': -114.80821990966797, 'KL/chosen_KL_mean': -111.46297454833984, 'KL/rejected_KL_mean': -216.25863647460938, 'KL/mean': -163.86080932617188, 'KL/std': 128.08802795410156, 'logits/chosen': -0.3536655306816101, 'logits/rejected': -0.34921911358833313, 'epoch': 0.3} + 30%|██▉ | 201/681 [08:32<20:47, 2.60s/it] 30%|██▉ | 202/681 [08:34<20:49, 2.61s/it] {'loss': 1.1837, 'grad_norm': 19.91847038269043, 'learning_rate': 4.4477014363141755e-07, 'fcm_dpo/beta': 0.004229954443871975, 'fcm_dpo/q_t': 0.43059661984443665, 'fcm_dpo/delta': 0.09195201843976974, 'fcm_dpo/margin': 73.4825210571289, 'margin_dpo/margin_mean': 73.48252868652344, 'margin_dpo/margin_std': 142.18524169921875, 'logps/chosen': -165.71932983398438, 'logps/rejected': -276.0855712890625, 'logps/ref_chosen': -50.101318359375, 'logps/ref_rejected': -86.98503112792969, 'KL/chosen_KL_mean': -115.6180191040039, 'KL/rejected_KL_mean': -189.10052490234375, 'KL/mean': -152.35928344726562, 'KL/std': 110.5206527709961, 'logits/chosen': -0.3016967177391052, 'logits/rejected': -0.3155549168586731, 'epoch': 0.3} + 30%|██▉ | 202/681 [08:34<20:49, 2.61s/it] 30%|██▉ | 203/681 [08:37<21:01, 2.64s/it] {'loss': 1.1, 'grad_norm': 20.661174774169922, 'learning_rate': 4.439630306414758e-07, 'fcm_dpo/beta': 0.0042491694912314415, 'fcm_dpo/q_t': 0.41100189089775085, 'fcm_dpo/delta': 0.011875176802277565, 'fcm_dpo/margin': 91.44692993164062, 'margin_dpo/margin_mean': 91.44693756103516, 'margin_dpo/margin_std': 124.70313262939453, 'logps/chosen': -174.59019470214844, 'logps/rejected': -291.3233947753906, 'logps/ref_chosen': -60.60969543457031, 'logps/ref_rejected': -85.89596557617188, 'KL/chosen_KL_mean': -113.98049926757812, 'KL/rejected_KL_mean': -205.42742919921875, 'KL/mean': -159.70396423339844, 'KL/std': 113.47210693359375, 'logits/chosen': -0.34466350078582764, 'logits/rejected': -0.33388030529022217, 'epoch': 0.3} + 30%|██▉ | 203/681 [08:37<21:01, 2.64s/it] 30%|██▉ | 204/681 [08:40<21:12, 2.67s/it] {'loss': 1.1461, 'grad_norm': 22.547698974609375, 'learning_rate': 4.431508065452897e-07, 'fcm_dpo/beta': 0.004280552733689547, 'fcm_dpo/q_t': 0.42007431387901306, 'fcm_dpo/delta': 0.03934932500123978, 'fcm_dpo/margin': 84.58815002441406, 'margin_dpo/margin_mean': 84.58815002441406, 'margin_dpo/margin_std': 144.9606170654297, 'logps/chosen': -208.5168914794922, 'logps/rejected': -300.635986328125, 'logps/ref_chosen': -80.16496276855469, 'logps/ref_rejected': -87.69590759277344, 'KL/chosen_KL_mean': -128.3519287109375, 'KL/rejected_KL_mean': -212.94009399414062, 'KL/mean': -170.64601135253906, 'KL/std': 125.15516662597656, 'logits/chosen': -0.42533358931541443, 'logits/rejected': -0.38667869567871094, 'epoch': 0.3} + 30%|██▉ | 204/681 [08:40<21:12, 2.67s/it] 30%|███ | 205/681 [08:42<21:03, 2.65s/it] {'loss': 1.0529, 'grad_norm': 20.967945098876953, 'learning_rate': 4.4233349274571974e-07, 'fcm_dpo/beta': 0.004203906282782555, 'fcm_dpo/q_t': 0.39093706011772156, 'fcm_dpo/delta': -0.08103010058403015, 'fcm_dpo/margin': 113.03794860839844, 'margin_dpo/margin_mean': 113.03794860839844, 'margin_dpo/margin_std': 139.7296142578125, 'logps/chosen': -183.41543579101562, 'logps/rejected': -322.1936950683594, 'logps/ref_chosen': -59.384735107421875, 'logps/ref_rejected': -85.12505340576172, 'KL/chosen_KL_mean': -124.03070068359375, 'KL/rejected_KL_mean': -237.0686492919922, 'KL/mean': -180.54966735839844, 'KL/std': 127.14155578613281, 'logits/chosen': -0.31887465715408325, 'logits/rejected': -0.2883029878139496, 'epoch': 0.3} + 30%|███ | 205/681 [08:42<21:03, 2.65s/it] 30%|███ | 206/681 [08:45<20:09, 2.55s/it] {'loss': 1.0156, 'grad_norm': 26.109760284423828, 'learning_rate': 4.415111107797445e-07, 'fcm_dpo/beta': 0.004139425233006477, 'fcm_dpo/q_t': 0.38356366753578186, 'fcm_dpo/delta': -0.10684061050415039, 'fcm_dpo/margin': 120.99469757080078, 'margin_dpo/margin_mean': 120.99468994140625, 'margin_dpo/margin_std': 130.6953125, 'logps/chosen': -162.138427734375, 'logps/rejected': -335.12213134765625, 'logps/ref_chosen': -46.964500427246094, 'logps/ref_rejected': -98.9534912109375, 'KL/chosen_KL_mean': -115.17393493652344, 'KL/rejected_KL_mean': -236.16864013671875, 'KL/mean': -175.67129516601562, 'KL/std': 120.58004760742188, 'logits/chosen': -0.27412861585617065, 'logits/rejected': -0.2763686776161194, 'epoch': 0.3} + 30%|███ | 206/681 [08:45<20:09, 2.55s/it] 30%|███ | 207/681 [08:47<20:14, 2.56s/it] {'loss': 0.9897, 'grad_norm': 25.737268447875977, 'learning_rate': 4.4068368231789365e-07, 'fcm_dpo/beta': 0.004033949691802263, 'fcm_dpo/q_t': 0.37533849477767944, 'fcm_dpo/delta': -0.17078402638435364, 'fcm_dpo/margin': 139.16213989257812, 'margin_dpo/margin_mean': 139.16213989257812, 'margin_dpo/margin_std': 157.79168701171875, 'logps/chosen': -161.2530517578125, 'logps/rejected': -328.8067626953125, 'logps/ref_chosen': -56.05625915527344, 'logps/ref_rejected': -84.44779968261719, 'KL/chosen_KL_mean': -105.1968002319336, 'KL/rejected_KL_mean': -244.35894775390625, 'KL/mean': -174.7778778076172, 'KL/std': 136.5259246826172, 'logits/chosen': -0.3557325005531311, 'logits/rejected': -0.3302071690559387, 'epoch': 0.3} + 30%|███ | 207/681 [08:47<20:14, 2.56s/it] 31%|███ | 208/681 [08:50<20:22, 2.58s/it] {'loss': 1.092, 'grad_norm': 24.69184112548828, 'learning_rate': 4.398512291636768e-07, 'fcm_dpo/beta': 0.003962271846830845, 'fcm_dpo/q_t': 0.40099895000457764, 'fcm_dpo/delta': -0.04002426564693451, 'fcm_dpo/margin': 110.57653045654297, 'margin_dpo/margin_mean': 110.57653045654297, 'margin_dpo/margin_std': 163.5906982421875, 'logps/chosen': -231.79660034179688, 'logps/rejected': -369.5924072265625, 'logps/ref_chosen': -67.06761169433594, 'logps/ref_rejected': -94.28689575195312, 'KL/chosen_KL_mean': -164.72898864746094, 'KL/rejected_KL_mean': -275.3055114746094, 'KL/mean': -220.01724243164062, 'KL/std': 134.671875, 'logits/chosen': -0.3562470078468323, 'logits/rejected': -0.337843656539917, 'epoch': 0.31} + 31%|███ | 208/681 [08:50<20:22, 2.58s/it] 31%|███ | 209/681 [08:52<19:45, 2.51s/it] {'loss': 1.1271, 'grad_norm': 29.169567108154297, 'learning_rate': 4.3901377325300857e-07, 'fcm_dpo/beta': 0.003970026038587093, 'fcm_dpo/q_t': 0.4124048352241516, 'fcm_dpo/delta': 0.012157567776739597, 'fcm_dpo/margin': 97.8099365234375, 'margin_dpo/margin_mean': 97.8099365234375, 'margin_dpo/margin_std': 156.21786499023438, 'logps/chosen': -194.86485290527344, 'logps/rejected': -317.4346008300781, 'logps/ref_chosen': -56.18169403076172, 'logps/ref_rejected': -80.94152069091797, 'KL/chosen_KL_mean': -138.68316650390625, 'KL/rejected_KL_mean': -236.49307250976562, 'KL/mean': -187.58811950683594, 'KL/std': 122.4765853881836, 'logits/chosen': -0.25604674220085144, 'logits/rejected': -0.24463605880737305, 'epoch': 0.31} + 31%|███ | 209/681 [08:52<19:45, 2.51s/it] 31%|███ | 210/681 [08:55<19:33, 2.49s/it] {'loss': 1.0728, 'grad_norm': 24.660263061523438, 'learning_rate': 4.381713366536311e-07, 'fcm_dpo/beta': 0.003958011977374554, 'fcm_dpo/q_t': 0.39954549074172974, 'fcm_dpo/delta': -0.0493808314204216, 'fcm_dpo/margin': 112.94706726074219, 'margin_dpo/margin_mean': 112.94705200195312, 'margin_dpo/margin_std': 152.02679443359375, 'logps/chosen': -173.29229736328125, 'logps/rejected': -316.54913330078125, 'logps/ref_chosen': -46.371822357177734, 'logps/ref_rejected': -76.68162536621094, 'KL/chosen_KL_mean': -126.92047119140625, 'KL/rejected_KL_mean': -239.86752319335938, 'KL/mean': -183.3939971923828, 'KL/std': 125.841552734375, 'logits/chosen': -0.2841013967990875, 'logits/rejected': -0.275867760181427, 'epoch': 0.31} + 31%|███ | 210/681 [08:55<19:33, 2.49s/it] 31%|███ | 211/681 [08:57<19:04, 2.43s/it] {'loss': 1.141, 'grad_norm': 32.09341812133789, 'learning_rate': 4.373239415645323e-07, 'fcm_dpo/beta': 0.003944946452975273, 'fcm_dpo/q_t': 0.4174480438232422, 'fcm_dpo/delta': 0.01675173081457615, 'fcm_dpo/margin': 97.3060302734375, 'margin_dpo/margin_mean': 97.3060302734375, 'margin_dpo/margin_std': 168.17588806152344, 'logps/chosen': -257.2720947265625, 'logps/rejected': -362.4667663574219, 'logps/ref_chosen': -78.93235778808594, 'logps/ref_rejected': -86.82098388671875, 'KL/chosen_KL_mean': -178.33975219726562, 'KL/rejected_KL_mean': -275.6457824707031, 'KL/mean': -226.99276733398438, 'KL/std': 142.97116088867188, 'logits/chosen': -0.2926616668701172, 'logits/rejected': -0.25190287828445435, 'epoch': 0.31} + 31%|███ | 211/681 [08:57<19:04, 2.43s/it] 31%|███ | 212/681 [08:59<19:15, 2.46s/it] {'loss': 1.0332, 'grad_norm': 25.37755584716797, 'learning_rate': 4.3647161031536086e-07, 'fcm_dpo/beta': 0.0038361717015504837, 'fcm_dpo/q_t': 0.3819977641105652, 'fcm_dpo/delta': -0.13747426867485046, 'fcm_dpo/margin': 137.662841796875, 'margin_dpo/margin_mean': 137.66285705566406, 'margin_dpo/margin_std': 171.55409240722656, 'logps/chosen': -205.83685302734375, 'logps/rejected': -388.36053466796875, 'logps/ref_chosen': -58.19701385498047, 'logps/ref_rejected': -103.05785369873047, 'KL/chosen_KL_mean': -147.63983154296875, 'KL/rejected_KL_mean': -285.30267333984375, 'KL/mean': -216.47125244140625, 'KL/std': 154.91241455078125, 'logits/chosen': -0.3288855254650116, 'logits/rejected': -0.32180070877075195, 'epoch': 0.31} + 31%|███ | 212/681 [08:59<19:15, 2.46s/it] 31%|███▏ | 213/681 [09:02<19:32, 2.50s/it] {'loss': 1.036, 'grad_norm': 25.538270950317383, 'learning_rate': 4.3561436536583774e-07, 'fcm_dpo/beta': 0.0037672575563192368, 'fcm_dpo/q_t': 0.38752636313438416, 'fcm_dpo/delta': -0.10035522282123566, 'fcm_dpo/margin': 131.33128356933594, 'margin_dpo/margin_mean': 131.33128356933594, 'margin_dpo/margin_std': 160.22622680664062, 'logps/chosen': -205.17935180664062, 'logps/rejected': -362.9126281738281, 'logps/ref_chosen': -67.51271057128906, 'logps/ref_rejected': -93.91471862792969, 'KL/chosen_KL_mean': -137.6666259765625, 'KL/rejected_KL_mean': -268.9979248046875, 'KL/mean': -203.332275390625, 'KL/std': 133.84786987304688, 'logits/chosen': -0.3342677354812622, 'logits/rejected': -0.3116719126701355, 'epoch': 0.31} + 31%|███▏ | 213/681 [09:02<19:32, 2.50s/it] 31%|███▏ | 214/681 [09:04<18:53, 2.43s/it] {'loss': 1.0677, 'grad_norm': 20.97165870666504, 'learning_rate': 4.3475222930516473e-07, 'fcm_dpo/beta': 0.0037363125011324883, 'fcm_dpo/q_t': 0.39983218908309937, 'fcm_dpo/delta': -0.04649418964982033, 'fcm_dpo/margin': 118.94905090332031, 'margin_dpo/margin_mean': 118.94905090332031, 'margin_dpo/margin_std': 155.6826629638672, 'logps/chosen': -160.7905731201172, 'logps/rejected': -315.65216064453125, 'logps/ref_chosen': -41.604888916015625, 'logps/ref_rejected': -77.51741027832031, 'KL/chosen_KL_mean': -119.18568420410156, 'KL/rejected_KL_mean': -238.13473510742188, 'KL/mean': -178.6602020263672, 'KL/std': 132.51971435546875, 'logits/chosen': -0.23450475931167603, 'logits/rejected': -0.23990775644779205, 'epoch': 0.31} + 31%|███▏ | 214/681 [09:04<18:53, 2.43s/it] 32%|███▏ | 215/681 [09:07<19:20, 2.49s/it] {'loss': 1.0375, 'grad_norm': 25.126728057861328, 'learning_rate': 4.3388522485142885e-07, 'fcm_dpo/beta': 0.0036775285843759775, 'fcm_dpo/q_t': 0.39243778586387634, 'fcm_dpo/delta': -0.0715101957321167, 'fcm_dpo/margin': 127.212158203125, 'margin_dpo/margin_mean': 127.212158203125, 'margin_dpo/margin_std': 143.94126892089844, 'logps/chosen': -193.4869384765625, 'logps/rejected': -357.3844909667969, 'logps/ref_chosen': -53.279266357421875, 'logps/ref_rejected': -89.96464538574219, 'KL/chosen_KL_mean': -140.20767211914062, 'KL/rejected_KL_mean': -267.41986083984375, 'KL/mean': -203.81375122070312, 'KL/std': 139.04193115234375, 'logits/chosen': -0.28624850511550903, 'logits/rejected': -0.2777059078216553, 'epoch': 0.32} + 32%|███▏ | 215/681 [09:07<19:20, 2.49s/it] 32%|███▏ | 216/681 [09:10<20:04, 2.59s/it] {'loss': 1.0802, 'grad_norm': 26.616178512573242, 'learning_rate': 4.330133748510036e-07, 'fcm_dpo/beta': 0.003656826913356781, 'fcm_dpo/q_t': 0.39861971139907837, 'fcm_dpo/delta': -0.05615860968828201, 'fcm_dpo/margin': 124.02898406982422, 'margin_dpo/margin_mean': 124.02898406982422, 'margin_dpo/margin_std': 176.90469360351562, 'logps/chosen': -193.15084838867188, 'logps/rejected': -345.490966796875, 'logps/ref_chosen': -48.887794494628906, 'logps/ref_rejected': -77.19892883300781, 'KL/chosen_KL_mean': -144.26304626464844, 'KL/rejected_KL_mean': -268.2920227050781, 'KL/mean': -206.27752685546875, 'KL/std': 143.73822021484375, 'logits/chosen': -0.26884669065475464, 'logits/rejected': -0.2517741322517395, 'epoch': 0.32} + 32%|███▏ | 216/681 [09:10<20:04, 2.59s/it] 32%|███▏ | 217/681 [09:12<20:06, 2.60s/it] {'loss': 1.0109, 'grad_norm': 20.384174346923828, 'learning_rate': 4.3213670227794757e-07, 'fcm_dpo/beta': 0.0035566347651183605, 'fcm_dpo/q_t': 0.3826901316642761, 'fcm_dpo/delta': -0.12853044271469116, 'fcm_dpo/margin': 146.6175537109375, 'margin_dpo/margin_mean': 146.6175537109375, 'margin_dpo/margin_std': 168.1234130859375, 'logps/chosen': -199.64779663085938, 'logps/rejected': -396.4983825683594, 'logps/ref_chosen': -49.845306396484375, 'logps/ref_rejected': -100.07832336425781, 'KL/chosen_KL_mean': -149.80250549316406, 'KL/rejected_KL_mean': -296.4200439453125, 'KL/mean': -223.11126708984375, 'KL/std': 148.53799438476562, 'logits/chosen': -0.2615566849708557, 'logits/rejected': -0.25557541847229004, 'epoch': 0.32} + 32%|███▏ | 217/681 [09:12<20:06, 2.60s/it] 32%|███▏ | 218/681 [09:15<20:08, 2.61s/it] {'loss': 1.1138, 'grad_norm': 23.56877326965332, 'learning_rate': 4.3125523023339815e-07, 'fcm_dpo/beta': 0.003537412267178297, 'fcm_dpo/q_t': 0.4114026129245758, 'fcm_dpo/delta': 0.0042178574949502945, 'fcm_dpo/margin': 111.91874694824219, 'margin_dpo/margin_mean': 111.91874694824219, 'margin_dpo/margin_std': 170.3919677734375, 'logps/chosen': -213.80563354492188, 'logps/rejected': -354.9941101074219, 'logps/ref_chosen': -58.576683044433594, 'logps/ref_rejected': -87.84639739990234, 'KL/chosen_KL_mean': -155.2289581298828, 'KL/rejected_KL_mean': -267.147705078125, 'KL/mean': -211.18832397460938, 'KL/std': 144.78065490722656, 'logits/chosen': -0.2710033059120178, 'logits/rejected': -0.26507091522216797, 'epoch': 0.32} + 32%|███▏ | 218/681 [09:15<20:08, 2.61s/it] 32%|███▏ | 219/681 [09:18<20:08, 2.62s/it] {'loss': 1.1723, 'grad_norm': 27.783300399780273, 'learning_rate': 4.303689819449636e-07, 'fcm_dpo/beta': 0.003590244799852371, 'fcm_dpo/q_t': 0.4225817918777466, 'fcm_dpo/delta': 0.05411606281995773, 'fcm_dpo/margin': 96.66064453125, 'margin_dpo/margin_mean': 96.66064453125, 'margin_dpo/margin_std': 184.8462371826172, 'logps/chosen': -228.47341918945312, 'logps/rejected': -349.88067626953125, 'logps/ref_chosen': -61.083858489990234, 'logps/ref_rejected': -85.83042907714844, 'KL/chosen_KL_mean': -167.38955688476562, 'KL/rejected_KL_mean': -264.05023193359375, 'KL/mean': -215.71990966796875, 'KL/std': 150.576904296875, 'logits/chosen': -0.3175503611564636, 'logits/rejected': -0.31175172328948975, 'epoch': 0.32} + 32%|███▏ | 219/681 [09:18<20:08, 2.62s/it] 32%|███▏ | 220/681 [09:20<20:05, 2.61s/it] {'loss': 1.1734, 'grad_norm': 28.48792266845703, 'learning_rate': 4.2947798076611047e-07, 'fcm_dpo/beta': 0.0036365140695124865, 'fcm_dpo/q_t': 0.4320225119590759, 'fcm_dpo/delta': 0.11278827488422394, 'fcm_dpo/margin': 79.91914367675781, 'margin_dpo/margin_mean': 79.91913604736328, 'margin_dpo/margin_std': 133.44189453125, 'logps/chosen': -260.775390625, 'logps/rejected': -358.3487548828125, 'logps/ref_chosen': -70.03128051757812, 'logps/ref_rejected': -87.68551635742188, 'KL/chosen_KL_mean': -190.74407958984375, 'KL/rejected_KL_mean': -270.6632080078125, 'KL/mean': -230.7036590576172, 'KL/std': 130.32717895507812, 'logits/chosen': -0.3003755211830139, 'logits/rejected': -0.2773016095161438, 'epoch': 0.32} + 32%|███▏ | 220/681 [09:20<20:05, 2.61s/it] 32%|███▏ | 221/681 [09:23<19:46, 2.58s/it] {'loss': 0.9307, 'grad_norm': 28.492124557495117, 'learning_rate': 4.285822501755485e-07, 'fcm_dpo/beta': 0.003541819052770734, 'fcm_dpo/q_t': 0.3551170825958252, 'fcm_dpo/delta': -0.24972575902938843, 'fcm_dpo/margin': 179.11788940429688, 'margin_dpo/margin_mean': 179.11788940429688, 'margin_dpo/margin_std': 166.56512451171875, 'logps/chosen': -215.40435791015625, 'logps/rejected': -448.835205078125, 'logps/ref_chosen': -52.15470886230469, 'logps/ref_rejected': -106.46768188476562, 'KL/chosen_KL_mean': -163.2496337890625, 'KL/rejected_KL_mean': -342.3675231933594, 'KL/mean': -252.80859375, 'KL/std': 164.04974365234375, 'logits/chosen': -0.28169721364974976, 'logits/rejected': -0.28886687755584717, 'epoch': 0.32} + 32%|███▏ | 221/681 [09:23<19:46, 2.58s/it] 33%|███▎ | 222/681 [09:25<19:39, 2.57s/it] {'loss': 1.0546, 'grad_norm': 19.775772094726562, 'learning_rate': 4.276818137766118e-07, 'fcm_dpo/beta': 0.0034665679559111595, 'fcm_dpo/q_t': 0.39466869831085205, 'fcm_dpo/delta': -0.0677119642496109, 'fcm_dpo/margin': 133.98260498046875, 'margin_dpo/margin_mean': 133.98260498046875, 'margin_dpo/margin_std': 168.62554931640625, 'logps/chosen': -225.58395385742188, 'logps/rejected': -398.59661865234375, 'logps/ref_chosen': -60.971099853515625, 'logps/ref_rejected': -100.00115203857422, 'KL/chosen_KL_mean': -164.61285400390625, 'KL/rejected_KL_mean': -298.595458984375, 'KL/mean': -231.60415649414062, 'KL/std': 149.91842651367188, 'logits/chosen': -0.31213629245758057, 'logits/rejected': -0.3134229779243469, 'epoch': 0.33} + 33%|███▎ | 222/681 [09:25<19:39, 2.57s/it] 33%|███▎ | 223/681 [09:27<18:44, 2.46s/it] {'loss': 1.1262, 'grad_norm': 23.93748664855957, 'learning_rate': 4.2677669529663686e-07, 'fcm_dpo/beta': 0.0034393020905554295, 'fcm_dpo/q_t': 0.4120517075061798, 'fcm_dpo/delta': 0.009148719720542431, 'fcm_dpo/margin': 113.74103546142578, 'margin_dpo/margin_mean': 113.74102783203125, 'margin_dpo/margin_std': 185.43588256835938, 'logps/chosen': -224.71560668945312, 'logps/rejected': -368.64111328125, 'logps/ref_chosen': -52.64057540893555, 'logps/ref_rejected': -82.82502746582031, 'KL/chosen_KL_mean': -172.0750274658203, 'KL/rejected_KL_mean': -285.8160705566406, 'KL/mean': -228.94554138183594, 'KL/std': 145.8609161376953, 'logits/chosen': -0.25658541917800903, 'logits/rejected': -0.2517361640930176, 'epoch': 0.33} + 33%|███▎ | 223/681 [09:27<18:44, 2.46s/it] 33%|███▎ | 224/681 [09:30<17:55, 2.35s/it] {'loss': 1.0858, 'grad_norm': 25.208892822265625, 'learning_rate': 4.2586691858633747e-07, 'fcm_dpo/beta': 0.003397725522518158, 'fcm_dpo/q_t': 0.4026610255241394, 'fcm_dpo/delta': -0.04458841681480408, 'fcm_dpo/margin': 129.87673950195312, 'margin_dpo/margin_mean': 129.87673950195312, 'margin_dpo/margin_std': 186.29876708984375, 'logps/chosen': -195.78643798828125, 'logps/rejected': -354.18426513671875, 'logps/ref_chosen': -48.59541320800781, 'logps/ref_rejected': -77.11648559570312, 'KL/chosen_KL_mean': -147.19102478027344, 'KL/rejected_KL_mean': -277.0677490234375, 'KL/mean': -212.12939453125, 'KL/std': 160.6813507080078, 'logits/chosen': -0.29166120290756226, 'logits/rejected': -0.2709968686103821, 'epoch': 0.33} + 33%|███▎ | 224/681 [09:30<17:55, 2.35s/it] 33%|███▎ | 225/681 [09:32<17:42, 2.33s/it] {'loss': 1.0318, 'grad_norm': 20.780384063720703, 'learning_rate': 4.249525076191759e-07, 'fcm_dpo/beta': 0.0033569016959518194, 'fcm_dpo/q_t': 0.3864118158817291, 'fcm_dpo/delta': -0.10895158350467682, 'fcm_dpo/margin': 149.94940185546875, 'margin_dpo/margin_mean': 149.94940185546875, 'margin_dpo/margin_std': 185.24148559570312, 'logps/chosen': -222.9158935546875, 'logps/rejected': -414.7677307128906, 'logps/ref_chosen': -58.000465393066406, 'logps/ref_rejected': -99.90291595458984, 'KL/chosen_KL_mean': -164.91543579101562, 'KL/rejected_KL_mean': -314.86480712890625, 'KL/mean': -239.89012145996094, 'KL/std': 153.07418823242188, 'logits/chosen': -0.32863831520080566, 'logits/rejected': -0.317804753780365, 'epoch': 0.33} + 33%|███▎ | 225/681 [09:32<17:42, 2.33s/it] 33%|███▎ | 226/681 [09:34<18:19, 2.42s/it] {'loss': 1.1056, 'grad_norm': 24.846914291381836, 'learning_rate': 4.2403348649073167e-07, 'fcm_dpo/beta': 0.0033233477734029293, 'fcm_dpo/q_t': 0.4114220142364502, 'fcm_dpo/delta': 0.00356471911072731, 'fcm_dpo/margin': 119.13485717773438, 'margin_dpo/margin_mean': 119.13485717773438, 'margin_dpo/margin_std': 170.89193725585938, 'logps/chosen': -198.0763397216797, 'logps/rejected': -337.0001525878906, 'logps/ref_chosen': -58.898799896240234, 'logps/ref_rejected': -78.68775939941406, 'KL/chosen_KL_mean': -139.17755126953125, 'KL/rejected_KL_mean': -258.3123779296875, 'KL/mean': -198.74496459960938, 'KL/std': 148.14669799804688, 'logits/chosen': -0.3634711503982544, 'logits/rejected': -0.3225502371788025, 'epoch': 0.33} + 33%|███▎ | 226/681 [09:34<18:19, 2.42s/it] 33%|███▎ | 227/681 [09:37<18:05, 2.39s/it] {'loss': 1.0331, 'grad_norm': 21.650360107421875, 'learning_rate': 4.2310987941806615e-07, 'fcm_dpo/beta': 0.0032854501623660326, 'fcm_dpo/q_t': 0.38895145058631897, 'fcm_dpo/delta': -0.09252498298883438, 'fcm_dpo/margin': 148.35679626464844, 'margin_dpo/margin_mean': 148.35679626464844, 'margin_dpo/margin_std': 176.366943359375, 'logps/chosen': -220.1329345703125, 'logps/rejected': -408.82989501953125, 'logps/ref_chosen': -59.072181701660156, 'logps/ref_rejected': -99.41236877441406, 'KL/chosen_KL_mean': -161.0607452392578, 'KL/rejected_KL_mean': -309.41754150390625, 'KL/mean': -235.2391357421875, 'KL/std': 167.58319091796875, 'logits/chosen': -0.3720400631427765, 'logits/rejected': -0.35931509733200073, 'epoch': 0.33} + 33%|███▎ | 227/681 [09:37<18:05, 2.39s/it] 33%|███▎ | 228/681 [09:40<19:00, 2.52s/it] {'loss': 1.1403, 'grad_norm': 23.304601669311523, 'learning_rate': 4.2218171073908463e-07, 'fcm_dpo/beta': 0.003310044761747122, 'fcm_dpo/q_t': 0.41904619336128235, 'fcm_dpo/delta': 0.04850192740559578, 'fcm_dpo/margin': 106.68174743652344, 'margin_dpo/margin_mean': 106.6817398071289, 'margin_dpo/margin_std': 171.9312286376953, 'logps/chosen': -235.6334686279297, 'logps/rejected': -367.47265625, 'logps/ref_chosen': -65.89128875732422, 'logps/ref_rejected': -91.04875183105469, 'KL/chosen_KL_mean': -169.7421875, 'KL/rejected_KL_mean': -276.4239196777344, 'KL/mean': -223.08303833007812, 'KL/std': 140.11859130859375, 'logits/chosen': -0.33180832862854004, 'logits/rejected': -0.31371521949768066, 'epoch': 0.33} + 33%|███▎ | 228/681 [09:40<19:00, 2.52s/it] 34%|███▎ | 229/681 [09:42<18:52, 2.50s/it] {'loss': 1.1159, 'grad_norm': 36.011322021484375, 'learning_rate': 4.212490049118951e-07, 'fcm_dpo/beta': 0.0033262791112065315, 'fcm_dpo/q_t': 0.41243118047714233, 'fcm_dpo/delta': 0.02005820721387863, 'fcm_dpo/margin': 114.44705200195312, 'margin_dpo/margin_mean': 114.4470443725586, 'margin_dpo/margin_std': 170.42984008789062, 'logps/chosen': -245.3029327392578, 'logps/rejected': -373.571044921875, 'logps/ref_chosen': -70.70637512207031, 'logps/ref_rejected': -84.52741241455078, 'KL/chosen_KL_mean': -174.5965576171875, 'KL/rejected_KL_mean': -289.04364013671875, 'KL/mean': -231.82008361816406, 'KL/std': 162.17124938964844, 'logits/chosen': -0.4325563311576843, 'logits/rejected': -0.4033244848251343, 'epoch': 0.34} + 34%|███▎ | 229/681 [09:42<18:52, 2.50s/it] 34%|███▍ | 230/681 [09:44<18:31, 2.46s/it] {'loss': 0.9711, 'grad_norm': 35.2843132019043, 'learning_rate': 4.203117865141635e-07, 'fcm_dpo/beta': 0.003240791615098715, 'fcm_dpo/q_t': 0.3700660765171051, 'fcm_dpo/delta': -0.17158903181552887, 'fcm_dpo/margin': 173.3483123779297, 'margin_dpo/margin_mean': 173.34832763671875, 'margin_dpo/margin_std': 170.71090698242188, 'logps/chosen': -175.70333862304688, 'logps/rejected': -395.3915710449219, 'logps/ref_chosen': -39.282005310058594, 'logps/ref_rejected': -85.62191009521484, 'KL/chosen_KL_mean': -136.42132568359375, 'KL/rejected_KL_mean': -309.7696533203125, 'KL/mean': -223.09548950195312, 'KL/std': 155.21017456054688, 'logits/chosen': -0.3165392279624939, 'logits/rejected': -0.3209174871444702, 'epoch': 0.34} + 34%|███▍ | 230/681 [09:44<18:31, 2.46s/it] 34%|███▍ | 231/681 [09:47<18:47, 2.51s/it] {'loss': 1.1, 'grad_norm': 24.092422485351562, 'learning_rate': 4.1937008024246625e-07, 'fcm_dpo/beta': 0.00322412746027112, 'fcm_dpo/q_t': 0.4127604365348816, 'fcm_dpo/delta': 0.014150663278996944, 'fcm_dpo/margin': 119.83944702148438, 'margin_dpo/margin_mean': 119.83944702148438, 'margin_dpo/margin_std': 162.927978515625, 'logps/chosen': -222.97549438476562, 'logps/rejected': -353.66241455078125, 'logps/ref_chosen': -63.27644348144531, 'logps/ref_rejected': -74.1239013671875, 'KL/chosen_KL_mean': -159.6990509033203, 'KL/rejected_KL_mean': -279.5384826660156, 'KL/mean': -219.6187744140625, 'KL/std': 139.4580078125, 'logits/chosen': -0.3582533597946167, 'logits/rejected': -0.3271549940109253, 'epoch': 0.34} + 34%|███▍ | 231/681 [09:47<18:47, 2.51s/it] 34%|███▍ | 232/681 [09:50<19:12, 2.57s/it] {'loss': 1.164, 'grad_norm': 22.96601676940918, 'learning_rate': 4.1842391091163933e-07, 'fcm_dpo/beta': 0.003260795958340168, 'fcm_dpo/q_t': 0.43005359172821045, 'fcm_dpo/delta': 0.0887773260474205, 'fcm_dpo/margin': 96.32752227783203, 'margin_dpo/margin_mean': 96.3275146484375, 'margin_dpo/margin_std': 165.77828979492188, 'logps/chosen': -266.29901123046875, 'logps/rejected': -375.8548278808594, 'logps/ref_chosen': -70.74876403808594, 'logps/ref_rejected': -83.97706604003906, 'KL/chosen_KL_mean': -195.55023193359375, 'KL/rejected_KL_mean': -291.87774658203125, 'KL/mean': -243.71401977539062, 'KL/std': 161.44937133789062, 'logits/chosen': -0.38562819361686707, 'logits/rejected': -0.3633359670639038, 'epoch': 0.34} + 34%|███▍ | 232/681 [09:50<19:12, 2.57s/it] 34%|███▍ | 233/681 [09:52<19:30, 2.61s/it] {'loss': 1.064, 'grad_norm': 22.466392517089844, 'learning_rate': 4.174733034541245e-07, 'fcm_dpo/beta': 0.003234952688217163, 'fcm_dpo/q_t': 0.3919374644756317, 'fcm_dpo/delta': -0.10582563281059265, 'fcm_dpo/margin': 154.7547149658203, 'margin_dpo/margin_mean': 154.7547149658203, 'margin_dpo/margin_std': 224.2666015625, 'logps/chosen': -229.61448669433594, 'logps/rejected': -436.96630859375, 'logps/ref_chosen': -54.8829345703125, 'logps/ref_rejected': -107.4800796508789, 'KL/chosen_KL_mean': -174.73155212402344, 'KL/rejected_KL_mean': -329.48626708984375, 'KL/mean': -252.10891723632812, 'KL/std': 172.6971435546875, 'logits/chosen': -0.3720843195915222, 'logits/rejected': -0.373913049697876, 'epoch': 0.34} + 34%|███▍ | 233/681 [09:53<19:30, 2.61s/it] 34%|███▍ | 234/681 [09:55<19:33, 2.62s/it] {'loss': 1.0167, 'grad_norm': 30.547178268432617, 'learning_rate': 4.165182829193126e-07, 'fcm_dpo/beta': 0.0031358040869235992, 'fcm_dpo/q_t': 0.3850485682487488, 'fcm_dpo/delta': -0.11081574857234955, 'fcm_dpo/margin': 160.4628448486328, 'margin_dpo/margin_mean': 160.4628448486328, 'margin_dpo/margin_std': 173.27178955078125, 'logps/chosen': -213.57843017578125, 'logps/rejected': -429.953369140625, 'logps/ref_chosen': -44.094520568847656, 'logps/ref_rejected': -100.00663757324219, 'KL/chosen_KL_mean': -169.48391723632812, 'KL/rejected_KL_mean': -329.9467468261719, 'KL/mean': -249.71534729003906, 'KL/std': 153.66720581054688, 'logits/chosen': -0.3440871834754944, 'logits/rejected': -0.37053510546684265, 'epoch': 0.34} + 34%|███▍ | 234/681 [09:55<19:33, 2.62s/it] 35%|███▍ | 235/681 [09:58<19:05, 2.57s/it] {'loss': 1.1606, 'grad_norm': 26.673070907592773, 'learning_rate': 4.1555887447288255e-07, 'fcm_dpo/beta': 0.0031859464943408966, 'fcm_dpo/q_t': 0.4246324300765991, 'fcm_dpo/delta': 0.072694793343544, 'fcm_dpo/margin': 103.37664031982422, 'margin_dpo/margin_mean': 103.37664031982422, 'margin_dpo/margin_std': 178.27023315429688, 'logps/chosen': -264.5675048828125, 'logps/rejected': -396.10125732421875, 'logps/ref_chosen': -62.237911224365234, 'logps/ref_rejected': -90.39506530761719, 'KL/chosen_KL_mean': -202.32957458496094, 'KL/rejected_KL_mean': -305.7062072753906, 'KL/mean': -254.01788330078125, 'KL/std': 147.2139892578125, 'logits/chosen': -0.40312904119491577, 'logits/rejected': -0.38146403431892395, 'epoch': 0.35} + 35%|███▍ | 235/681 [09:58<19:05, 2.57s/it] 35%|███▍ | 236/681 [10:00<19:17, 2.60s/it] {'loss': 0.9815, 'grad_norm': 37.26255416870117, 'learning_rate': 4.1459510339613946e-07, 'fcm_dpo/beta': 0.003130989382043481, 'fcm_dpo/q_t': 0.37771958112716675, 'fcm_dpo/delta': -0.12949799001216888, 'fcm_dpo/margin': 166.98321533203125, 'margin_dpo/margin_mean': 166.98321533203125, 'margin_dpo/margin_std': 149.95436096191406, 'logps/chosen': -194.41201782226562, 'logps/rejected': -415.56549072265625, 'logps/ref_chosen': -49.34136199951172, 'logps/ref_rejected': -103.51162719726562, 'KL/chosen_KL_mean': -145.07064819335938, 'KL/rejected_KL_mean': -312.0538635253906, 'KL/mean': -228.562255859375, 'KL/std': 156.35775756835938, 'logits/chosen': -0.3943854570388794, 'logits/rejected': -0.3957618474960327, 'epoch': 0.35} + 35%|███▍ | 236/681 [10:00<19:17, 2.60s/it] 35%|███▍ | 237/681 [10:03<19:16, 2.60s/it] {'loss': 1.1151, 'grad_norm': 30.76072120666504, 'learning_rate': 4.136269950853473e-07, 'fcm_dpo/beta': 0.0031184733379632235, 'fcm_dpo/q_t': 0.41311001777648926, 'fcm_dpo/delta': 0.01728936657309532, 'fcm_dpo/margin': 122.92462921142578, 'margin_dpo/margin_mean': 122.92462921142578, 'margin_dpo/margin_std': 184.13796997070312, 'logps/chosen': -255.05178833007812, 'logps/rejected': -418.5886535644531, 'logps/ref_chosen': -54.168121337890625, 'logps/ref_rejected': -94.78036499023438, 'KL/chosen_KL_mean': -200.8836669921875, 'KL/rejected_KL_mean': -323.80828857421875, 'KL/mean': -262.3459777832031, 'KL/std': 158.80929565429688, 'logits/chosen': -0.36829280853271484, 'logits/rejected': -0.36081379652023315, 'epoch': 0.35} + 35%|███▍ | 237/681 [10:03<19:16, 2.60s/it] 35%|███▍ | 238/681 [10:06<19:28, 2.64s/it] {'loss': 1.1056, 'grad_norm': 22.210086822509766, 'learning_rate': 4.126545750510605e-07, 'fcm_dpo/beta': 0.0031098374165594578, 'fcm_dpo/q_t': 0.4137893319129944, 'fcm_dpo/delta': 0.01364682987332344, 'fcm_dpo/margin': 124.28575134277344, 'margin_dpo/margin_mean': 124.28575134277344, 'margin_dpo/margin_std': 175.60623168945312, 'logps/chosen': -229.14889526367188, 'logps/rejected': -388.8794860839844, 'logps/ref_chosen': -53.973121643066406, 'logps/ref_rejected': -89.41795349121094, 'KL/chosen_KL_mean': -175.17578125, 'KL/rejected_KL_mean': -299.4615478515625, 'KL/mean': -237.3186492919922, 'KL/std': 155.89273071289062, 'logits/chosen': -0.3522963523864746, 'logits/rejected': -0.3655349612236023, 'epoch': 0.35} + 35%|███▍ | 238/681 [10:06<19:28, 2.64s/it] 35%|███▌ | 239/681 [10:08<18:41, 2.54s/it] {'loss': 1.0671, 'grad_norm': 27.21063804626465, 'learning_rate': 4.116778689174514e-07, 'fcm_dpo/beta': 0.0030899234116077423, 'fcm_dpo/q_t': 0.3998359739780426, 'fcm_dpo/delta': -0.0351216085255146, 'fcm_dpo/margin': 139.981201171875, 'margin_dpo/margin_mean': 139.981201171875, 'margin_dpo/margin_std': 168.2056121826172, 'logps/chosen': -252.4191436767578, 'logps/rejected': -427.89544677734375, 'logps/ref_chosen': -58.09782409667969, 'logps/ref_rejected': -93.59294128417969, 'KL/chosen_KL_mean': -194.32131958007812, 'KL/rejected_KL_mean': -334.3025207519531, 'KL/mean': -264.3119201660156, 'KL/std': 149.57962036132812, 'logits/chosen': -0.34761273860931396, 'logits/rejected': -0.3327868580818176, 'epoch': 0.35} + 35%|███▌ | 239/681 [10:08<18:41, 2.54s/it] 35%|███▌ | 240/681 [10:11<19:12, 2.61s/it] {'loss': 1.1445, 'grad_norm': 35.285789489746094, 'learning_rate': 4.106969024216348e-07, 'fcm_dpo/beta': 0.0031123950611799955, 'fcm_dpo/q_t': 0.4176512360572815, 'fcm_dpo/delta': 0.03792831301689148, 'fcm_dpo/margin': 116.74779510498047, 'margin_dpo/margin_mean': 116.74779510498047, 'margin_dpo/margin_std': 196.01150512695312, 'logps/chosen': -265.8973388671875, 'logps/rejected': -396.149169921875, 'logps/ref_chosen': -60.6144905090332, 'logps/ref_rejected': -74.1185302734375, 'KL/chosen_KL_mean': -205.28285217285156, 'KL/rejected_KL_mean': -322.0306396484375, 'KL/mean': -263.65673828125, 'KL/std': 164.07862854003906, 'logits/chosen': -0.4174574017524719, 'logits/rejected': -0.39536041021347046, 'epoch': 0.35} + 35%|███▌ | 240/681 [10:11<19:12, 2.61s/it] 35%|███▌ | 241/681 [10:13<18:46, 2.56s/it] {'loss': 0.9946, 'grad_norm': 23.488187789916992, 'learning_rate': 4.097117014129903e-07, 'fcm_dpo/beta': 0.003058013506233692, 'fcm_dpo/q_t': 0.37589675188064575, 'fcm_dpo/delta': -0.1680062711238861, 'fcm_dpo/margin': 182.75537109375, 'margin_dpo/margin_mean': 182.75540161132812, 'margin_dpo/margin_std': 207.28306579589844, 'logps/chosen': -236.68667602539062, 'logps/rejected': -441.41192626953125, 'logps/ref_chosen': -66.091064453125, 'logps/ref_rejected': -88.06088256835938, 'KL/chosen_KL_mean': -170.59561157226562, 'KL/rejected_KL_mean': -353.35101318359375, 'KL/mean': -261.97332763671875, 'KL/std': 178.49505615234375, 'logits/chosen': -0.4499303102493286, 'logits/rejected': -0.42419755458831787, 'epoch': 0.35} + 35%|███▌ | 241/681 [10:13<18:46, 2.56s/it] 36%|███▌ | 242/681 [10:16<18:28, 2.53s/it] {'loss': 1.0916, 'grad_norm': 31.73065185546875, 'learning_rate': 4.087222918524807e-07, 'fcm_dpo/beta': 0.003036870388314128, 'fcm_dpo/q_t': 0.40894001722335815, 'fcm_dpo/delta': -0.0025676079094409943, 'fcm_dpo/margin': 132.41098022460938, 'margin_dpo/margin_mean': 132.41098022460938, 'margin_dpo/margin_std': 177.2808380126953, 'logps/chosen': -262.9025573730469, 'logps/rejected': -410.8099365234375, 'logps/ref_chosen': -67.86392974853516, 'logps/ref_rejected': -83.36033630371094, 'KL/chosen_KL_mean': -195.0386199951172, 'KL/rejected_KL_mean': -327.4496154785156, 'KL/mean': -261.244140625, 'KL/std': 152.1888427734375, 'logits/chosen': -0.3427576422691345, 'logits/rejected': -0.31177082657814026, 'epoch': 0.36} + 36%|███▌ | 242/681 [10:16<18:28, 2.53s/it] 36%|███▌ | 243/681 [10:18<18:27, 2.53s/it] {'loss': 1.0322, 'grad_norm': 21.928974151611328, 'learning_rate': 4.07728699811968e-07, 'fcm_dpo/beta': 0.002979143988341093, 'fcm_dpo/q_t': 0.38965845108032227, 'fcm_dpo/delta': -0.0886797159910202, 'fcm_dpo/margin': 162.53128051757812, 'margin_dpo/margin_mean': 162.53128051757812, 'margin_dpo/margin_std': 191.0034637451172, 'logps/chosen': -259.5120849609375, 'logps/rejected': -435.29473876953125, 'logps/ref_chosen': -63.0842399597168, 'logps/ref_rejected': -76.33563232421875, 'KL/chosen_KL_mean': -196.42782592773438, 'KL/rejected_KL_mean': -358.9591064453125, 'KL/mean': -277.6934814453125, 'KL/std': 171.84295654296875, 'logits/chosen': -0.3825536072254181, 'logits/rejected': -0.349841833114624, 'epoch': 0.36} + 36%|███▌ | 243/681 [10:18<18:27, 2.53s/it] 36%|███▌ | 244/681 [10:21<18:22, 2.52s/it] {'loss': 1.0172, 'grad_norm': 32.71875762939453, 'learning_rate': 4.067309514735267e-07, 'fcm_dpo/beta': 0.002924954518675804, 'fcm_dpo/q_t': 0.38810837268829346, 'fcm_dpo/delta': -0.08855760842561722, 'fcm_dpo/margin': 165.46893310546875, 'margin_dpo/margin_mean': 165.4689483642578, 'margin_dpo/margin_std': 169.16017150878906, 'logps/chosen': -232.87100219726562, 'logps/rejected': -432.0912170410156, 'logps/ref_chosen': -61.140689849853516, 'logps/ref_rejected': -94.89193725585938, 'KL/chosen_KL_mean': -171.73031616210938, 'KL/rejected_KL_mean': -337.19927978515625, 'KL/mean': -254.4647979736328, 'KL/std': 164.24757385253906, 'logits/chosen': -0.43552297353744507, 'logits/rejected': -0.4287059009075165, 'epoch': 0.36} + 36%|███▌ | 244/681 [10:21<18:22, 2.52s/it] 36%|███▌ | 245/681 [10:23<18:46, 2.58s/it] {'loss': 1.1129, 'grad_norm': 32.37158966064453, 'learning_rate': 4.057290731287531e-07, 'fcm_dpo/beta': 0.0029053720645606518, 'fcm_dpo/q_t': 0.4132363200187683, 'fcm_dpo/delta': 0.023488402366638184, 'fcm_dpo/margin': 129.53224182128906, 'margin_dpo/margin_mean': 129.53224182128906, 'margin_dpo/margin_std': 176.98114013671875, 'logps/chosen': -264.03729248046875, 'logps/rejected': -413.94732666015625, 'logps/ref_chosen': -67.26228332519531, 'logps/ref_rejected': -87.64010620117188, 'KL/chosen_KL_mean': -196.77499389648438, 'KL/rejected_KL_mean': -326.3072509765625, 'KL/mean': -261.5411376953125, 'KL/std': 160.04470825195312, 'logits/chosen': -0.43005210161209106, 'logits/rejected': -0.40020519495010376, 'epoch': 0.36} + 36%|███▌ | 245/681 [10:23<18:46, 2.58s/it] 36%|███▌ | 246/681 [10:26<18:42, 2.58s/it] {'loss': 1.1073, 'grad_norm': 29.707550048828125, 'learning_rate': 4.047230911780736e-07, 'fcm_dpo/beta': 0.002928508911281824, 'fcm_dpo/q_t': 0.41279125213623047, 'fcm_dpo/delta': 0.012364866212010384, 'fcm_dpo/margin': 132.4814910888672, 'margin_dpo/margin_mean': 132.4814910888672, 'margin_dpo/margin_std': 192.24024963378906, 'logps/chosen': -264.241455078125, 'logps/rejected': -414.372314453125, 'logps/ref_chosen': -66.69696807861328, 'logps/ref_rejected': -84.34634399414062, 'KL/chosen_KL_mean': -197.54446411132812, 'KL/rejected_KL_mean': -330.02593994140625, 'KL/mean': -263.78521728515625, 'KL/std': 181.79562377929688, 'logits/chosen': -0.4249339699745178, 'logits/rejected': -0.3837849497795105, 'epoch': 0.36} + 36%|███▌ | 246/681 [10:26<18:42, 2.58s/it] 36%|███▋ | 247/681 [10:28<18:28, 2.56s/it] {'loss': 1.0, 'grad_norm': 32.81269836425781, 'learning_rate': 4.0371303213004814e-07, 'fcm_dpo/beta': 0.002874248195439577, 'fcm_dpo/q_t': 0.3779839277267456, 'fcm_dpo/delta': -0.1419781595468521, 'fcm_dpo/margin': 185.85574340820312, 'margin_dpo/margin_mean': 185.8557586669922, 'margin_dpo/margin_std': 202.15628051757812, 'logps/chosen': -287.24346923828125, 'logps/rejected': -522.787109375, 'logps/ref_chosen': -56.6053466796875, 'logps/ref_rejected': -106.29326629638672, 'KL/chosen_KL_mean': -230.63812255859375, 'KL/rejected_KL_mean': -416.493896484375, 'KL/mean': -323.56597900390625, 'KL/std': 187.34368896484375, 'logits/chosen': -0.36910104751586914, 'logits/rejected': -0.3661007285118103, 'epoch': 0.36} + 36%|███▋ | 247/681 [10:28<18:28, 2.56s/it] 36%|███▋ | 248/681 [10:31<18:26, 2.56s/it] {'loss': 1.0232, 'grad_norm': 24.8028564453125, 'learning_rate': 4.0269892260067197e-07, 'fcm_dpo/beta': 0.0028204985428601503, 'fcm_dpo/q_t': 0.3918088674545288, 'fcm_dpo/delta': -0.05965063348412514, 'fcm_dpo/margin': 161.81573486328125, 'margin_dpo/margin_mean': 161.81573486328125, 'margin_dpo/margin_std': 147.67108154296875, 'logps/chosen': -241.03079223632812, 'logps/rejected': -450.66021728515625, 'logps/ref_chosen': -44.043216705322266, 'logps/ref_rejected': -91.85687255859375, 'KL/chosen_KL_mean': -196.98757934570312, 'KL/rejected_KL_mean': -358.8033447265625, 'KL/mean': -277.89544677734375, 'KL/std': 152.73196411132812, 'logits/chosen': -0.3791336119174957, 'logits/rejected': -0.3977039158344269, 'epoch': 0.36} + 36%|███▋ | 248/681 [10:31<18:26, 2.56s/it] 37%|███▋ | 249/681 [10:33<18:00, 2.50s/it] {'loss': 1.2297, 'grad_norm': 36.00886154174805, 'learning_rate': 4.0168078931267426e-07, 'fcm_dpo/beta': 0.0028861965984106064, 'fcm_dpo/q_t': 0.44312575459480286, 'fcm_dpo/delta': 0.15084651112556458, 'fcm_dpo/margin': 87.6685791015625, 'margin_dpo/margin_mean': 87.6685791015625, 'margin_dpo/margin_std': 201.31149291992188, 'logps/chosen': -317.7867431640625, 'logps/rejected': -423.4810485839844, 'logps/ref_chosen': -62.442352294921875, 'logps/ref_rejected': -80.46806335449219, 'KL/chosen_KL_mean': -255.34439086914062, 'KL/rejected_KL_mean': -343.01300048828125, 'KL/mean': -299.1787109375, 'KL/std': 171.1071014404297, 'logits/chosen': -0.41152477264404297, 'logits/rejected': -0.38563063740730286, 'epoch': 0.37} + 37%|███▋ | 249/681 [10:33<18:00, 2.50s/it] 37%|███▋ | 250/681 [10:36<17:41, 2.46s/it] {'loss': 1.0207, 'grad_norm': 38.20355987548828, 'learning_rate': 4.006586590948141e-07, 'fcm_dpo/beta': 0.0028843069449067116, 'fcm_dpo/q_t': 0.3882160782814026, 'fcm_dpo/delta': -0.08404796570539474, 'fcm_dpo/margin': 166.430419921875, 'margin_dpo/margin_mean': 166.430419921875, 'margin_dpo/margin_std': 170.97409057617188, 'logps/chosen': -284.0045471191406, 'logps/rejected': -458.67010498046875, 'logps/ref_chosen': -65.63668823242188, 'logps/ref_rejected': -73.87184143066406, 'KL/chosen_KL_mean': -218.36785888671875, 'KL/rejected_KL_mean': -384.79827880859375, 'KL/mean': -301.58306884765625, 'KL/std': 167.09228515625, 'logits/chosen': -0.3953922390937805, 'logits/rejected': -0.3377394676208496, 'epoch': 0.37} + 37%|███▋ | 250/681 [10:36<17:41, 2.46s/it] 37%|███▋ | 251/681 [10:38<17:33, 2.45s/it] {'loss': 1.1682, 'grad_norm': 36.455989837646484, 'learning_rate': 3.9963255888117325e-07, 'fcm_dpo/beta': 0.002891149837523699, 'fcm_dpo/q_t': 0.42818212509155273, 'fcm_dpo/delta': 0.08716142922639847, 'fcm_dpo/margin': 109.18754577636719, 'margin_dpo/margin_mean': 109.18755340576172, 'margin_dpo/margin_std': 187.57749938964844, 'logps/chosen': -289.11932373046875, 'logps/rejected': -418.7875671386719, 'logps/ref_chosen': -57.182716369628906, 'logps/ref_rejected': -77.66343688964844, 'KL/chosen_KL_mean': -231.93658447265625, 'KL/rejected_KL_mean': -341.1241455078125, 'KL/mean': -286.5303649902344, 'KL/std': 173.23834228515625, 'logits/chosen': -0.3735540509223938, 'logits/rejected': -0.34010833501815796, 'epoch': 0.37} + 37%|███▋ | 251/681 [10:38<17:33, 2.45s/it] 37%|███▋ | 252/681 [10:41<17:54, 2.50s/it] {'loss': 1.0427, 'grad_norm': 26.218870162963867, 'learning_rate': 3.9860251571044666e-07, 'fcm_dpo/beta': 0.002887298120185733, 'fcm_dpo/q_t': 0.39595597982406616, 'fcm_dpo/delta': -0.03713885694742203, 'fcm_dpo/margin': 150.75619506835938, 'margin_dpo/margin_mean': 150.75619506835938, 'margin_dpo/margin_std': 148.70468139648438, 'logps/chosen': -290.60992431640625, 'logps/rejected': -454.4384765625, 'logps/ref_chosen': -71.68563842773438, 'logps/ref_rejected': -84.75799560546875, 'KL/chosen_KL_mean': -218.92428588867188, 'KL/rejected_KL_mean': -369.68048095703125, 'KL/mean': -294.3023986816406, 'KL/std': 150.38790893554688, 'logits/chosen': -0.45149749517440796, 'logits/rejected': -0.4118601083755493, 'epoch': 0.37} + 37%|███▋ | 252/681 [10:41<17:54, 2.50s/it] 37%|███▋ | 253/681 [10:43<18:25, 2.58s/it] {'loss': 1.0731, 'grad_norm': 25.34404182434082, 'learning_rate': 3.9756855672522986e-07, 'fcm_dpo/beta': 0.0028968360275030136, 'fcm_dpo/q_t': 0.40177327394485474, 'fcm_dpo/delta': -0.021846629679203033, 'fcm_dpo/margin': 145.12869262695312, 'margin_dpo/margin_mean': 145.12869262695312, 'margin_dpo/margin_std': 174.61839294433594, 'logps/chosen': -259.2889099121094, 'logps/rejected': -433.9862060546875, 'logps/ref_chosen': -69.1339340209961, 'logps/ref_rejected': -98.70252990722656, 'KL/chosen_KL_mean': -190.15496826171875, 'KL/rejected_KL_mean': -335.2836608886719, 'KL/mean': -262.7193298339844, 'KL/std': 159.39236450195312, 'logits/chosen': -0.4528924822807312, 'logits/rejected': -0.444851279258728, 'epoch': 0.37} + 37%|███▋ | 253/681 [10:44<18:25, 2.58s/it] 37%|███▋ | 254/681 [10:46<18:31, 2.60s/it] {'loss': 1.139, 'grad_norm': 24.079992294311523, 'learning_rate': 3.965307091713037e-07, 'fcm_dpo/beta': 0.0028911656700074673, 'fcm_dpo/q_t': 0.41999658942222595, 'fcm_dpo/delta': 0.04102412983775139, 'fcm_dpo/margin': 124.68098449707031, 'margin_dpo/margin_mean': 124.68099975585938, 'margin_dpo/margin_std': 206.97445678710938, 'logps/chosen': -237.2046356201172, 'logps/rejected': -398.03826904296875, 'logps/ref_chosen': -54.154998779296875, 'logps/ref_rejected': -90.30764770507812, 'KL/chosen_KL_mean': -183.0496368408203, 'KL/rejected_KL_mean': -307.73065185546875, 'KL/mean': -245.39013671875, 'KL/std': 165.1351318359375, 'logits/chosen': -0.40010130405426025, 'logits/rejected': -0.3824934661388397, 'epoch': 0.37} + 37%|███▋ | 254/681 [10:46<18:31, 2.60s/it] 37%|███▋ | 255/681 [10:49<18:02, 2.54s/it] {'loss': 1.1089, 'grad_norm': 21.63443946838379, 'learning_rate': 3.954890003969163e-07, 'fcm_dpo/beta': 0.0028981873765587807, 'fcm_dpo/q_t': 0.41084420680999756, 'fcm_dpo/delta': 0.015838047489523888, 'fcm_dpo/margin': 132.70704650878906, 'margin_dpo/margin_mean': 132.70704650878906, 'margin_dpo/margin_std': 186.76455688476562, 'logps/chosen': -246.64581298828125, 'logps/rejected': -412.4197998046875, 'logps/ref_chosen': -57.14167022705078, 'logps/ref_rejected': -90.2085952758789, 'KL/chosen_KL_mean': -189.504150390625, 'KL/rejected_KL_mean': -322.211181640625, 'KL/mean': -255.857666015625, 'KL/std': 146.82505798339844, 'logits/chosen': -0.3540547490119934, 'logits/rejected': -0.34169769287109375, 'epoch': 0.37} + 37%|███▋ | 255/681 [10:49<18:02, 2.54s/it] 38%|███▊ | 256/681 [10:51<18:00, 2.54s/it] {'loss': 1.0717, 'grad_norm': 26.03676414489746, 'learning_rate': 3.944434578520628e-07, 'fcm_dpo/beta': 0.002899360843002796, 'fcm_dpo/q_t': 0.4021248519420624, 'fcm_dpo/delta': -0.025190845131874084, 'fcm_dpo/margin': 146.28195190429688, 'margin_dpo/margin_mean': 146.28195190429688, 'margin_dpo/margin_std': 182.9776611328125, 'logps/chosen': -223.52330017089844, 'logps/rejected': -407.2046813964844, 'logps/ref_chosen': -55.163490295410156, 'logps/ref_rejected': -92.56291961669922, 'KL/chosen_KL_mean': -168.35980224609375, 'KL/rejected_KL_mean': -314.6417541503906, 'KL/mean': -241.50079345703125, 'KL/std': 160.8679656982422, 'logits/chosen': -0.33700472116470337, 'logits/rejected': -0.34537577629089355, 'epoch': 0.38} + 38%|███▊ | 256/681 [10:51<18:00, 2.54s/it] 38%|███▊ | 257/681 [10:54<18:11, 2.57s/it] {'loss': 1.0698, 'grad_norm': 21.53156089782715, 'learning_rate': 3.933941090877615e-07, 'fcm_dpo/beta': 0.0028601905796676874, 'fcm_dpo/q_t': 0.4005059599876404, 'fcm_dpo/delta': -0.03735721856355667, 'fcm_dpo/margin': 151.95468139648438, 'margin_dpo/margin_mean': 151.95468139648438, 'margin_dpo/margin_std': 187.14981079101562, 'logps/chosen': -214.644775390625, 'logps/rejected': -396.71368408203125, 'logps/ref_chosen': -49.42369842529297, 'logps/ref_rejected': -79.53791809082031, 'KL/chosen_KL_mean': -165.2210693359375, 'KL/rejected_KL_mean': -317.1757507324219, 'KL/mean': -241.1984100341797, 'KL/std': 167.52880859375, 'logits/chosen': -0.3417869806289673, 'logits/rejected': -0.3275744915008545, 'epoch': 0.38} + 38%|███▊ | 257/681 [10:54<18:11, 2.57s/it] 38%|███▊ | 258/681 [10:56<17:30, 2.48s/it] {'loss': 1.0901, 'grad_norm': 25.97199249267578, 'learning_rate': 3.923409817553284e-07, 'fcm_dpo/beta': 0.0028670839965343475, 'fcm_dpo/q_t': 0.40125665068626404, 'fcm_dpo/delta': -0.02737291157245636, 'fcm_dpo/margin': 148.650634765625, 'margin_dpo/margin_mean': 148.650634765625, 'margin_dpo/margin_std': 211.3903045654297, 'logps/chosen': -277.44989013671875, 'logps/rejected': -462.70648193359375, 'logps/ref_chosen': -59.384124755859375, 'logps/ref_rejected': -95.99010467529297, 'KL/chosen_KL_mean': -218.06576538085938, 'KL/rejected_KL_mean': -366.71636962890625, 'KL/mean': -292.39105224609375, 'KL/std': 176.79669189453125, 'logits/chosen': -0.32123446464538574, 'logits/rejected': -0.3179280161857605, 'epoch': 0.38} + 38%|███▊ | 258/681 [10:56<17:30, 2.48s/it] 38%|███▊ | 259/681 [10:59<17:32, 2.49s/it] {'loss': 1.1309, 'grad_norm': 22.1605281829834, 'learning_rate': 3.9128410360564793e-07, 'fcm_dpo/beta': 0.002867575269192457, 'fcm_dpo/q_t': 0.4185197353363037, 'fcm_dpo/delta': 0.04339686781167984, 'fcm_dpo/margin': 124.87550354003906, 'margin_dpo/margin_mean': 124.8755111694336, 'margin_dpo/margin_std': 190.36257934570312, 'logps/chosen': -260.5933837890625, 'logps/rejected': -421.83221435546875, 'logps/ref_chosen': -52.828346252441406, 'logps/ref_rejected': -89.191650390625, 'KL/chosen_KL_mean': -207.76504516601562, 'KL/rejected_KL_mean': -332.64056396484375, 'KL/mean': -270.2027893066406, 'KL/std': 169.08270263671875, 'logits/chosen': -0.3787084221839905, 'logits/rejected': -0.37561601400375366, 'epoch': 0.38} + 38%|███▊ | 259/681 [10:59<17:32, 2.49s/it] 38%|███▊ | 260/681 [11:01<17:51, 2.54s/it] {'loss': 1.0221, 'grad_norm': 27.828453063964844, 'learning_rate': 3.9022350248844246e-07, 'fcm_dpo/beta': 0.002854567486792803, 'fcm_dpo/q_t': 0.38960734009742737, 'fcm_dpo/delta': -0.09267206490039825, 'fcm_dpo/margin': 171.02438354492188, 'margin_dpo/margin_mean': 171.02438354492188, 'margin_dpo/margin_std': 191.23297119140625, 'logps/chosen': -254.3938446044922, 'logps/rejected': -473.09033203125, 'logps/ref_chosen': -47.41767501831055, 'logps/ref_rejected': -95.08978271484375, 'KL/chosen_KL_mean': -206.97616577148438, 'KL/rejected_KL_mean': -378.00054931640625, 'KL/mean': -292.4883728027344, 'KL/std': 176.58251953125, 'logits/chosen': -0.3447574973106384, 'logits/rejected': -0.36221379041671753, 'epoch': 0.38} + 38%|███▊ | 260/681 [11:01<17:51, 2.54s/it] 38%|███▊ | 261/681 [11:03<17:07, 2.45s/it] {'loss': 1.0723, 'grad_norm': 20.731279373168945, 'learning_rate': 3.891592063515376e-07, 'fcm_dpo/beta': 0.002804287476465106, 'fcm_dpo/q_t': 0.3995262384414673, 'fcm_dpo/delta': -0.04664212465286255, 'fcm_dpo/margin': 158.46176147460938, 'margin_dpo/margin_mean': 158.46176147460938, 'margin_dpo/margin_std': 213.8656005859375, 'logps/chosen': -269.50506591796875, 'logps/rejected': -463.4504089355469, 'logps/ref_chosen': -53.03137969970703, 'logps/ref_rejected': -88.51494598388672, 'KL/chosen_KL_mean': -216.47369384765625, 'KL/rejected_KL_mean': -374.9354553222656, 'KL/mean': -295.7045593261719, 'KL/std': 191.41683959960938, 'logits/chosen': -0.3055582344532013, 'logits/rejected': -0.3050229549407959, 'epoch': 0.38} + 38%|███▊ | 261/681 [11:03<17:07, 2.45s/it] 38%|███▊ | 262/681 [11:06<16:55, 2.42s/it] {'loss': 1.1038, 'grad_norm': 23.95509147644043, 'learning_rate': 3.880912432401264e-07, 'fcm_dpo/beta': 0.00282662408426404, 'fcm_dpo/q_t': 0.4136051535606384, 'fcm_dpo/delta': 0.029231306165456772, 'fcm_dpo/margin': 131.42083740234375, 'margin_dpo/margin_mean': 131.42083740234375, 'margin_dpo/margin_std': 170.3394317626953, 'logps/chosen': -308.4658508300781, 'logps/rejected': -466.68505859375, 'logps/ref_chosen': -59.620140075683594, 'logps/ref_rejected': -86.41853332519531, 'KL/chosen_KL_mean': -248.84571838378906, 'KL/rejected_KL_mean': -380.2665100097656, 'KL/mean': -314.5561218261719, 'KL/std': 172.76287841796875, 'logits/chosen': -0.31161201000213623, 'logits/rejected': -0.2819845676422119, 'epoch': 0.38} + 38%|███▊ | 262/681 [11:06<16:55, 2.42s/it] 39%|███▊ | 263/681 [11:08<16:54, 2.43s/it] {'loss': 1.0183, 'grad_norm': 21.146869659423828, 'learning_rate': 3.870196412960302e-07, 'fcm_dpo/beta': 0.0027571117971092463, 'fcm_dpo/q_t': 0.38271111249923706, 'fcm_dpo/delta': -0.1240774393081665, 'fcm_dpo/margin': 187.4507598876953, 'margin_dpo/margin_mean': 187.45074462890625, 'margin_dpo/margin_std': 219.32289123535156, 'logps/chosen': -281.99560546875, 'logps/rejected': -506.8825988769531, 'logps/ref_chosen': -59.42094421386719, 'logps/ref_rejected': -96.85720825195312, 'KL/chosen_KL_mean': -222.57464599609375, 'KL/rejected_KL_mean': -410.025390625, 'KL/mean': -316.29998779296875, 'KL/std': 204.56842041015625, 'logits/chosen': -0.3639793395996094, 'logits/rejected': -0.3423152267932892, 'epoch': 0.39} + 39%|███▊ | 263/681 [11:08<16:54, 2.43s/it] 39%|███▉ | 264/681 [11:11<17:32, 2.52s/it] {'loss': 1.0891, 'grad_norm': 23.543067932128906, 'learning_rate': 3.8594442875695665e-07, 'fcm_dpo/beta': 0.002720474498346448, 'fcm_dpo/q_t': 0.4044179320335388, 'fcm_dpo/delta': -0.024707935750484467, 'fcm_dpo/margin': 155.25442504882812, 'margin_dpo/margin_mean': 155.25442504882812, 'margin_dpo/margin_std': 211.25302124023438, 'logps/chosen': -296.59368896484375, 'logps/rejected': -482.9822998046875, 'logps/ref_chosen': -62.722084045410156, 'logps/ref_rejected': -93.85620880126953, 'KL/chosen_KL_mean': -233.87161254882812, 'KL/rejected_KL_mean': -389.1260681152344, 'KL/mean': -311.49884033203125, 'KL/std': 185.69674682617188, 'logits/chosen': -0.40215420722961426, 'logits/rejected': -0.3942739963531494, 'epoch': 0.39} + 39%|███▉ | 264/681 [11:11<17:32, 2.52s/it] 39%|███▉ | 265/681 [11:14<17:41, 2.55s/it] {'loss': 1.1199, 'grad_norm': 23.92888832092285, 'learning_rate': 3.848656339557562e-07, 'fcm_dpo/beta': 0.0027331099845469, 'fcm_dpo/q_t': 0.40931302309036255, 'fcm_dpo/delta': -0.008440444245934486, 'fcm_dpo/margin': 149.31336975097656, 'margin_dpo/margin_mean': 149.31336975097656, 'margin_dpo/margin_std': 244.71060180664062, 'logps/chosen': -315.82366943359375, 'logps/rejected': -491.1861877441406, 'logps/ref_chosen': -61.971466064453125, 'logps/ref_rejected': -88.02059936523438, 'KL/chosen_KL_mean': -253.85220336914062, 'KL/rejected_KL_mean': -403.16558837890625, 'KL/mean': -328.5089111328125, 'KL/std': 207.49630737304688, 'logits/chosen': -0.29150500893592834, 'logits/rejected': -0.2723013758659363, 'epoch': 0.39} + 39%|███▉ | 265/681 [11:14<17:41, 2.55s/it] 39%|███▉ | 266/681 [11:16<17:30, 2.53s/it] {'loss': 1.1531, 'grad_norm': 50.139766693115234, 'learning_rate': 3.8378328531967507e-07, 'fcm_dpo/beta': 0.002761277835816145, 'fcm_dpo/q_t': 0.42465198040008545, 'fcm_dpo/delta': 0.07062655687332153, 'fcm_dpo/margin': 120.13714599609375, 'margin_dpo/margin_mean': 120.13715362548828, 'margin_dpo/margin_std': 199.931640625, 'logps/chosen': -324.41265869140625, 'logps/rejected': -445.42138671875, 'logps/ref_chosen': -67.09967041015625, 'logps/ref_rejected': -67.97122192382812, 'KL/chosen_KL_mean': -257.31298828125, 'KL/rejected_KL_mean': -377.4501647949219, 'KL/mean': -317.381591796875, 'KL/std': 173.2138671875, 'logits/chosen': -0.34931957721710205, 'logits/rejected': -0.30927109718322754, 'epoch': 0.39} + 39%|███▉ | 266/681 [11:16<17:30, 2.53s/it] 39%|███▉ | 267/681 [11:19<17:31, 2.54s/it] {'loss': 1.1023, 'grad_norm': 34.82039260864258, 'learning_rate': 3.8269741136960646e-07, 'fcm_dpo/beta': 0.0027605746872723103, 'fcm_dpo/q_t': 0.40831679105758667, 'fcm_dpo/delta': -0.0036356858909130096, 'fcm_dpo/margin': 146.06735229492188, 'margin_dpo/margin_mean': 146.06735229492188, 'margin_dpo/margin_std': 212.41896057128906, 'logps/chosen': -299.319580078125, 'logps/rejected': -466.58465576171875, 'logps/ref_chosen': -68.97075653076172, 'logps/ref_rejected': -90.16844940185547, 'KL/chosen_KL_mean': -230.34881591796875, 'KL/rejected_KL_mean': -376.41619873046875, 'KL/mean': -303.38250732421875, 'KL/std': 187.39883422851562, 'logits/chosen': -0.3749678134918213, 'logits/rejected': -0.34407055377960205, 'epoch': 0.39} + 39%|███▉ | 267/681 [11:19<17:31, 2.54s/it] 39%|███▉ | 268/681 [11:21<17:28, 2.54s/it] {'loss': 1.0941, 'grad_norm': 28.66946029663086, 'learning_rate': 3.8160804071933894e-07, 'fcm_dpo/beta': 0.002754632383584976, 'fcm_dpo/q_t': 0.4059686064720154, 'fcm_dpo/delta': -0.016000591218471527, 'fcm_dpo/margin': 150.69021606445312, 'margin_dpo/margin_mean': 150.69021606445312, 'margin_dpo/margin_std': 216.28262329101562, 'logps/chosen': -294.68927001953125, 'logps/rejected': -491.1268310546875, 'logps/ref_chosen': -55.90031051635742, 'logps/ref_rejected': -101.64763641357422, 'KL/chosen_KL_mean': -238.78897094726562, 'KL/rejected_KL_mean': -389.47918701171875, 'KL/mean': -314.13409423828125, 'KL/std': 180.19076538085938, 'logits/chosen': -0.35488247871398926, 'logits/rejected': -0.3602542281150818, 'epoch': 0.39} + 39%|███▉ | 268/681 [11:21<17:28, 2.54s/it] 40%|███▉ | 269/681 [11:24<17:14, 2.51s/it] {'loss': 1.0618, 'grad_norm': 26.64388084411621, 'learning_rate': 3.8051520207480204e-07, 'fcm_dpo/beta': 0.0027336953207850456, 'fcm_dpo/q_t': 0.3932827413082123, 'fcm_dpo/delta': -0.07361201196908951, 'fcm_dpo/margin': 172.00210571289062, 'margin_dpo/margin_mean': 172.00210571289062, 'margin_dpo/margin_std': 230.4850616455078, 'logps/chosen': -321.8299560546875, 'logps/rejected': -531.1419067382812, 'logps/ref_chosen': -70.03955841064453, 'logps/ref_rejected': -107.34937286376953, 'KL/chosen_KL_mean': -251.7904052734375, 'KL/rejected_KL_mean': -423.79254150390625, 'KL/mean': -337.79144287109375, 'KL/std': 181.66836547851562, 'logits/chosen': -0.4130655527114868, 'logits/rejected': -0.39535683393478394, 'epoch': 0.4} + 40%|███▉ | 269/681 [11:24<17:14, 2.51s/it] 40%|███▉ | 270/681 [11:26<17:34, 2.56s/it] {'loss': 1.1378, 'grad_norm': 33.281341552734375, 'learning_rate': 3.794189242333106e-07, 'fcm_dpo/beta': 0.0027484484016895294, 'fcm_dpo/q_t': 0.41904473304748535, 'fcm_dpo/delta': 0.04783637449145317, 'fcm_dpo/margin': 128.63064575195312, 'margin_dpo/margin_mean': 128.6306610107422, 'margin_dpo/margin_std': 204.2493896484375, 'logps/chosen': -287.8440856933594, 'logps/rejected': -456.8699035644531, 'logps/ref_chosen': -69.53347778320312, 'logps/ref_rejected': -109.92864990234375, 'KL/chosen_KL_mean': -218.31060791015625, 'KL/rejected_KL_mean': -346.9412841796875, 'KL/mean': -282.62591552734375, 'KL/std': 164.40748596191406, 'logits/chosen': -0.43409767746925354, 'logits/rejected': -0.4289151430130005, 'epoch': 0.4} + 40%|███▉ | 270/681 [11:26<17:34, 2.56s/it] 40%|███▉ | 271/681 [11:29<17:12, 2.52s/it] {'loss': 1.0393, 'grad_norm': 23.12546730041504, 'learning_rate': 3.7831923608280514e-07, 'fcm_dpo/beta': 0.002710944041609764, 'fcm_dpo/q_t': 0.39391976594924927, 'fcm_dpo/delta': -0.06562402844429016, 'fcm_dpo/margin': 170.47811889648438, 'margin_dpo/margin_mean': 170.47811889648438, 'margin_dpo/margin_std': 191.98031616210938, 'logps/chosen': -265.9978332519531, 'logps/rejected': -472.2252197265625, 'logps/ref_chosen': -56.76456832885742, 'logps/ref_rejected': -92.51383972167969, 'KL/chosen_KL_mean': -209.23326110839844, 'KL/rejected_KL_mean': -379.71136474609375, 'KL/mean': -294.4723205566406, 'KL/std': 168.14682006835938, 'logits/chosen': -0.3764873743057251, 'logits/rejected': -0.36014989018440247, 'epoch': 0.4} + 40%|███▉ | 271/681 [11:29<17:12, 2.52s/it] 40%|███▉ | 272/681 [11:31<17:34, 2.58s/it] {'loss': 0.9774, 'grad_norm': 33.842586517333984, 'learning_rate': 3.772161666010912e-07, 'fcm_dpo/beta': 0.0026611106004565954, 'fcm_dpo/q_t': 0.3735220432281494, 'fcm_dpo/delta': -0.15195293724536896, 'fcm_dpo/margin': 204.4078369140625, 'margin_dpo/margin_mean': 204.4078369140625, 'margin_dpo/margin_std': 192.80450439453125, 'logps/chosen': -249.95352172851562, 'logps/rejected': -510.406982421875, 'logps/ref_chosen': -49.497154235839844, 'logps/ref_rejected': -105.54279327392578, 'KL/chosen_KL_mean': -200.45635986328125, 'KL/rejected_KL_mean': -404.86419677734375, 'KL/mean': -302.6602783203125, 'KL/std': 187.67584228515625, 'logits/chosen': -0.2986787259578705, 'logits/rejected': -0.31125104427337646, 'epoch': 0.4} + 40%|███▉ | 272/681 [11:31<17:34, 2.58s/it] 40%|████ | 273/681 [11:34<17:09, 2.52s/it] {'loss': 1.0174, 'grad_norm': 30.54670524597168, 'learning_rate': 3.761097448550755e-07, 'fcm_dpo/beta': 0.0025815139524638653, 'fcm_dpo/q_t': 0.38513267040252686, 'fcm_dpo/delta': -0.1033368706703186, 'fcm_dpo/margin': 192.7875213623047, 'margin_dpo/margin_mean': 192.7875213623047, 'margin_dpo/margin_std': 209.65719604492188, 'logps/chosen': -294.1769714355469, 'logps/rejected': -516.4876708984375, 'logps/ref_chosen': -62.97539520263672, 'logps/ref_rejected': -92.49858093261719, 'KL/chosen_KL_mean': -231.20156860351562, 'KL/rejected_KL_mean': -423.98907470703125, 'KL/mean': -327.5953369140625, 'KL/std': 184.35150146484375, 'logits/chosen': -0.2978020906448364, 'logits/rejected': -0.2806628346443176, 'epoch': 0.4} + 40%|████ | 273/681 [11:34<17:09, 2.52s/it] 40%|████ | 274/681 [11:36<16:48, 2.48s/it] {'loss': 1.1059, 'grad_norm': 26.89436149597168, 'learning_rate': 3.75e-07, 'fcm_dpo/beta': 0.002592704724520445, 'fcm_dpo/q_t': 0.41287532448768616, 'fcm_dpo/delta': 0.023143114522099495, 'fcm_dpo/margin': 145.5963134765625, 'margin_dpo/margin_mean': 145.5963134765625, 'margin_dpo/margin_std': 196.85382080078125, 'logps/chosen': -331.75958251953125, 'logps/rejected': -499.0212707519531, 'logps/ref_chosen': -55.66770935058594, 'logps/ref_rejected': -77.33308410644531, 'KL/chosen_KL_mean': -276.0918884277344, 'KL/rejected_KL_mean': -421.68817138671875, 'KL/mean': -348.8900146484375, 'KL/std': 173.11854553222656, 'logits/chosen': -0.2569617033004761, 'logits/rejected': -0.24136140942573547, 'epoch': 0.4} + 40%|████ | 274/681 [11:36<16:48, 2.48s/it] 40%|████ | 275/681 [11:39<17:09, 2.54s/it] {'loss': 1.0756, 'grad_norm': 26.170629501342773, 'learning_rate': 3.738869612786737e-07, 'fcm_dpo/beta': 0.0025886246003210545, 'fcm_dpo/q_t': 0.403909832239151, 'fcm_dpo/delta': -0.015149945393204689, 'fcm_dpo/margin': 160.09922790527344, 'margin_dpo/margin_mean': 160.09922790527344, 'margin_dpo/margin_std': 196.3067169189453, 'logps/chosen': -259.0332946777344, 'logps/rejected': -463.84149169921875, 'logps/ref_chosen': -48.594703674316406, 'logps/ref_rejected': -93.30369567871094, 'KL/chosen_KL_mean': -210.4385986328125, 'KL/rejected_KL_mean': -370.5378112792969, 'KL/mean': -290.48822021484375, 'KL/std': 176.77706909179688, 'logits/chosen': -0.31770947575569153, 'logits/rejected': -0.32402610778808594, 'epoch': 0.4} + 40%|████ | 275/681 [11:39<17:09, 2.54s/it] 41%|████ | 276/681 [11:41<17:01, 2.52s/it] {'loss': 1.0897, 'grad_norm': 33.1538200378418, 'learning_rate': 3.7277065802070204e-07, 'fcm_dpo/beta': 0.0025693178176879883, 'fcm_dpo/q_t': 0.4061383008956909, 'fcm_dpo/delta': -0.014239070937037468, 'fcm_dpo/margin': 160.95144653320312, 'margin_dpo/margin_mean': 160.95144653320312, 'margin_dpo/margin_std': 220.95907592773438, 'logps/chosen': -283.90032958984375, 'logps/rejected': -458.64007568359375, 'logps/ref_chosen': -56.57740783691406, 'logps/ref_rejected': -70.36566925048828, 'KL/chosen_KL_mean': -227.3229217529297, 'KL/rejected_KL_mean': -388.2743835449219, 'KL/mean': -307.79864501953125, 'KL/std': 183.3173065185547, 'logits/chosen': -0.2713956832885742, 'logits/rejected': -0.2456541657447815, 'epoch': 0.41} + 41%|████ | 276/681 [11:41<17:01, 2.52s/it] 41%|████ | 277/681 [11:44<16:39, 2.47s/it] {'loss': 1.082, 'grad_norm': 23.64580726623535, 'learning_rate': 3.71651119641714e-07, 'fcm_dpo/beta': 0.002568996511399746, 'fcm_dpo/q_t': 0.4032415747642517, 'fcm_dpo/delta': -0.023184221237897873, 'fcm_dpo/margin': 164.33425903320312, 'margin_dpo/margin_mean': 164.33425903320312, 'margin_dpo/margin_std': 218.15975952148438, 'logps/chosen': -307.5662841796875, 'logps/rejected': -508.51025390625, 'logps/ref_chosen': -56.27156066894531, 'logps/ref_rejected': -92.88127136230469, 'KL/chosen_KL_mean': -251.29473876953125, 'KL/rejected_KL_mean': -415.6289978027344, 'KL/mean': -333.46185302734375, 'KL/std': 182.88909912109375, 'logits/chosen': -0.2841571569442749, 'logits/rejected': -0.26456978917121887, 'epoch': 0.41} + 41%|████ | 277/681 [11:44<16:39, 2.47s/it] 41%|████ | 278/681 [11:46<17:05, 2.55s/it] {'loss': 1.026, 'grad_norm': 24.158504486083984, 'learning_rate': 3.705283756425872e-07, 'fcm_dpo/beta': 0.0025190459564328194, 'fcm_dpo/q_t': 0.38852113485336304, 'fcm_dpo/delta': -0.09271565079689026, 'fcm_dpo/margin': 193.64944458007812, 'margin_dpo/margin_mean': 193.6494598388672, 'margin_dpo/margin_std': 218.25051879882812, 'logps/chosen': -271.9851379394531, 'logps/rejected': -503.94622802734375, 'logps/ref_chosen': -52.94194030761719, 'logps/ref_rejected': -91.25357818603516, 'KL/chosen_KL_mean': -219.04319763183594, 'KL/rejected_KL_mean': -412.692626953125, 'KL/mean': -315.867919921875, 'KL/std': 195.05999755859375, 'logits/chosen': -0.27769631147384644, 'logits/rejected': -0.2823639214038849, 'epoch': 0.41} + 41%|████ | 278/681 [11:46<17:05, 2.55s/it] 41%|████ | 279/681 [11:49<16:58, 2.53s/it] {'loss': 1.0546, 'grad_norm': 27.312673568725586, 'learning_rate': 3.6940245560867e-07, 'fcm_dpo/beta': 0.0024712784215807915, 'fcm_dpo/q_t': 0.39302849769592285, 'fcm_dpo/delta': -0.07853814959526062, 'fcm_dpo/margin': 191.73904418945312, 'margin_dpo/margin_mean': 191.73904418945312, 'margin_dpo/margin_std': 247.916748046875, 'logps/chosen': -299.29962158203125, 'logps/rejected': -530.248779296875, 'logps/ref_chosen': -48.641319274902344, 'logps/ref_rejected': -87.8514404296875, 'KL/chosen_KL_mean': -250.65829467773438, 'KL/rejected_KL_mean': -442.3973388671875, 'KL/mean': -346.52783203125, 'KL/std': 203.59503173828125, 'logits/chosen': -0.24171388149261475, 'logits/rejected': -0.2399359941482544, 'epoch': 0.41} + 41%|████ | 279/681 [11:49<16:58, 2.53s/it] 41%|████ | 280/681 [11:52<17:09, 2.57s/it] {'loss': 1.0254, 'grad_norm': 25.471012115478516, 'learning_rate': 3.6827338920900253e-07, 'fcm_dpo/beta': 0.0024436868261545897, 'fcm_dpo/q_t': 0.3888060450553894, 'fcm_dpo/delta': -0.0788697600364685, 'fcm_dpo/margin': 194.39535522460938, 'margin_dpo/margin_mean': 194.39535522460938, 'margin_dpo/margin_std': 205.177001953125, 'logps/chosen': -305.2633056640625, 'logps/rejected': -539.4804077148438, 'logps/ref_chosen': -58.797122955322266, 'logps/ref_rejected': -98.61885070800781, 'KL/chosen_KL_mean': -246.4661865234375, 'KL/rejected_KL_mean': -440.861572265625, 'KL/mean': -343.66387939453125, 'KL/std': 186.5137939453125, 'logits/chosen': -0.2643176317214966, 'logits/rejected': -0.2642500698566437, 'epoch': 0.41} + 41%|████ | 280/681 [11:52<17:09, 2.57s/it] 41%|████▏ | 281/681 [11:54<17:05, 2.56s/it] {'loss': 1.0639, 'grad_norm': 20.681076049804688, 'learning_rate': 3.6714120619553435e-07, 'fcm_dpo/beta': 0.0024358248338103294, 'fcm_dpo/q_t': 0.3996858596801758, 'fcm_dpo/delta': -0.02387945167720318, 'fcm_dpo/margin': 173.53970336914062, 'margin_dpo/margin_mean': 173.53970336914062, 'margin_dpo/margin_std': 196.98211669921875, 'logps/chosen': -283.5814208984375, 'logps/rejected': -482.51519775390625, 'logps/ref_chosen': -55.488521575927734, 'logps/ref_rejected': -80.88258361816406, 'KL/chosen_KL_mean': -228.09291076660156, 'KL/rejected_KL_mean': -401.6325988769531, 'KL/mean': -314.8627624511719, 'KL/std': 182.05502319335938, 'logits/chosen': -0.32415997982025146, 'logits/rejected': -0.2965441346168518, 'epoch': 0.41} + 41%|████▏ | 281/681 [11:54<17:05, 2.56s/it] 41%|████▏ | 282/681 [11:57<16:59, 2.55s/it] {'loss': 1.1443, 'grad_norm': 22.19266128540039, 'learning_rate': 3.660059364023408e-07, 'fcm_dpo/beta': 0.0024580340832471848, 'fcm_dpo/q_t': 0.42514273524284363, 'fcm_dpo/delta': 0.06647245585918427, 'fcm_dpo/margin': 136.3738555908203, 'margin_dpo/margin_mean': 136.37387084960938, 'margin_dpo/margin_std': 219.83755493164062, 'logps/chosen': -327.081298828125, 'logps/rejected': -485.7359619140625, 'logps/ref_chosen': -73.07014465332031, 'logps/ref_rejected': -95.35098266601562, 'KL/chosen_KL_mean': -254.01113891601562, 'KL/rejected_KL_mean': -390.38494873046875, 'KL/mean': -322.19805908203125, 'KL/std': 191.5809326171875, 'logits/chosen': -0.3909962475299835, 'logits/rejected': -0.3682512640953064, 'epoch': 0.41} + 41%|████▏ | 282/681 [11:57<16:59, 2.55s/it] 42%|████▏ | 283/681 [11:59<16:51, 2.54s/it] {'loss': 1.0088, 'grad_norm': 30.945764541625977, 'learning_rate': 3.6486760974483685e-07, 'fcm_dpo/beta': 0.0024265965912491083, 'fcm_dpo/q_t': 0.38304460048675537, 'fcm_dpo/delta': -0.10707136243581772, 'fcm_dpo/margin': 206.79803466796875, 'margin_dpo/margin_mean': 206.79803466796875, 'margin_dpo/margin_std': 210.00332641601562, 'logps/chosen': -328.092529296875, 'logps/rejected': -569.9786376953125, 'logps/ref_chosen': -61.89844512939453, 'logps/ref_rejected': -96.98655700683594, 'KL/chosen_KL_mean': -266.194091796875, 'KL/rejected_KL_mean': -472.99212646484375, 'KL/mean': -369.5931091308594, 'KL/std': 225.01544189453125, 'logits/chosen': -0.31001657247543335, 'logits/rejected': -0.3113076388835907, 'epoch': 0.42} + 42%|████▏ | 283/681 [11:59<16:51, 2.54s/it] 42%|████▏ | 284/681 [12:02<17:10, 2.60s/it] {'loss': 1.0441, 'grad_norm': 29.025760650634766, 'learning_rate': 3.6372625621898863e-07, 'fcm_dpo/beta': 0.0023820092901587486, 'fcm_dpo/q_t': 0.3964000344276428, 'fcm_dpo/delta': -0.051164183765649796, 'fcm_dpo/margin': 188.37283325195312, 'margin_dpo/margin_mean': 188.3728485107422, 'margin_dpo/margin_std': 209.56866455078125, 'logps/chosen': -321.7177734375, 'logps/rejected': -545.1243286132812, 'logps/ref_chosen': -58.4355354309082, 'logps/ref_rejected': -93.46926879882812, 'KL/chosen_KL_mean': -263.2822265625, 'KL/rejected_KL_mean': -451.6550598144531, 'KL/mean': -357.4686279296875, 'KL/std': 208.6199951171875, 'logits/chosen': -0.3494116961956024, 'logits/rejected': -0.33238211274147034, 'epoch': 0.42} + 42%|████▏ | 284/681 [12:02<17:10, 2.60s/it] 42%|████▏ | 285/681 [12:04<17:05, 2.59s/it] {'loss': 1.0727, 'grad_norm': 24.644567489624023, 'learning_rate': 3.625819059005228e-07, 'fcm_dpo/beta': 0.0023880950175225735, 'fcm_dpo/q_t': 0.401868611574173, 'fcm_dpo/delta': -0.016177460551261902, 'fcm_dpo/margin': 173.7642822265625, 'margin_dpo/margin_mean': 173.7642822265625, 'margin_dpo/margin_std': 203.36099243164062, 'logps/chosen': -369.1742858886719, 'logps/rejected': -575.833251953125, 'logps/ref_chosen': -66.23219299316406, 'logps/ref_rejected': -99.1268310546875, 'KL/chosen_KL_mean': -302.94207763671875, 'KL/rejected_KL_mean': -476.7063903808594, 'KL/mean': -389.82421875, 'KL/std': 186.600830078125, 'logits/chosen': -0.3161693811416626, 'logits/rejected': -0.29976439476013184, 'epoch': 0.42} + 42%|████▏ | 285/681 [12:05<17:05, 2.59s/it] 42%|████▏ | 286/681 [12:07<16:55, 2.57s/it] {'loss': 1.0606, 'grad_norm': 32.87556457519531, 'learning_rate': 3.614345889441346e-07, 'fcm_dpo/beta': 0.0023618116974830627, 'fcm_dpo/q_t': 0.3975561857223511, 'fcm_dpo/delta': -0.044894296675920486, 'fcm_dpo/margin': 187.52935791015625, 'margin_dpo/margin_mean': 187.52935791015625, 'margin_dpo/margin_std': 228.80307006835938, 'logps/chosen': -394.24163818359375, 'logps/rejected': -597.408447265625, 'logps/ref_chosen': -72.95100402832031, 'logps/ref_rejected': -88.58845520019531, 'KL/chosen_KL_mean': -321.2906494140625, 'KL/rejected_KL_mean': -508.82000732421875, 'KL/mean': -415.0553283691406, 'KL/std': 211.68765258789062, 'logits/chosen': -0.33208775520324707, 'logits/rejected': -0.31639528274536133, 'epoch': 0.42} + 42%|████▏ | 286/681 [12:07<16:55, 2.57s/it] 42%|████▏ | 287/681 [12:09<16:16, 2.48s/it] {'loss': 1.1102, 'grad_norm': 30.704463958740234, 'learning_rate': 3.6028433558269275e-07, 'fcm_dpo/beta': 0.0023672073148190975, 'fcm_dpo/q_t': 0.41586506366729736, 'fcm_dpo/delta': 0.038290925323963165, 'fcm_dpo/margin': 153.36337280273438, 'margin_dpo/margin_mean': 153.36337280273438, 'margin_dpo/margin_std': 203.52426147460938, 'logps/chosen': -358.1020812988281, 'logps/rejected': -527.620361328125, 'logps/ref_chosen': -61.54115295410156, 'logps/ref_rejected': -77.69607543945312, 'KL/chosen_KL_mean': -296.5609130859375, 'KL/rejected_KL_mean': -449.92431640625, 'KL/mean': -373.24261474609375, 'KL/std': 189.50225830078125, 'logits/chosen': -0.2822296619415283, 'logits/rejected': -0.2562822699546814, 'epoch': 0.42} + 42%|████▏ | 287/681 [12:09<16:16, 2.48s/it] 42%|████▏ | 288/681 [12:12<16:23, 2.50s/it] {'loss': 1.0495, 'grad_norm': 23.47823715209961, 'learning_rate': 3.5913117612644327e-07, 'fcm_dpo/beta': 0.0023379437625408173, 'fcm_dpo/q_t': 0.3972797393798828, 'fcm_dpo/delta': -0.04108269885182381, 'fcm_dpo/margin': 187.32608032226562, 'margin_dpo/margin_mean': 187.32608032226562, 'margin_dpo/margin_std': 196.14199829101562, 'logps/chosen': -340.30517578125, 'logps/rejected': -558.3056640625, 'logps/ref_chosen': -56.661224365234375, 'logps/ref_rejected': -87.33570098876953, 'KL/chosen_KL_mean': -283.6439514160156, 'KL/rejected_KL_mean': -470.969970703125, 'KL/mean': -377.3069763183594, 'KL/std': 187.588134765625, 'logits/chosen': -0.32454603910446167, 'logits/rejected': -0.30999091267585754, 'epoch': 0.42} + 42%|████▏ | 288/681 [12:12<16:23, 2.50s/it] 42%|████▏ | 289/681 [12:14<16:21, 2.50s/it] {'loss': 1.0143, 'grad_norm': 26.55727195739746, 'learning_rate': 3.5797514096221024e-07, 'fcm_dpo/beta': 0.0023207864724099636, 'fcm_dpo/q_t': 0.3854060769081116, 'fcm_dpo/delta': -0.10334105789661407, 'fcm_dpo/margin': 214.71755981445312, 'margin_dpo/margin_mean': 214.71755981445312, 'margin_dpo/margin_std': 228.02871704101562, 'logps/chosen': -310.98443603515625, 'logps/rejected': -568.1142578125, 'logps/ref_chosen': -45.23039245605469, 'logps/ref_rejected': -87.64266967773438, 'KL/chosen_KL_mean': -265.7540588378906, 'KL/rejected_KL_mean': -480.47161865234375, 'KL/mean': -373.11285400390625, 'KL/std': 206.18136596679688, 'logits/chosen': -0.26507318019866943, 'logits/rejected': -0.2650468349456787, 'epoch': 0.42} + 42%|████▏ | 289/681 [12:14<16:21, 2.50s/it] 43%|████▎ | 290/681 [12:17<16:31, 2.54s/it] {'loss': 1.0356, 'grad_norm': 20.6153621673584, 'learning_rate': 3.568162605525952e-07, 'fcm_dpo/beta': 0.0022673578932881355, 'fcm_dpo/q_t': 0.3898102939128876, 'fcm_dpo/delta': -0.09031336009502411, 'fcm_dpo/margin': 214.22250366210938, 'margin_dpo/margin_mean': 214.22250366210938, 'margin_dpo/margin_std': 258.4100341796875, 'logps/chosen': -332.32525634765625, 'logps/rejected': -607.7847900390625, 'logps/ref_chosen': -55.47149658203125, 'logps/ref_rejected': -116.70857238769531, 'KL/chosen_KL_mean': -276.853759765625, 'KL/rejected_KL_mean': -491.07623291015625, 'KL/mean': -383.96502685546875, 'KL/std': 217.60955810546875, 'logits/chosen': -0.3317207098007202, 'logits/rejected': -0.3523035943508148, 'epoch': 0.43} + 43%|████▎ | 290/681 [12:17<16:31, 2.54s/it] 43%|████▎ | 291/681 [12:20<16:37, 2.56s/it] {'loss': 1.052, 'grad_norm': 26.71583366394043, 'learning_rate': 3.5565456543517485e-07, 'fcm_dpo/beta': 0.0022549815475940704, 'fcm_dpo/q_t': 0.39613714814186096, 'fcm_dpo/delta': -0.047311414033174515, 'fcm_dpo/margin': 197.35003662109375, 'margin_dpo/margin_mean': 197.35003662109375, 'margin_dpo/margin_std': 223.89544677734375, 'logps/chosen': -293.30010986328125, 'logps/rejected': -516.6868896484375, 'logps/ref_chosen': -63.26036834716797, 'logps/ref_rejected': -89.29708862304688, 'KL/chosen_KL_mean': -230.03976440429688, 'KL/rejected_KL_mean': -427.3897705078125, 'KL/mean': -328.71478271484375, 'KL/std': 188.07647705078125, 'logits/chosen': -0.34385907649993896, 'logits/rejected': -0.3322584629058838, 'epoch': 0.43} + 43%|████▎ | 291/681 [12:20<16:37, 2.56s/it] 43%|████▎ | 292/681 [12:22<16:13, 2.50s/it] {'loss': 1.0614, 'grad_norm': 23.873905181884766, 'learning_rate': 3.5449008622169583e-07, 'fcm_dpo/beta': 0.0022292518988251686, 'fcm_dpo/q_t': 0.3962145447731018, 'fcm_dpo/delta': -0.05277468264102936, 'fcm_dpo/margin': 202.0208282470703, 'margin_dpo/margin_mean': 202.02084350585938, 'margin_dpo/margin_std': 255.2861328125, 'logps/chosen': -302.1072998046875, 'logps/rejected': -540.1710205078125, 'logps/ref_chosen': -53.91852951049805, 'logps/ref_rejected': -89.96138000488281, 'KL/chosen_KL_mean': -248.18878173828125, 'KL/rejected_KL_mean': -450.2095947265625, 'KL/mean': -349.1991882324219, 'KL/std': 218.77377319335938, 'logits/chosen': -0.31092000007629395, 'logits/rejected': -0.2947191596031189, 'epoch': 0.43} + 43%|████▎ | 292/681 [12:22<16:13, 2.50s/it] 43%|████▎ | 293/681 [12:25<16:25, 2.54s/it] {'loss': 1.1388, 'grad_norm': 44.14265060424805, 'learning_rate': 3.5332285359726846e-07, 'fcm_dpo/beta': 0.0022371455561369658, 'fcm_dpo/q_t': 0.42154398560523987, 'fcm_dpo/delta': 0.05408930033445358, 'fcm_dpo/margin': 155.41049194335938, 'margin_dpo/margin_mean': 155.41049194335938, 'margin_dpo/margin_std': 247.1007080078125, 'logps/chosen': -312.5982666015625, 'logps/rejected': -485.4851379394531, 'logps/ref_chosen': -60.376033782958984, 'logps/ref_rejected': -77.85244750976562, 'KL/chosen_KL_mean': -252.22222900390625, 'KL/rejected_KL_mean': -407.6326904296875, 'KL/mean': -329.9274597167969, 'KL/std': 214.19297790527344, 'logits/chosen': -0.32412204146385193, 'logits/rejected': -0.3133804202079773, 'epoch': 0.43} + 43%|████▎ | 293/681 [12:25<16:25, 2.54s/it] 43%|████▎ | 294/681 [12:27<16:08, 2.50s/it] {'loss': 1.0958, 'grad_norm': 27.088520050048828, 'learning_rate': 3.5215289831955786e-07, 'fcm_dpo/beta': 0.0022378209978342056, 'fcm_dpo/q_t': 0.4101282060146332, 'fcm_dpo/delta': 0.0072397105395793915, 'fcm_dpo/margin': 175.60528564453125, 'margin_dpo/margin_mean': 175.60528564453125, 'margin_dpo/margin_std': 234.76486206054688, 'logps/chosen': -272.8875732421875, 'logps/rejected': -482.30230712890625, 'logps/ref_chosen': -48.0875358581543, 'logps/ref_rejected': -81.89698791503906, 'KL/chosen_KL_mean': -224.80003356933594, 'KL/rejected_KL_mean': -400.40533447265625, 'KL/mean': -312.6026916503906, 'KL/std': 189.26589965820312, 'logits/chosen': -0.30610185861587524, 'logits/rejected': -0.31053173542022705, 'epoch': 0.43} + 43%|████▎ | 294/681 [12:27<16:08, 2.50s/it] 43%|████▎ | 295/681 [12:29<15:57, 2.48s/it] {'loss': 1.0748, 'grad_norm': 29.52766990661621, 'learning_rate': 3.509802512179737e-07, 'fcm_dpo/beta': 0.002234848216176033, 'fcm_dpo/q_t': 0.4011088013648987, 'fcm_dpo/delta': -0.03554647043347359, 'fcm_dpo/margin': 194.2014617919922, 'margin_dpo/margin_mean': 194.2014617919922, 'margin_dpo/margin_std': 255.34695434570312, 'logps/chosen': -326.4168701171875, 'logps/rejected': -558.1499633789062, 'logps/ref_chosen': -49.92467498779297, 'logps/ref_rejected': -87.45632934570312, 'KL/chosen_KL_mean': -276.4921875, 'KL/rejected_KL_mean': -470.69366455078125, 'KL/mean': -373.5928955078125, 'KL/std': 215.0086669921875, 'logits/chosen': -0.34492525458335876, 'logits/rejected': -0.34738168120384216, 'epoch': 0.43} + 43%|████▎ | 295/681 [12:29<15:57, 2.48s/it] 43%|████▎ | 296/681 [12:32<15:51, 2.47s/it] {'loss': 1.1858, 'grad_norm': 30.11356544494629, 'learning_rate': 3.498049431928577e-07, 'fcm_dpo/beta': 0.0022216294892132282, 'fcm_dpo/q_t': 0.4277653992176056, 'fcm_dpo/delta': -0.01307538989931345, 'fcm_dpo/margin': 141.15194702148438, 'margin_dpo/margin_mean': 141.15194702148438, 'margin_dpo/margin_std': 273.21856689453125, 'logps/chosen': -415.45196533203125, 'logps/rejected': -584.2017822265625, 'logps/ref_chosen': -65.49124145507812, 'logps/ref_rejected': -93.08908081054688, 'KL/chosen_KL_mean': -349.96075439453125, 'KL/rejected_KL_mean': -491.1127014160156, 'KL/mean': -420.53668212890625, 'KL/std': 214.16937255859375, 'logits/chosen': -0.35794180631637573, 'logits/rejected': -0.3423447906970978, 'epoch': 0.43} + 43%|████▎ | 296/681 [12:32<15:51, 2.47s/it] 44%|████▎ | 297/681 [12:34<16:02, 2.51s/it] {'loss': 1.0975, 'grad_norm': 26.392620086669922, 'learning_rate': 3.486270052146694e-07, 'fcm_dpo/beta': 0.0022283056750893593, 'fcm_dpo/q_t': 0.41309747099876404, 'fcm_dpo/delta': 0.028696084395051003, 'fcm_dpo/margin': 167.111572265625, 'margin_dpo/margin_mean': 167.111572265625, 'margin_dpo/margin_std': 204.6174774169922, 'logps/chosen': -362.55364990234375, 'logps/rejected': -568.3267822265625, 'logps/ref_chosen': -56.476951599121094, 'logps/ref_rejected': -95.1385498046875, 'KL/chosen_KL_mean': -306.0766906738281, 'KL/rejected_KL_mean': -473.188232421875, 'KL/mean': -389.6324768066406, 'KL/std': 202.3120574951172, 'logits/chosen': -0.3816351294517517, 'logits/rejected': -0.38721585273742676, 'epoch': 0.44} + 44%|████▎ | 297/681 [12:34<16:02, 2.51s/it] 44%|████▍ | 298/681 [12:37<16:07, 2.53s/it] {'loss': 1.0784, 'grad_norm': 23.501754760742188, 'learning_rate': 3.474464683231698e-07, 'fcm_dpo/beta': 0.002221038332208991, 'fcm_dpo/q_t': 0.39830517768859863, 'fcm_dpo/delta': -0.06791778653860092, 'fcm_dpo/margin': 209.22708129882812, 'margin_dpo/margin_mean': 209.2270965576172, 'margin_dpo/margin_std': 309.3797607421875, 'logps/chosen': -403.48193359375, 'logps/rejected': -662.0460205078125, 'logps/ref_chosen': -67.32516479492188, 'logps/ref_rejected': -116.66217041015625, 'KL/chosen_KL_mean': -336.15673828125, 'KL/rejected_KL_mean': -545.3837890625, 'KL/mean': -440.7702941894531, 'KL/std': 268.1351318359375, 'logits/chosen': -0.3646508455276489, 'logits/rejected': -0.382364422082901, 'epoch': 0.44} + 44%|████▍ | 298/681 [12:37<16:07, 2.53s/it] 44%|████▍ | 299/681 [12:40<16:06, 2.53s/it] {'loss': 1.0997, 'grad_norm': 30.56585121154785, 'learning_rate': 3.462633636266041e-07, 'fcm_dpo/beta': 0.0022161747328937054, 'fcm_dpo/q_t': 0.4088858366012573, 'fcm_dpo/delta': -0.0036800652742385864, 'fcm_dpo/margin': 181.78482055664062, 'margin_dpo/margin_mean': 181.78482055664062, 'margin_dpo/margin_std': 254.47584533691406, 'logps/chosen': -326.68988037109375, 'logps/rejected': -543.8408203125, 'logps/ref_chosen': -48.96209716796875, 'logps/ref_rejected': -84.32823944091797, 'KL/chosen_KL_mean': -277.727783203125, 'KL/rejected_KL_mean': -459.5125732421875, 'KL/mean': -368.62017822265625, 'KL/std': 201.8565673828125, 'logits/chosen': -0.38511306047439575, 'logits/rejected': -0.39016664028167725, 'epoch': 0.44} + 44%|████▍ | 299/681 [12:40<16:06, 2.53s/it] 44%|████▍ | 300/681 [12:42<16:17, 2.57s/it] {'loss': 1.0673, 'grad_norm': 32.0359992980957, 'learning_rate': 3.4507772230088147e-07, 'fcm_dpo/beta': 0.0021824706345796585, 'fcm_dpo/q_t': 0.3919123411178589, 'fcm_dpo/delta': -0.08032761514186859, 'fcm_dpo/margin': 218.35943603515625, 'margin_dpo/margin_mean': 218.3594512939453, 'margin_dpo/margin_std': 301.63983154296875, 'logps/chosen': -418.7271728515625, 'logps/rejected': -673.9793701171875, 'logps/ref_chosen': -59.07371139526367, 'logps/ref_rejected': -95.9664535522461, 'KL/chosen_KL_mean': -359.6534423828125, 'KL/rejected_KL_mean': -578.0128784179688, 'KL/mean': -468.8331604003906, 'KL/std': 245.97247314453125, 'logits/chosen': -0.3638511300086975, 'logits/rejected': -0.36910757422447205, 'epoch': 0.44} + 44%|████▍ | 300/681 [12:42<16:17, 2.57s/it] 44%|████▍ | 301/681 [12:44<15:42, 2.48s/it] {'loss': 1.0574, 'grad_norm': 23.908777236938477, 'learning_rate': 3.4388957558875316e-07, 'fcm_dpo/beta': 0.0021431921049952507, 'fcm_dpo/q_t': 0.397805392742157, 'fcm_dpo/delta': -0.05494837090373039, 'fcm_dpo/margin': 210.9517822265625, 'margin_dpo/margin_mean': 210.9517822265625, 'margin_dpo/margin_std': 264.4083557128906, 'logps/chosen': -357.1824951171875, 'logps/rejected': -603.2384033203125, 'logps/ref_chosen': -57.249366760253906, 'logps/ref_rejected': -92.35354614257812, 'KL/chosen_KL_mean': -299.93310546875, 'KL/rejected_KL_mean': -510.8848571777344, 'KL/mean': -405.40899658203125, 'KL/std': 220.83499145507812, 'logits/chosen': -0.3962569832801819, 'logits/rejected': -0.39823591709136963, 'epoch': 0.44} + 44%|████▍ | 301/681 [12:45<15:42, 2.48s/it] 44%|████▍ | 302/681 [12:47<16:05, 2.55s/it] {'loss': 1.0961, 'grad_norm': 25.441043853759766, 'learning_rate': 3.426989547989902e-07, 'fcm_dpo/beta': 0.0021400072146207094, 'fcm_dpo/q_t': 0.4114646017551422, 'fcm_dpo/delta': 0.014417506754398346, 'fcm_dpo/margin': 180.3203125, 'margin_dpo/margin_mean': 180.3203125, 'margin_dpo/margin_std': 233.26841735839844, 'logps/chosen': -301.6986083984375, 'logps/rejected': -528.04736328125, 'logps/ref_chosen': -51.197994232177734, 'logps/ref_rejected': -97.22636413574219, 'KL/chosen_KL_mean': -250.50064086914062, 'KL/rejected_KL_mean': -430.82098388671875, 'KL/mean': -340.6607971191406, 'KL/std': 200.06076049804688, 'logits/chosen': -0.37367284297943115, 'logits/rejected': -0.3793327212333679, 'epoch': 0.44} + 44%|████▍ | 302/681 [12:47<16:05, 2.55s/it] 44%|████▍ | 303/681 [12:50<16:34, 2.63s/it] {'loss': 1.1285, 'grad_norm': 25.745454788208008, 'learning_rate': 3.4150589130555773e-07, 'fcm_dpo/beta': 0.00216277944855392, 'fcm_dpo/q_t': 0.41918832063674927, 'fcm_dpo/delta': 0.05117795616388321, 'fcm_dpo/margin': 162.12612915039062, 'margin_dpo/margin_mean': 162.12612915039062, 'margin_dpo/margin_std': 235.95956420898438, 'logps/chosen': -318.85980224609375, 'logps/rejected': -501.2174072265625, 'logps/ref_chosen': -66.71394348144531, 'logps/ref_rejected': -86.94542694091797, 'KL/chosen_KL_mean': -252.14584350585938, 'KL/rejected_KL_mean': -414.27197265625, 'KL/mean': -333.20892333984375, 'KL/std': 197.92535400390625, 'logits/chosen': -0.3918335437774658, 'logits/rejected': -0.3803231716156006, 'epoch': 0.44} + 44%|████▍ | 303/681 [12:50<16:34, 2.63s/it] 45%|████▍ | 304/681 [12:53<16:43, 2.66s/it] {'loss': 1.0561, 'grad_norm': 44.084224700927734, 'learning_rate': 3.403104165467883e-07, 'fcm_dpo/beta': 0.002179923001676798, 'fcm_dpo/q_t': 0.40427181124687195, 'fcm_dpo/delta': 0.00038868188858032227, 'fcm_dpo/margin': 183.21072387695312, 'margin_dpo/margin_mean': 183.21072387695312, 'margin_dpo/margin_std': 157.74085998535156, 'logps/chosen': -293.90570068359375, 'logps/rejected': -495.63775634765625, 'logps/ref_chosen': -71.95069885253906, 'logps/ref_rejected': -90.47203063964844, 'KL/chosen_KL_mean': -221.95501708984375, 'KL/rejected_KL_mean': -405.16571044921875, 'KL/mean': -313.56036376953125, 'KL/std': 181.8985137939453, 'logits/chosen': -0.4207175672054291, 'logits/rejected': -0.41341572999954224, 'epoch': 0.45} + 45%|████▍ | 304/681 [12:53<16:43, 2.66s/it] 45%|████▍ | 305/681 [12:55<16:36, 2.65s/it] {'loss': 1.1229, 'grad_norm': 31.972244262695312, 'learning_rate': 3.391125620245535e-07, 'fcm_dpo/beta': 0.0021737192291766405, 'fcm_dpo/q_t': 0.41834086179733276, 'fcm_dpo/delta': 0.042266424745321274, 'fcm_dpo/margin': 165.08358764648438, 'margin_dpo/margin_mean': 165.08358764648438, 'margin_dpo/margin_std': 234.5511932373047, 'logps/chosen': -311.2120361328125, 'logps/rejected': -502.25494384765625, 'logps/ref_chosen': -66.79523468017578, 'logps/ref_rejected': -92.75459289550781, 'KL/chosen_KL_mean': -244.41680908203125, 'KL/rejected_KL_mean': -409.5003662109375, 'KL/mean': -326.9585876464844, 'KL/std': 211.88983154296875, 'logits/chosen': -0.41458988189697266, 'logits/rejected': -0.39753109216690063, 'epoch': 0.45} + 45%|████▍ | 305/681 [12:55<16:36, 2.65s/it] 45%|████▍ | 306/681 [12:58<16:33, 2.65s/it] {'loss': 1.1083, 'grad_norm': 25.6021785736084, 'learning_rate': 3.3791235930343417e-07, 'fcm_dpo/beta': 0.0022093781735748053, 'fcm_dpo/q_t': 0.4171670079231262, 'fcm_dpo/delta': 0.04615872725844383, 'fcm_dpo/margin': 160.84295654296875, 'margin_dpo/margin_mean': 160.84295654296875, 'margin_dpo/margin_std': 201.04806518554688, 'logps/chosen': -319.76422119140625, 'logps/rejected': -496.08245849609375, 'logps/ref_chosen': -69.68389892578125, 'logps/ref_rejected': -85.15919494628906, 'KL/chosen_KL_mean': -250.080322265625, 'KL/rejected_KL_mean': -410.92327880859375, 'KL/mean': -330.5018005371094, 'KL/std': 189.47010803222656, 'logits/chosen': -0.386802077293396, 'logits/rejected': -0.36289817094802856, 'epoch': 0.45} + 45%|████▍ | 306/681 [12:58<16:33, 2.65s/it] 45%|████▌ | 307/681 [13:01<16:41, 2.68s/it] {'loss': 1.1004, 'grad_norm': 25.33635139465332, 'learning_rate': 3.367098400098881e-07, 'fcm_dpo/beta': 0.0022153654135763645, 'fcm_dpo/q_t': 0.4120749235153198, 'fcm_dpo/delta': 0.022209253162145615, 'fcm_dpo/margin': 170.90798950195312, 'margin_dpo/margin_mean': 170.90798950195312, 'margin_dpo/margin_std': 223.1322021484375, 'logps/chosen': -307.1520080566406, 'logps/rejected': -494.8669128417969, 'logps/ref_chosen': -70.16542053222656, 'logps/ref_rejected': -86.97230529785156, 'KL/chosen_KL_mean': -236.98658752441406, 'KL/rejected_KL_mean': -407.89459228515625, 'KL/mean': -322.44061279296875, 'KL/std': 182.07717895507812, 'logits/chosen': -0.35360145568847656, 'logits/rejected': -0.3347788155078888, 'epoch': 0.45} + 45%|████▌ | 307/681 [13:01<16:41, 2.68s/it] 45%|████▌ | 308/681 [13:03<16:27, 2.65s/it] {'loss': 1.0462, 'grad_norm': 35.38976287841797, 'learning_rate': 3.355050358314172e-07, 'fcm_dpo/beta': 0.0022103004157543182, 'fcm_dpo/q_t': 0.39782798290252686, 'fcm_dpo/delta': -0.03460945934057236, 'fcm_dpo/margin': 195.95510864257812, 'margin_dpo/margin_mean': 195.95510864257812, 'margin_dpo/margin_std': 199.99911499023438, 'logps/chosen': -299.76092529296875, 'logps/rejected': -519.84326171875, 'logps/ref_chosen': -55.2449951171875, 'logps/ref_rejected': -79.37226104736328, 'KL/chosen_KL_mean': -244.5159149169922, 'KL/rejected_KL_mean': -440.47100830078125, 'KL/mean': -342.49346923828125, 'KL/std': 209.0433349609375, 'logits/chosen': -0.33471328020095825, 'logits/rejected': -0.32273852825164795, 'epoch': 0.45} + 45%|████▌ | 308/681 [13:03<16:27, 2.65s/it] 45%|████▌ | 309/681 [13:06<16:30, 2.66s/it] {'loss': 1.0737, 'grad_norm': 46.212398529052734, 'learning_rate': 3.3429797851573183e-07, 'fcm_dpo/beta': 0.0022104280069470406, 'fcm_dpo/q_t': 0.40153375267982483, 'fcm_dpo/delta': -0.02155473083257675, 'fcm_dpo/margin': 190.0969696044922, 'margin_dpo/margin_mean': 190.09698486328125, 'margin_dpo/margin_std': 229.8643798828125, 'logps/chosen': -301.35552978515625, 'logps/rejected': -524.8341674804688, 'logps/ref_chosen': -48.959083557128906, 'logps/ref_rejected': -82.34072875976562, 'KL/chosen_KL_mean': -252.39642333984375, 'KL/rejected_KL_mean': -442.4934387207031, 'KL/mean': -347.4449157714844, 'KL/std': 221.7578125, 'logits/chosen': -0.3147510886192322, 'logits/rejected': -0.30644166469573975, 'epoch': 0.45} + 45%|████▌ | 309/681 [13:06<16:30, 2.66s/it] 46%|████▌ | 310/681 [13:09<16:28, 2.66s/it] {'loss': 1.072, 'grad_norm': 33.55656051635742, 'learning_rate': 3.3308869986991487e-07, 'fcm_dpo/beta': 0.0022050600964576006, 'fcm_dpo/q_t': 0.40635180473327637, 'fcm_dpo/delta': 0.0034537650644779205, 'fcm_dpo/margin': 179.78012084960938, 'margin_dpo/margin_mean': 179.7801055908203, 'margin_dpo/margin_std': 189.23294067382812, 'logps/chosen': -356.65667724609375, 'logps/rejected': -553.6253051757812, 'logps/ref_chosen': -62.74177932739258, 'logps/ref_rejected': -79.9302978515625, 'KL/chosen_KL_mean': -293.9149169921875, 'KL/rejected_KL_mean': -473.69500732421875, 'KL/mean': -383.8049621582031, 'KL/std': 197.09872436523438, 'logits/chosen': -0.3364931046962738, 'logits/rejected': -0.3217809200286865, 'epoch': 0.46} + 46%|████▌ | 310/681 [13:09<16:28, 2.66s/it] 46%|████▌ | 311/681 [13:11<15:57, 2.59s/it] {'loss': 1.0687, 'grad_norm': 23.36595344543457, 'learning_rate': 3.3187723175958346e-07, 'fcm_dpo/beta': 0.0021827276796102524, 'fcm_dpo/q_t': 0.4002191424369812, 'fcm_dpo/delta': -0.04212527349591255, 'fcm_dpo/margin': 201.69406127929688, 'margin_dpo/margin_mean': 201.69406127929688, 'margin_dpo/margin_std': 261.9150085449219, 'logps/chosen': -360.04083251953125, 'logps/rejected': -586.1451416015625, 'logps/ref_chosen': -53.02798080444336, 'logps/ref_rejected': -77.43820190429688, 'KL/chosen_KL_mean': -307.0128173828125, 'KL/rejected_KL_mean': -508.7069091796875, 'KL/mean': -407.85986328125, 'KL/std': 248.04620361328125, 'logits/chosen': -0.3425959348678589, 'logits/rejected': -0.31884661316871643, 'epoch': 0.46} + 46%|████▌ | 311/681 [13:11<15:57, 2.59s/it] 46%|████▌ | 312/681 [13:13<15:23, 2.50s/it] {'loss': 1.0923, 'grad_norm': 25.498783111572266, 'learning_rate': 3.306636061080487e-07, 'fcm_dpo/beta': 0.0021842336282134056, 'fcm_dpo/q_t': 0.4080832004547119, 'fcm_dpo/delta': -0.010445069521665573, 'fcm_dpo/margin': 187.60142517089844, 'margin_dpo/margin_mean': 187.6014404296875, 'margin_dpo/margin_std': 262.27001953125, 'logps/chosen': -341.0242614746094, 'logps/rejected': -555.0262451171875, 'logps/ref_chosen': -49.39221954345703, 'logps/ref_rejected': -75.79280853271484, 'KL/chosen_KL_mean': -291.6320495605469, 'KL/rejected_KL_mean': -479.23345947265625, 'KL/mean': -385.4327697753906, 'KL/std': 222.2432861328125, 'logits/chosen': -0.2573780417442322, 'logits/rejected': -0.2468489408493042, 'epoch': 0.46} + 46%|████▌ | 312/681 [13:13<15:23, 2.50s/it] 46%|████▌ | 313/681 [13:16<15:03, 2.45s/it] {'loss': 1.0768, 'grad_norm': 24.87454605102539, 'learning_rate': 3.2944785489547537e-07, 'fcm_dpo/beta': 0.0021794200874865055, 'fcm_dpo/q_t': 0.40177449584007263, 'fcm_dpo/delta': -0.03143874555826187, 'fcm_dpo/margin': 197.0161895751953, 'margin_dpo/margin_mean': 197.0161895751953, 'margin_dpo/margin_std': 253.28038024902344, 'logps/chosen': -324.9635925292969, 'logps/rejected': -558.2332763671875, 'logps/ref_chosen': -50.152740478515625, 'logps/ref_rejected': -86.40620422363281, 'KL/chosen_KL_mean': -274.81085205078125, 'KL/rejected_KL_mean': -471.82708740234375, 'KL/mean': -373.3189392089844, 'KL/std': 231.67160034179688, 'logits/chosen': -0.33512693643569946, 'logits/rejected': -0.3323609530925751, 'epoch': 0.46} + 46%|████▌ | 313/681 [13:16<15:03, 2.45s/it] 46%|████▌ | 314/681 [13:18<15:25, 2.52s/it] {'loss': 1.1194, 'grad_norm': 22.955171585083008, 'learning_rate': 3.2823001015803857e-07, 'fcm_dpo/beta': 0.0021564702037721872, 'fcm_dpo/q_t': 0.4130541682243347, 'fcm_dpo/delta': 0.012253139168024063, 'fcm_dpo/margin': 179.90646362304688, 'margin_dpo/margin_mean': 179.90646362304688, 'margin_dpo/margin_std': 277.59881591796875, 'logps/chosen': -355.9537353515625, 'logps/rejected': -576.2191772460938, 'logps/ref_chosen': -57.237579345703125, 'logps/ref_rejected': -97.5965347290039, 'KL/chosen_KL_mean': -298.7161560058594, 'KL/rejected_KL_mean': -478.62261962890625, 'KL/mean': -388.66937255859375, 'KL/std': 222.7989501953125, 'logits/chosen': -0.3842218816280365, 'logits/rejected': -0.3846893906593323, 'epoch': 0.46} + 46%|████▌ | 314/681 [13:18<15:25, 2.52s/it] 46%|████▋ | 315/681 [13:21<15:30, 2.54s/it] {'loss': 1.1207, 'grad_norm': 24.254276275634766, 'learning_rate': 3.270101039870797e-07, 'fcm_dpo/beta': 0.002180763054639101, 'fcm_dpo/q_t': 0.41917771100997925, 'fcm_dpo/delta': 0.050937261432409286, 'fcm_dpo/margin': 160.8974151611328, 'margin_dpo/margin_mean': 160.8974151611328, 'margin_dpo/margin_std': 221.48760986328125, 'logps/chosen': -318.91949462890625, 'logps/rejected': -516.42822265625, 'logps/ref_chosen': -49.06958770751953, 'logps/ref_rejected': -85.68087768554688, 'KL/chosen_KL_mean': -269.84991455078125, 'KL/rejected_KL_mean': -430.747314453125, 'KL/mean': -350.29864501953125, 'KL/std': 197.81268310546875, 'logits/chosen': -0.30912429094314575, 'logits/rejected': -0.31343331933021545, 'epoch': 0.46} + 46%|████▋ | 315/681 [13:21<15:30, 2.54s/it] 46%|████▋ | 316/681 [13:24<15:28, 2.54s/it] {'loss': 1.0134, 'grad_norm': 31.184669494628906, 'learning_rate': 3.2578816852826086e-07, 'fcm_dpo/beta': 0.00214382354170084, 'fcm_dpo/q_t': 0.38564345240592957, 'fcm_dpo/delta': -0.10627135634422302, 'fcm_dpo/margin': 233.23556518554688, 'margin_dpo/margin_mean': 233.23556518554688, 'margin_dpo/margin_std': 248.93862915039062, 'logps/chosen': -309.7789306640625, 'logps/rejected': -590.03515625, 'logps/ref_chosen': -54.26074981689453, 'logps/ref_rejected': -101.2814712524414, 'KL/chosen_KL_mean': -255.51815795898438, 'KL/rejected_KL_mean': -488.75372314453125, 'KL/mean': -372.13592529296875, 'KL/std': 227.74038696289062, 'logits/chosen': -0.3521896004676819, 'logits/rejected': -0.35530799627304077, 'epoch': 0.46} + 46%|████▋ | 316/681 [13:24<15:28, 2.54s/it] 47%|████▋ | 317/681 [13:26<15:33, 2.56s/it] {'loss': 0.978, 'grad_norm': 26.17375946044922, 'learning_rate': 3.2456423598071783e-07, 'fcm_dpo/beta': 0.002105377148836851, 'fcm_dpo/q_t': 0.3748946189880371, 'fcm_dpo/delta': -0.14325766265392303, 'fcm_dpo/margin': 254.4990997314453, 'margin_dpo/margin_mean': 254.4990997314453, 'margin_dpo/margin_std': 230.30081176757812, 'logps/chosen': -308.8658447265625, 'logps/rejected': -607.9697265625, 'logps/ref_chosen': -56.094207763671875, 'logps/ref_rejected': -100.69905090332031, 'KL/chosen_KL_mean': -252.7716064453125, 'KL/rejected_KL_mean': -507.27069091796875, 'KL/mean': -380.02117919921875, 'KL/std': 206.77581787109375, 'logits/chosen': -0.390718936920166, 'logits/rejected': -0.3826904892921448, 'epoch': 0.47} + 47%|████▋ | 317/681 [13:26<15:33, 2.56s/it] 47%|████▋ | 318/681 [13:29<15:13, 2.52s/it] {'loss': 1.0773, 'grad_norm': 23.02154541015625, 'learning_rate': 3.233383385962115e-07, 'fcm_dpo/beta': 0.0020867723505944014, 'fcm_dpo/q_t': 0.4067472815513611, 'fcm_dpo/delta': -0.001836409792304039, 'fcm_dpo/margin': 192.4683380126953, 'margin_dpo/margin_mean': 192.4683380126953, 'margin_dpo/margin_std': 227.6715850830078, 'logps/chosen': -334.1149597167969, 'logps/rejected': -544.7018432617188, 'logps/ref_chosen': -64.64569854736328, 'logps/ref_rejected': -82.76425170898438, 'KL/chosen_KL_mean': -269.4692687988281, 'KL/rejected_KL_mean': -461.93756103515625, 'KL/mean': -365.70343017578125, 'KL/std': 210.68841552734375, 'logits/chosen': -0.4208700656890869, 'logits/rejected': -0.3912370800971985, 'epoch': 0.47} + 47%|████▋ | 318/681 [13:29<15:13, 2.52s/it] 47%|████▋ | 319/681 [13:31<15:30, 2.57s/it] {'loss': 1.0361, 'grad_norm': 28.023488998413086, 'learning_rate': 3.2211050867827805e-07, 'fcm_dpo/beta': 0.00206323666498065, 'fcm_dpo/q_t': 0.3929300308227539, 'fcm_dpo/delta': -0.0612642765045166, 'fcm_dpo/margin': 222.212646484375, 'margin_dpo/margin_mean': 222.212646484375, 'margin_dpo/margin_std': 239.76544189453125, 'logps/chosen': -289.0303649902344, 'logps/rejected': -575.7657470703125, 'logps/ref_chosen': -49.383758544921875, 'logps/ref_rejected': -113.90650939941406, 'KL/chosen_KL_mean': -239.6466064453125, 'KL/rejected_KL_mean': -461.8592529296875, 'KL/mean': -350.7529296875, 'KL/std': 224.13626098632812, 'logits/chosen': -0.38181760907173157, 'logits/rejected': -0.3934275507926941, 'epoch': 0.47} + 47%|████▋ | 319/681 [13:31<15:30, 2.57s/it] 47%|████▋ | 320/681 [13:34<15:35, 2.59s/it] {'loss': 1.0213, 'grad_norm': 31.34676742553711, 'learning_rate': 3.208807785813777e-07, 'fcm_dpo/beta': 0.0020233364775776863, 'fcm_dpo/q_t': 0.3885904550552368, 'fcm_dpo/delta': -0.08694636821746826, 'fcm_dpo/margin': 238.38136291503906, 'margin_dpo/margin_mean': 238.38136291503906, 'margin_dpo/margin_std': 253.5137481689453, 'logps/chosen': -310.99200439453125, 'logps/rejected': -587.53564453125, 'logps/ref_chosen': -59.50489044189453, 'logps/ref_rejected': -97.66717529296875, 'KL/chosen_KL_mean': -251.48712158203125, 'KL/rejected_KL_mean': -489.8685302734375, 'KL/mean': -370.67779541015625, 'KL/std': 234.5609130859375, 'logits/chosen': -0.3809563219547272, 'logits/rejected': -0.3856205940246582, 'epoch': 0.47} + 47%|████▋ | 320/681 [13:34<15:35, 2.59s/it] 47%|████▋ | 321/681 [13:36<15:19, 2.56s/it] {'loss': 1.0825, 'grad_norm': 22.613510131835938, 'learning_rate': 3.1964918071004217e-07, 'fcm_dpo/beta': 0.0020058308728039265, 'fcm_dpo/q_t': 0.4050200581550598, 'fcm_dpo/delta': -0.013504378497600555, 'fcm_dpo/margin': 205.5706787109375, 'margin_dpo/margin_mean': 205.5706787109375, 'margin_dpo/margin_std': 260.4750061035156, 'logps/chosen': -376.25860595703125, 'logps/rejected': -611.921630859375, 'logps/ref_chosen': -61.548683166503906, 'logps/ref_rejected': -91.64103698730469, 'KL/chosen_KL_mean': -314.7099304199219, 'KL/rejected_KL_mean': -520.2805786132812, 'KL/mean': -417.4952392578125, 'KL/std': 242.76919555664062, 'logits/chosen': -0.3279907703399658, 'logits/rejected': -0.31290388107299805, 'epoch': 0.47} + 47%|████▋ | 321/681 [13:36<15:19, 2.56s/it] 47%|████▋ | 322/681 [13:39<15:24, 2.58s/it] {'loss': 1.0334, 'grad_norm': 27.584213256835938, 'learning_rate': 3.184157475180207e-07, 'fcm_dpo/beta': 0.0019898181781172752, 'fcm_dpo/q_t': 0.3928527235984802, 'fcm_dpo/delta': -0.06161149963736534, 'fcm_dpo/margin': 230.3541259765625, 'margin_dpo/margin_mean': 230.3541259765625, 'margin_dpo/margin_std': 238.74765014648438, 'logps/chosen': -335.2269287109375, 'logps/rejected': -604.0408935546875, 'logps/ref_chosen': -57.29003143310547, 'logps/ref_rejected': -95.74992370605469, 'KL/chosen_KL_mean': -277.9368896484375, 'KL/rejected_KL_mean': -508.2909851074219, 'KL/mean': -393.11395263671875, 'KL/std': 222.57351684570312, 'logits/chosen': -0.3369908928871155, 'logits/rejected': -0.33506596088409424, 'epoch': 0.47} + 47%|████▋ | 322/681 [13:39<15:24, 2.58s/it] 47%|████▋ | 323/681 [13:42<15:29, 2.59s/it] {'loss': 1.0976, 'grad_norm': 37.98896789550781, 'learning_rate': 3.171805115074251e-07, 'fcm_dpo/beta': 0.0019924892112612724, 'fcm_dpo/q_t': 0.4107508361339569, 'fcm_dpo/delta': 0.018896615132689476, 'fcm_dpo/margin': 191.61865234375, 'margin_dpo/margin_mean': 191.61865234375, 'margin_dpo/margin_std': 239.77487182617188, 'logps/chosen': -359.580322265625, 'logps/rejected': -575.0269775390625, 'logps/ref_chosen': -51.23395919799805, 'logps/ref_rejected': -75.06192016601562, 'KL/chosen_KL_mean': -308.34637451171875, 'KL/rejected_KL_mean': -499.96502685546875, 'KL/mean': -404.15570068359375, 'KL/std': 221.2327423095703, 'logits/chosen': -0.3975059986114502, 'logits/rejected': -0.39840167760849, 'epoch': 0.47} + 47%|████▋ | 323/681 [13:42<15:29, 2.59s/it] 48%|████▊ | 324/681 [13:44<15:10, 2.55s/it] {'loss': 1.1188, 'grad_norm': 45.15947723388672, 'learning_rate': 3.1594350522787295e-07, 'fcm_dpo/beta': 0.0020171115174889565, 'fcm_dpo/q_t': 0.4130977690219879, 'fcm_dpo/delta': 0.011590391397476196, 'fcm_dpo/margin': 192.13768005371094, 'margin_dpo/margin_mean': 192.13768005371094, 'margin_dpo/margin_std': 280.11346435546875, 'logps/chosen': -428.28216552734375, 'logps/rejected': -641.76220703125, 'logps/ref_chosen': -65.13516998291016, 'logps/ref_rejected': -86.47750854492188, 'KL/chosen_KL_mean': -363.1470031738281, 'KL/rejected_KL_mean': -555.28466796875, 'KL/mean': -459.2158203125, 'KL/std': 245.32525634765625, 'logits/chosen': -0.382703959941864, 'logits/rejected': -0.36886465549468994, 'epoch': 0.48} + 48%|████▊ | 324/681 [13:44<15:10, 2.55s/it] 48%|████▊ | 325/681 [13:47<15:40, 2.64s/it] {'loss': 1.1353, 'grad_norm': 27.52566909790039, 'learning_rate': 3.147047612756302e-07, 'fcm_dpo/beta': 0.002026339527219534, 'fcm_dpo/q_t': 0.4235909581184387, 'fcm_dpo/delta': 0.07426172494888306, 'fcm_dpo/margin': 161.92161560058594, 'margin_dpo/margin_mean': 161.921630859375, 'margin_dpo/margin_std': 220.10113525390625, 'logps/chosen': -348.02099609375, 'logps/rejected': -523.81298828125, 'logps/ref_chosen': -56.215599060058594, 'logps/ref_rejected': -70.08592987060547, 'KL/chosen_KL_mean': -291.805419921875, 'KL/rejected_KL_mean': -453.72705078125, 'KL/mean': -372.7662353515625, 'KL/std': 219.71487426757812, 'logits/chosen': -0.38832759857177734, 'logits/rejected': -0.36508649587631226, 'epoch': 0.48} + 48%|████▊ | 325/681 [13:47<15:40, 2.64s/it] 48%|████▊ | 326/681 [13:50<15:52, 2.68s/it] {'loss': 1.1258, 'grad_norm': 49.80532455444336, 'learning_rate': 3.134643122927519e-07, 'fcm_dpo/beta': 0.00205246196128428, 'fcm_dpo/q_t': 0.42195820808410645, 'fcm_dpo/delta': 0.07252933084964752, 'fcm_dpo/margin': 160.740966796875, 'margin_dpo/margin_mean': 160.74095153808594, 'margin_dpo/margin_std': 199.02053833007812, 'logps/chosen': -407.47711181640625, 'logps/rejected': -575.33984375, 'logps/ref_chosen': -72.72496032714844, 'logps/ref_rejected': -79.8467788696289, 'KL/chosen_KL_mean': -334.75213623046875, 'KL/rejected_KL_mean': -495.49310302734375, 'KL/mean': -415.12261962890625, 'KL/std': 209.2508544921875, 'logits/chosen': -0.4096953272819519, 'logits/rejected': -0.3885076642036438, 'epoch': 0.48} + 48%|████▊ | 326/681 [13:50<15:52, 2.68s/it] 48%|████▊ | 327/681 [13:52<15:51, 2.69s/it] {'loss': 1.0081, 'grad_norm': 34.839141845703125, 'learning_rate': 3.1222219096622264e-07, 'fcm_dpo/beta': 0.0020246244966983795, 'fcm_dpo/q_t': 0.38342660665512085, 'fcm_dpo/delta': -0.10278213024139404, 'fcm_dpo/margin': 245.57040405273438, 'margin_dpo/margin_mean': 245.57040405273438, 'margin_dpo/margin_std': 245.6561279296875, 'logps/chosen': -358.83251953125, 'logps/rejected': -647.2022705078125, 'logps/ref_chosen': -69.13441467285156, 'logps/ref_rejected': -111.93377685546875, 'KL/chosen_KL_mean': -289.6980895996094, 'KL/rejected_KL_mean': -535.2684936523438, 'KL/mean': -412.4832763671875, 'KL/std': 226.18698120117188, 'logits/chosen': -0.40583473443984985, 'logits/rejected': -0.3938768804073334, 'epoch': 0.48} + 48%|████▊ | 327/681 [13:52<15:51, 2.69s/it] 48%|████▊ | 328/681 [13:55<15:39, 2.66s/it] {'loss': 1.0624, 'grad_norm': 31.601329803466797, 'learning_rate': 3.1097843002709427e-07, 'fcm_dpo/beta': 0.002008104231208563, 'fcm_dpo/q_t': 0.3990749418735504, 'fcm_dpo/delta': -0.046333495527505875, 'fcm_dpo/margin': 221.25289916992188, 'margin_dpo/margin_mean': 221.25289916992188, 'margin_dpo/margin_std': 280.74591064453125, 'logps/chosen': -373.2491455078125, 'logps/rejected': -625.6698608398438, 'logps/ref_chosen': -59.68719482421875, 'logps/ref_rejected': -90.85499572753906, 'KL/chosen_KL_mean': -313.56195068359375, 'KL/rejected_KL_mean': -534.8148803710938, 'KL/mean': -424.1883850097656, 'KL/std': 248.53616333007812, 'logits/chosen': -0.37076377868652344, 'logits/rejected': -0.3748210668563843, 'epoch': 0.48} + 48%|████▊ | 328/681 [13:55<15:39, 2.66s/it] 48%|████▊ | 329/681 [13:57<15:16, 2.60s/it] {'loss': 1.0626, 'grad_norm': 29.35655975341797, 'learning_rate': 3.0973306224962437e-07, 'fcm_dpo/beta': 0.001973442966118455, 'fcm_dpo/q_t': 0.3950253129005432, 'fcm_dpo/delta': -0.057223327457904816, 'fcm_dpo/margin': 229.90365600585938, 'margin_dpo/margin_mean': 229.90365600585938, 'margin_dpo/margin_std': 285.9508972167969, 'logps/chosen': -413.6102294921875, 'logps/rejected': -678.9653930664062, 'logps/ref_chosen': -65.2461929321289, 'logps/ref_rejected': -100.69770812988281, 'KL/chosen_KL_mean': -348.364013671875, 'KL/rejected_KL_mean': -578.2677001953125, 'KL/mean': -463.31585693359375, 'KL/std': 264.98114013671875, 'logits/chosen': -0.38779300451278687, 'logits/rejected': -0.3826950788497925, 'epoch': 0.48} + 48%|████▊ | 329/681 [13:58<15:16, 2.60s/it] 48%|████▊ | 330/681 [14:00<15:17, 2.61s/it] {'loss': 1.0349, 'grad_norm': 24.276758193969727, 'learning_rate': 3.084861204504122e-07, 'fcm_dpo/beta': 0.00195663096383214, 'fcm_dpo/q_t': 0.3914690613746643, 'fcm_dpo/delta': -0.07452473044395447, 'fcm_dpo/margin': 240.72103881835938, 'margin_dpo/margin_mean': 240.72105407714844, 'margin_dpo/margin_std': 271.8794860839844, 'logps/chosen': -344.89227294921875, 'logps/rejected': -625.4918212890625, 'logps/ref_chosen': -46.998348236083984, 'logps/ref_rejected': -86.87684631347656, 'KL/chosen_KL_mean': -297.8939208984375, 'KL/rejected_KL_mean': -538.614990234375, 'KL/mean': -418.2544250488281, 'KL/std': 251.08877563476562, 'logits/chosen': -0.37196603417396545, 'logits/rejected': -0.37232887744903564, 'epoch': 0.48} + 48%|████▊ | 330/681 [14:00<15:17, 2.61s/it] 49%|████▊ | 331/681 [14:03<15:01, 2.57s/it] {'loss': 1.02, 'grad_norm': 30.4269962310791, 'learning_rate': 3.072376374875335e-07, 'fcm_dpo/beta': 0.0019327991176396608, 'fcm_dpo/q_t': 0.3927758038043976, 'fcm_dpo/delta': -0.054849639534950256, 'fcm_dpo/margin': 234.03463745117188, 'margin_dpo/margin_mean': 234.03463745117188, 'margin_dpo/margin_std': 203.57736206054688, 'logps/chosen': -375.3511657714844, 'logps/rejected': -647.876953125, 'logps/ref_chosen': -50.52424621582031, 'logps/ref_rejected': -89.01544189453125, 'KL/chosen_KL_mean': -324.826904296875, 'KL/rejected_KL_mean': -558.861572265625, 'KL/mean': -441.84423828125, 'KL/std': 205.87355041503906, 'logits/chosen': -0.4110090136528015, 'logits/rejected': -0.4085603356361389, 'epoch': 0.49} + 49%|████▊ | 331/681 [14:03<15:01, 2.57s/it] 49%|████▉ | 332/681 [14:05<14:41, 2.53s/it] {'loss': 1.1466, 'grad_norm': 26.13042449951172, 'learning_rate': 3.059876462596758e-07, 'fcm_dpo/beta': 0.0019560197833925486, 'fcm_dpo/q_t': 0.42724329233169556, 'fcm_dpo/delta': 0.08553433418273926, 'fcm_dpo/margin': 162.0646209716797, 'margin_dpo/margin_mean': 162.0646209716797, 'margin_dpo/margin_std': 242.46429443359375, 'logps/chosen': -380.6527099609375, 'logps/rejected': -570.0221557617188, 'logps/ref_chosen': -49.18028259277344, 'logps/ref_rejected': -76.48515319824219, 'KL/chosen_KL_mean': -331.472412109375, 'KL/rejected_KL_mean': -493.5369873046875, 'KL/mean': -412.5047302246094, 'KL/std': 217.41600036621094, 'logits/chosen': -0.43391871452331543, 'logits/rejected': -0.4177435040473938, 'epoch': 0.49} + 49%|████▉ | 332/681 [14:05<14:41, 2.53s/it] 49%|████▉ | 333/681 [14:07<14:24, 2.48s/it] {'loss': 1.0917, 'grad_norm': 26.93458366394043, 'learning_rate': 3.0473617970527015e-07, 'fcm_dpo/beta': 0.0019417135044932365, 'fcm_dpo/q_t': 0.40386736392974854, 'fcm_dpo/delta': -0.02682226523756981, 'fcm_dpo/margin': 218.76780700683594, 'margin_dpo/margin_mean': 218.76780700683594, 'margin_dpo/margin_std': 306.374267578125, 'logps/chosen': -410.664794921875, 'logps/rejected': -660.720947265625, 'logps/ref_chosen': -63.75574493408203, 'logps/ref_rejected': -95.04411315917969, 'KL/chosen_KL_mean': -346.9090270996094, 'KL/rejected_KL_mean': -565.6768798828125, 'KL/mean': -456.2929382324219, 'KL/std': 254.33526611328125, 'logits/chosen': -0.43507999181747437, 'logits/rejected': -0.43221938610076904, 'epoch': 0.49} + 49%|████▉ | 333/681 [14:07<14:24, 2.48s/it] 49%|████▉ | 334/681 [14:10<14:21, 2.48s/it] {'loss': 1.1117, 'grad_norm': 25.05001449584961, 'learning_rate': 3.034832708016243e-07, 'fcm_dpo/beta': 0.0019499869085848331, 'fcm_dpo/q_t': 0.41010695695877075, 'fcm_dpo/delta': 0.004075163975358009, 'fcm_dpo/margin': 203.11204528808594, 'margin_dpo/margin_mean': 203.112060546875, 'margin_dpo/margin_std': 303.74609375, 'logps/chosen': -391.4478759765625, 'logps/rejected': -622.8970947265625, 'logps/ref_chosen': -66.97975158691406, 'logps/ref_rejected': -95.31692504882812, 'KL/chosen_KL_mean': -324.4681396484375, 'KL/rejected_KL_mean': -527.5802001953125, 'KL/mean': -426.024169921875, 'KL/std': 273.755859375, 'logits/chosen': -0.42128774523735046, 'logits/rejected': -0.4194262623786926, 'epoch': 0.49} + 49%|████▉ | 334/681 [14:10<14:21, 2.48s/it] 49%|████▉ | 335/681 [14:12<14:29, 2.51s/it] {'loss': 1.1714, 'grad_norm': 27.41286277770996, 'learning_rate': 3.022289525640531e-07, 'fcm_dpo/beta': 0.0019520404748618603, 'fcm_dpo/q_t': 0.42951834201812744, 'fcm_dpo/delta': -0.0027985575143247843, 'fcm_dpo/margin': 157.77536010742188, 'margin_dpo/margin_mean': 157.77536010742188, 'margin_dpo/margin_std': 273.3466491699219, 'logps/chosen': -407.6168212890625, 'logps/rejected': -590.4674072265625, 'logps/ref_chosen': -62.54248046875, 'logps/ref_rejected': -87.61770629882812, 'KL/chosen_KL_mean': -345.0743408203125, 'KL/rejected_KL_mean': -502.8497314453125, 'KL/mean': -423.9620361328125, 'KL/std': 249.84854125976562, 'logits/chosen': -0.45320796966552734, 'logits/rejected': -0.43336862325668335, 'epoch': 0.49} + 49%|████▉ | 335/681 [14:13<14:29, 2.51s/it] 49%|████▉ | 336/681 [14:15<14:33, 2.53s/it] {'loss': 1.0543, 'grad_norm': 27.173513412475586, 'learning_rate': 3.009732580450086e-07, 'fcm_dpo/beta': 0.0019327957415953279, 'fcm_dpo/q_t': 0.3922100067138672, 'fcm_dpo/delta': -0.08437924087047577, 'fcm_dpo/margin': 248.549072265625, 'margin_dpo/margin_mean': 248.549072265625, 'margin_dpo/margin_std': 324.4247131347656, 'logps/chosen': -382.33355712890625, 'logps/rejected': -680.7556762695312, 'logps/ref_chosen': -54.53115463256836, 'logps/ref_rejected': -104.40424346923828, 'KL/chosen_KL_mean': -327.80242919921875, 'KL/rejected_KL_mean': -576.3514404296875, 'KL/mean': -452.0769348144531, 'KL/std': 280.5369873046875, 'logits/chosen': -0.41087979078292847, 'logits/rejected': -0.41120561957359314, 'epoch': 0.49} + 49%|████▉ | 336/681 [14:15<14:33, 2.53s/it] 49%|████▉ | 337/681 [14:18<14:30, 2.53s/it] {'loss': 1.0376, 'grad_norm': 32.10745620727539, 'learning_rate': 2.9971622033320914e-07, 'fcm_dpo/beta': 0.0018971418030560017, 'fcm_dpo/q_t': 0.39354777336120605, 'fcm_dpo/delta': -0.061946481466293335, 'fcm_dpo/margin': 241.8828887939453, 'margin_dpo/margin_mean': 241.88287353515625, 'margin_dpo/margin_std': 264.9962463378906, 'logps/chosen': -360.2503356933594, 'logps/rejected': -638.7315673828125, 'logps/ref_chosen': -65.12869262695312, 'logps/ref_rejected': -101.72701263427734, 'KL/chosen_KL_mean': -295.12164306640625, 'KL/rejected_KL_mean': -537.0045166015625, 'KL/mean': -416.0631103515625, 'KL/std': 226.8846893310547, 'logits/chosen': -0.4443337321281433, 'logits/rejected': -0.4344029426574707, 'epoch': 0.49} + 49%|████▉ | 337/681 [14:18<14:30, 2.53s/it] 50%|████▉ | 338/681 [14:20<14:32, 2.54s/it] {'loss': 1.0405, 'grad_norm': 31.5482234954834, 'learning_rate': 2.984578725527675e-07, 'fcm_dpo/beta': 0.0018818873213604093, 'fcm_dpo/q_t': 0.3970540463924408, 'fcm_dpo/delta': -0.03903310373425484, 'fcm_dpo/margin': 232.34637451171875, 'margin_dpo/margin_mean': 232.34637451171875, 'margin_dpo/margin_std': 230.95556640625, 'logps/chosen': -330.4953308105469, 'logps/rejected': -593.487548828125, 'logps/ref_chosen': -58.422706604003906, 'logps/ref_rejected': -89.06854248046875, 'KL/chosen_KL_mean': -272.0726318359375, 'KL/rejected_KL_mean': -504.41900634765625, 'KL/mean': -388.24578857421875, 'KL/std': 222.21337890625, 'logits/chosen': -0.4323264956474304, 'logits/rejected': -0.4272562265396118, 'epoch': 0.5} + 50%|████▉ | 338/681 [14:20<14:32, 2.54s/it] 50%|████▉ | 339/681 [14:22<14:03, 2.47s/it] {'loss': 1.0496, 'grad_norm': 28.834077835083008, 'learning_rate': 2.9719824786231796e-07, 'fcm_dpo/beta': 0.0018855368252843618, 'fcm_dpo/q_t': 0.3984706401824951, 'fcm_dpo/delta': -0.03534376621246338, 'fcm_dpo/margin': 229.56790161132812, 'margin_dpo/margin_mean': 229.56790161132812, 'margin_dpo/margin_std': 228.10092163085938, 'logps/chosen': -347.5762634277344, 'logps/rejected': -621.0597534179688, 'logps/ref_chosen': -59.99531555175781, 'logps/ref_rejected': -103.9109115600586, 'KL/chosen_KL_mean': -287.5809326171875, 'KL/rejected_KL_mean': -517.1488647460938, 'KL/mean': -402.3648986816406, 'KL/std': 230.27059936523438, 'logits/chosen': -0.473450243473053, 'logits/rejected': -0.46168074011802673, 'epoch': 0.5} + 50%|████▉ | 339/681 [14:22<14:03, 2.47s/it] 50%|████▉ | 340/681 [14:25<14:11, 2.50s/it] {'loss': 1.1047, 'grad_norm': 31.226028442382812, 'learning_rate': 2.959373794541426e-07, 'fcm_dpo/beta': 0.001867425860837102, 'fcm_dpo/q_t': 0.412939190864563, 'fcm_dpo/delta': 0.025164764374494553, 'fcm_dpo/margin': 201.13601684570312, 'margin_dpo/margin_mean': 201.13601684570312, 'margin_dpo/margin_std': 265.9939270019531, 'logps/chosen': -356.49261474609375, 'logps/rejected': -577.9056396484375, 'logps/ref_chosen': -52.83022689819336, 'logps/ref_rejected': -73.10723114013672, 'KL/chosen_KL_mean': -303.6623840332031, 'KL/rejected_KL_mean': -504.79840087890625, 'KL/mean': -404.23040771484375, 'KL/std': 221.79913330078125, 'logits/chosen': -0.3601798415184021, 'logits/rejected': -0.338324636220932, 'epoch': 0.5} + 50%|████▉ | 340/681 [14:25<14:11, 2.50s/it] 50%|█████ | 341/681 [14:27<13:46, 2.43s/it] {'loss': 1.0527, 'grad_norm': 25.454540252685547, 'learning_rate': 2.946753005532965e-07, 'fcm_dpo/beta': 0.001860608346760273, 'fcm_dpo/q_t': 0.3988453149795532, 'fcm_dpo/delta': -0.03702467307448387, 'fcm_dpo/margin': 233.8590850830078, 'margin_dpo/margin_mean': 233.8590850830078, 'margin_dpo/margin_std': 255.57666015625, 'logps/chosen': -352.9140625, 'logps/rejected': -640.6832275390625, 'logps/ref_chosen': -47.899803161621094, 'logps/ref_rejected': -101.80987548828125, 'KL/chosen_KL_mean': -305.0142517089844, 'KL/rejected_KL_mean': -538.8733520507812, 'KL/mean': -421.94378662109375, 'KL/std': 246.80142211914062, 'logits/chosen': -0.38325321674346924, 'logits/rejected': -0.38400086760520935, 'epoch': 0.5} + 50%|█████ | 341/681 [14:27<13:46, 2.43s/it] 50%|█████ | 342/681 [14:30<14:07, 2.50s/it] {'loss': 1.1045, 'grad_norm': 25.053524017333984, 'learning_rate': 2.934120444167326e-07, 'fcm_dpo/beta': 0.001875395653769374, 'fcm_dpo/q_t': 0.41176915168762207, 'fcm_dpo/delta': 0.018558282405138016, 'fcm_dpo/margin': 203.568115234375, 'margin_dpo/margin_mean': 203.568115234375, 'margin_dpo/margin_std': 277.70416259765625, 'logps/chosen': -380.77825927734375, 'logps/rejected': -604.9393310546875, 'logps/ref_chosen': -71.99664306640625, 'logps/ref_rejected': -92.58959197998047, 'KL/chosen_KL_mean': -308.7816162109375, 'KL/rejected_KL_mean': -512.3497314453125, 'KL/mean': -410.565673828125, 'KL/std': 233.906982421875, 'logits/chosen': -0.42473480105400085, 'logits/rejected': -0.40138232707977295, 'epoch': 0.5} + 50%|█████ | 342/681 [14:30<14:07, 2.50s/it] 50%|█████ | 343/681 [14:32<13:47, 2.45s/it] {'loss': 1.0218, 'grad_norm': 22.644685745239258, 'learning_rate': 2.9214764433242476e-07, 'fcm_dpo/beta': 0.0018535295967012644, 'fcm_dpo/q_t': 0.39244067668914795, 'fcm_dpo/delta': -0.05536113679409027, 'fcm_dpo/margin': 244.30438232421875, 'margin_dpo/margin_mean': 244.30438232421875, 'margin_dpo/margin_std': 211.76568603515625, 'logps/chosen': -354.05804443359375, 'logps/rejected': -654.9982299804688, 'logps/ref_chosen': -54.405616760253906, 'logps/ref_rejected': -111.04142761230469, 'KL/chosen_KL_mean': -299.65240478515625, 'KL/rejected_KL_mean': -543.956787109375, 'KL/mean': -421.80462646484375, 'KL/std': 237.9898223876953, 'logits/chosen': -0.44045504927635193, 'logits/rejected': -0.44200748205184937, 'epoch': 0.5} + 50%|█████ | 343/681 [14:32<13:47, 2.45s/it] 51%|█████ | 344/681 [14:35<13:45, 2.45s/it] {'loss': 1.0795, 'grad_norm': 23.597854614257812, 'learning_rate': 2.9088213361849126e-07, 'fcm_dpo/beta': 0.0018617368768900633, 'fcm_dpo/q_t': 0.4059738516807556, 'fcm_dpo/delta': -0.020948857069015503, 'fcm_dpo/margin': 224.7209014892578, 'margin_dpo/margin_mean': 224.7209014892578, 'margin_dpo/margin_std': 275.2275390625, 'logps/chosen': -352.3494873046875, 'logps/rejected': -613.7291259765625, 'logps/ref_chosen': -53.96466827392578, 'logps/ref_rejected': -90.62336730957031, 'KL/chosen_KL_mean': -298.38482666015625, 'KL/rejected_KL_mean': -523.105712890625, 'KL/mean': -410.74530029296875, 'KL/std': 269.9559326171875, 'logits/chosen': -0.3812987804412842, 'logits/rejected': -0.3833147883415222, 'epoch': 0.51} + 51%|█████ | 344/681 [14:35<13:45, 2.45s/it] 51%|█████ | 345/681 [14:37<14:07, 2.52s/it] {'loss': 1.0538, 'grad_norm': 20.200607299804688, 'learning_rate': 2.896155456223163e-07, 'fcm_dpo/beta': 0.0018341855611652136, 'fcm_dpo/q_t': 0.39786964654922485, 'fcm_dpo/delta': -0.04054499790072441, 'fcm_dpo/margin': 239.21319580078125, 'margin_dpo/margin_mean': 239.21319580078125, 'margin_dpo/margin_std': 273.10797119140625, 'logps/chosen': -416.98779296875, 'logps/rejected': -694.0057373046875, 'logps/ref_chosen': -61.685699462890625, 'logps/ref_rejected': -99.49041748046875, 'KL/chosen_KL_mean': -355.3021240234375, 'KL/rejected_KL_mean': -594.5153198242188, 'KL/mean': -474.9087219238281, 'KL/std': 251.06130981445312, 'logits/chosen': -0.4079459309577942, 'logits/rejected': -0.4065578877925873, 'epoch': 0.51} + 51%|█████ | 345/681 [14:37<14:07, 2.52s/it] 51%|█████ | 346/681 [14:40<13:59, 2.51s/it] {'loss': 1.0737, 'grad_norm': 21.91631317138672, 'learning_rate': 2.883479137196714e-07, 'fcm_dpo/beta': 0.001822044956497848, 'fcm_dpo/q_t': 0.4034339189529419, 'fcm_dpo/delta': -0.012084376066923141, 'fcm_dpo/margin': 225.88394165039062, 'margin_dpo/margin_mean': 225.88394165039062, 'margin_dpo/margin_std': 270.33984375, 'logps/chosen': -410.079345703125, 'logps/rejected': -658.122314453125, 'logps/ref_chosen': -55.256263732910156, 'logps/ref_rejected': -77.41532135009766, 'KL/chosen_KL_mean': -354.8230895996094, 'KL/rejected_KL_mean': -580.70703125, 'KL/mean': -467.76507568359375, 'KL/std': 250.11328125, 'logits/chosen': -0.36091554164886475, 'logits/rejected': -0.35045647621154785, 'epoch': 0.51} + 51%|█████ | 346/681 [14:40<13:59, 2.51s/it] 51%|█████ | 347/681 [14:42<13:23, 2.41s/it] {'loss': 1.083, 'grad_norm': 24.682762145996094, 'learning_rate': 2.8707927131383614e-07, 'fcm_dpo/beta': 0.0018181647174060345, 'fcm_dpo/q_t': 0.40489083528518677, 'fcm_dpo/delta': -0.014934061095118523, 'fcm_dpo/margin': 227.87588500976562, 'margin_dpo/margin_mean': 227.87588500976562, 'margin_dpo/margin_std': 299.0618896484375, 'logps/chosen': -414.01239013671875, 'logps/rejected': -676.6771240234375, 'logps/ref_chosen': -57.56623840332031, 'logps/ref_rejected': -92.35509490966797, 'KL/chosen_KL_mean': -356.4461364746094, 'KL/rejected_KL_mean': -584.322021484375, 'KL/mean': -470.38409423828125, 'KL/std': 266.44903564453125, 'logits/chosen': -0.38032758235931396, 'logits/rejected': -0.37665826082229614, 'epoch': 0.51} + 51%|█████ | 347/681 [14:42<13:23, 2.41s/it] 51%|█████ | 348/681 [14:44<13:22, 2.41s/it] {'loss': 1.1225, 'grad_norm': 23.302974700927734, 'learning_rate': 2.858096518347179e-07, 'fcm_dpo/beta': 0.001820417819544673, 'fcm_dpo/q_t': 0.4191325306892395, 'fcm_dpo/delta': 0.05043090134859085, 'fcm_dpo/margin': 192.81918334960938, 'margin_dpo/margin_mean': 192.81918334960938, 'margin_dpo/margin_std': 263.36297607421875, 'logps/chosen': -371.4599609375, 'logps/rejected': -597.099853515625, 'logps/ref_chosen': -56.31770324707031, 'logps/ref_rejected': -89.13836669921875, 'KL/chosen_KL_mean': -315.14227294921875, 'KL/rejected_KL_mean': -507.9614562988281, 'KL/mean': -411.5518493652344, 'KL/std': 229.25570678710938, 'logits/chosen': -0.4306999444961548, 'logits/rejected': -0.4324670433998108, 'epoch': 0.51} + 51%|█████ | 348/681 [14:45<13:22, 2.41s/it] 51%|█████ | 349/681 [14:47<13:46, 2.49s/it] {'loss': 1.1007, 'grad_norm': 18.977298736572266, 'learning_rate': 2.845390887379706e-07, 'fcm_dpo/beta': 0.0018413200741633773, 'fcm_dpo/q_t': 0.4088849425315857, 'fcm_dpo/delta': -0.003216017037630081, 'fcm_dpo/margin': 218.76055908203125, 'margin_dpo/margin_mean': 218.7605438232422, 'margin_dpo/margin_std': 311.55657958984375, 'logps/chosen': -359.3460693359375, 'logps/rejected': -617.5862426757812, 'logps/ref_chosen': -58.025516510009766, 'logps/ref_rejected': -97.50515747070312, 'KL/chosen_KL_mean': -301.320556640625, 'KL/rejected_KL_mean': -520.0811157226562, 'KL/mean': -410.7008056640625, 'KL/std': 251.19491577148438, 'logits/chosen': -0.38458961248397827, 'logits/rejected': -0.38436293601989746, 'epoch': 0.51} + 51%|█████ | 349/681 [14:47<13:46, 2.49s/it] 51%|█████▏ | 350/681 [14:50<14:09, 2.57s/it] {'loss': 1.1063, 'grad_norm': 30.917598724365234, 'learning_rate': 2.8326761550411346e-07, 'fcm_dpo/beta': 0.0018291505984961987, 'fcm_dpo/q_t': 0.4092941880226135, 'fcm_dpo/delta': 0.010211531072854996, 'fcm_dpo/margin': 213.15390014648438, 'margin_dpo/margin_mean': 213.15390014648438, 'margin_dpo/margin_std': 299.6205749511719, 'logps/chosen': -383.44232177734375, 'logps/rejected': -622.1373291015625, 'logps/ref_chosen': -64.33049011230469, 'logps/ref_rejected': -89.87164306640625, 'KL/chosen_KL_mean': -319.11181640625, 'KL/rejected_KL_mean': -532.2657470703125, 'KL/mean': -425.6887512207031, 'KL/std': 239.318603515625, 'logits/chosen': -0.4026058614253998, 'logits/rejected': -0.40751904249191284, 'epoch': 0.51} + 51%|█████▏ | 350/681 [14:50<14:09, 2.57s/it] 52%|█████▏ | 351/681 [14:52<14:07, 2.57s/it] {'loss': 1.0441, 'grad_norm': 29.79630470275879, 'learning_rate': 2.819952656376487e-07, 'fcm_dpo/beta': 0.001811300404369831, 'fcm_dpo/q_t': 0.3929722309112549, 'fcm_dpo/delta': -0.07729293406009674, 'fcm_dpo/margin': 261.2742004394531, 'margin_dpo/margin_mean': 261.2742004394531, 'margin_dpo/margin_std': 318.12158203125, 'logps/chosen': -342.9754943847656, 'logps/rejected': -645.1429443359375, 'logps/ref_chosen': -60.6721305847168, 'logps/ref_rejected': -101.5654296875, 'KL/chosen_KL_mean': -282.3033447265625, 'KL/rejected_KL_mean': -543.5775146484375, 'KL/mean': -412.9404602050781, 'KL/std': 274.8599853515625, 'logits/chosen': -0.41858357191085815, 'logits/rejected': -0.4184862971305847, 'epoch': 0.52} + 52%|█████▏ | 351/681 [14:53<14:07, 2.57s/it] 52%|█████▏ | 352/681 [14:55<14:07, 2.58s/it] {'loss': 1.1791, 'grad_norm': 34.465721130371094, 'learning_rate': 2.8072207266617854e-07, 'fcm_dpo/beta': 0.0018348516896367073, 'fcm_dpo/q_t': 0.4335269033908844, 'fcm_dpo/delta': 0.11355704069137573, 'fcm_dpo/margin': 157.98960876464844, 'margin_dpo/margin_mean': 157.9896240234375, 'margin_dpo/margin_std': 274.4894104003906, 'logps/chosen': -402.18890380859375, 'logps/rejected': -565.8770751953125, 'logps/ref_chosen': -70.9434585571289, 'logps/ref_rejected': -76.6419677734375, 'KL/chosen_KL_mean': -331.2454528808594, 'KL/rejected_KL_mean': -489.23504638671875, 'KL/mean': -410.2402648925781, 'KL/std': 247.09182739257812, 'logits/chosen': -0.37872931361198425, 'logits/rejected': -0.3453086316585541, 'epoch': 0.52} + 52%|█████▏ | 352/681 [14:55<14:07, 2.58s/it] 52%|█████▏ | 353/681 [14:58<14:00, 2.56s/it] {'loss': 1.1061, 'grad_norm': 25.36398696899414, 'learning_rate': 2.794480701395219e-07, 'fcm_dpo/beta': 0.0018470755312591791, 'fcm_dpo/q_t': 0.4131419360637665, 'fcm_dpo/delta': 0.022442463785409927, 'fcm_dpo/margin': 204.60816955566406, 'margin_dpo/margin_mean': 204.60816955566406, 'margin_dpo/margin_std': 271.8081970214844, 'logps/chosen': -363.52606201171875, 'logps/rejected': -590.074462890625, 'logps/ref_chosen': -58.39533996582031, 'logps/ref_rejected': -80.33553314208984, 'KL/chosen_KL_mean': -305.1307373046875, 'KL/rejected_KL_mean': -509.7388916015625, 'KL/mean': -407.434814453125, 'KL/std': 240.89715576171875, 'logits/chosen': -0.420898973941803, 'logits/rejected': -0.40882444381713867, 'epoch': 0.52} + 52%|█████▏ | 353/681 [14:58<14:00, 2.56s/it] 52%|█████▏ | 354/681 [15:00<14:09, 2.60s/it] {'loss': 1.0535, 'grad_norm': 31.546432495117188, 'learning_rate': 2.781732916288303e-07, 'fcm_dpo/beta': 0.0018517575226724148, 'fcm_dpo/q_t': 0.4013108015060425, 'fcm_dpo/delta': -0.019758004695177078, 'fcm_dpo/margin': 226.20755004882812, 'margin_dpo/margin_mean': 226.20755004882812, 'margin_dpo/margin_std': 229.88156127929688, 'logps/chosen': -307.1639404296875, 'logps/rejected': -562.3260498046875, 'logps/ref_chosen': -59.80299377441406, 'logps/ref_rejected': -88.75750732421875, 'KL/chosen_KL_mean': -247.3609619140625, 'KL/rejected_KL_mean': -473.56854248046875, 'KL/mean': -360.4647216796875, 'KL/std': 221.80062866210938, 'logits/chosen': -0.3748503029346466, 'logits/rejected': -0.3650524616241455, 'epoch': 0.52} + 52%|█████▏ | 354/681 [15:00<14:09, 2.60s/it] 52%|█████▏ | 355/681 [15:03<13:46, 2.53s/it] {'loss': 1.0617, 'grad_norm': 40.25979232788086, 'learning_rate': 2.7689777072570284e-07, 'fcm_dpo/beta': 0.0018464226741343737, 'fcm_dpo/q_t': 0.4039592742919922, 'fcm_dpo/delta': -0.005889484658837318, 'fcm_dpo/margin': 219.6388702392578, 'margin_dpo/margin_mean': 219.6388702392578, 'margin_dpo/margin_std': 220.4207763671875, 'logps/chosen': -306.22052001953125, 'logps/rejected': -554.136962890625, 'logps/ref_chosen': -54.12849807739258, 'logps/ref_rejected': -82.40606689453125, 'KL/chosen_KL_mean': -252.09201049804688, 'KL/rejected_KL_mean': -471.73089599609375, 'KL/mean': -361.91143798828125, 'KL/std': 225.0244140625, 'logits/chosen': -0.4974886476993561, 'logits/rejected': -0.4918820858001709, 'epoch': 0.52} + 52%|█████▏ | 355/681 [15:03<13:46, 2.53s/it] 52%|█████▏ | 356/681 [15:05<14:04, 2.60s/it] {'loss': 1.2406, 'grad_norm': 30.493345260620117, 'learning_rate': 2.7562154104130176e-07, 'fcm_dpo/beta': 0.001856822520494461, 'fcm_dpo/q_t': 0.44722917675971985, 'fcm_dpo/delta': 0.0374276302754879, 'fcm_dpo/margin': 124.59414672851562, 'margin_dpo/margin_mean': 124.59414672851562, 'margin_dpo/margin_std': 293.3489990234375, 'logps/chosen': -377.41156005859375, 'logps/rejected': -513.231201171875, 'logps/ref_chosen': -64.6738052368164, 'logps/ref_rejected': -75.89926147460938, 'KL/chosen_KL_mean': -312.7377624511719, 'KL/rejected_KL_mean': -437.3319091796875, 'KL/mean': -375.03485107421875, 'KL/std': 239.71302795410156, 'logits/chosen': -0.4328617453575134, 'logits/rejected': -0.4156278967857361, 'epoch': 0.52} + 52%|█████▏ | 356/681 [15:05<14:04, 2.60s/it] 52%|█████▏ | 357/681 [15:08<14:04, 2.61s/it] {'loss': 1.0839, 'grad_norm': 28.883237838745117, 'learning_rate': 2.7434463620546594e-07, 'fcm_dpo/beta': 0.0018584367353469133, 'fcm_dpo/q_t': 0.40836572647094727, 'fcm_dpo/delta': 0.009893104434013367, 'fcm_dpo/margin': 209.9413604736328, 'margin_dpo/margin_mean': 209.9413604736328, 'margin_dpo/margin_std': 243.27175903320312, 'logps/chosen': -332.05938720703125, 'logps/rejected': -576.1160888671875, 'logps/ref_chosen': -52.725799560546875, 'logps/ref_rejected': -86.84115600585938, 'KL/chosen_KL_mean': -279.3336181640625, 'KL/rejected_KL_mean': -489.27496337890625, 'KL/mean': -384.30426025390625, 'KL/std': 236.43228149414062, 'logits/chosen': -0.40963172912597656, 'logits/rejected': -0.3999664783477783, 'epoch': 0.52} + 52%|█████▏ | 357/681 [15:08<14:04, 2.61s/it] 53%|█████▎ | 358/681 [15:11<14:19, 2.66s/it] {'loss': 1.1107, 'grad_norm': 24.633974075317383, 'learning_rate': 2.730670898658255e-07, 'fcm_dpo/beta': 0.001879463205114007, 'fcm_dpo/q_t': 0.4165921211242676, 'fcm_dpo/delta': 0.04035775363445282, 'fcm_dpo/margin': 192.11947631835938, 'margin_dpo/margin_mean': 192.11947631835938, 'margin_dpo/margin_std': 253.2946014404297, 'logps/chosen': -325.2506103515625, 'logps/rejected': -542.5379028320312, 'logps/ref_chosen': -63.20543670654297, 'logps/ref_rejected': -88.373291015625, 'KL/chosen_KL_mean': -262.045166015625, 'KL/rejected_KL_mean': -454.16461181640625, 'KL/mean': -358.10491943359375, 'KL/std': 234.3748016357422, 'logits/chosen': -0.46384721994400024, 'logits/rejected': -0.4513862133026123, 'epoch': 0.53} + 53%|█████▎ | 358/681 [15:11<14:19, 2.66s/it] 53%|█████▎ | 359/681 [15:13<14:09, 2.64s/it] {'loss': 1.0718, 'grad_norm': 30.42852783203125, 'learning_rate': 2.717889356869146e-07, 'fcm_dpo/beta': 0.001871941378340125, 'fcm_dpo/q_t': 0.40334317088127136, 'fcm_dpo/delta': -0.02348851040005684, 'fcm_dpo/margin': 225.623779296875, 'margin_dpo/margin_mean': 225.62379455566406, 'margin_dpo/margin_std': 277.3382873535156, 'logps/chosen': -358.39483642578125, 'logps/rejected': -609.8221435546875, 'logps/ref_chosen': -56.370216369628906, 'logps/ref_rejected': -82.17375183105469, 'KL/chosen_KL_mean': -302.02459716796875, 'KL/rejected_KL_mean': -527.6484375, 'KL/mean': -414.83648681640625, 'KL/std': 234.064453125, 'logits/chosen': -0.3995208442211151, 'logits/rejected': -0.39267587661743164, 'epoch': 0.53} + 53%|█████▎ | 359/681 [15:13<14:09, 2.64s/it] 53%|█████▎ | 360/681 [15:16<13:52, 2.59s/it] {'loss': 1.1038, 'grad_norm': 30.946216583251953, 'learning_rate': 2.7051020734928443e-07, 'fcm_dpo/beta': 0.0018919282592833042, 'fcm_dpo/q_t': 0.4175671935081482, 'fcm_dpo/delta': 0.05358727648854256, 'fcm_dpo/margin': 184.01300048828125, 'margin_dpo/margin_mean': 184.01300048828125, 'margin_dpo/margin_std': 202.7188262939453, 'logps/chosen': -348.6282653808594, 'logps/rejected': -551.019775390625, 'logps/ref_chosen': -51.460384368896484, 'logps/ref_rejected': -69.83892059326172, 'KL/chosen_KL_mean': -297.1678771972656, 'KL/rejected_KL_mean': -481.1808776855469, 'KL/mean': -389.17437744140625, 'KL/std': 202.0519561767578, 'logits/chosen': -0.3726957440376282, 'logits/rejected': -0.3605055510997772, 'epoch': 0.53} + 53%|█████▎ | 360/681 [15:16<13:52, 2.59s/it] 53%|█████▎ | 361/681 [15:19<14:08, 2.65s/it] {'loss': 1.1314, 'grad_norm': 29.356969833374023, 'learning_rate': 2.6923093854861593e-07, 'fcm_dpo/beta': 0.001915230881422758, 'fcm_dpo/q_t': 0.42051640152931213, 'fcm_dpo/delta': 0.05861767381429672, 'fcm_dpo/margin': 179.164306640625, 'margin_dpo/margin_mean': 179.164306640625, 'margin_dpo/margin_std': 260.03955078125, 'logps/chosen': -381.0967712402344, 'logps/rejected': -597.1607666015625, 'logps/ref_chosen': -53.86951446533203, 'logps/ref_rejected': -90.7692642211914, 'KL/chosen_KL_mean': -327.22723388671875, 'KL/rejected_KL_mean': -506.39154052734375, 'KL/mean': -416.80938720703125, 'KL/std': 240.4334716796875, 'logits/chosen': -0.39026200771331787, 'logits/rejected': -0.3884269595146179, 'epoch': 0.53} + 53%|█████▎ | 361/681 [15:19<14:08, 2.65s/it] 53%|█████▎ | 362/681 [15:21<13:53, 2.61s/it] {'loss': 0.9864, 'grad_norm': 28.38374900817871, 'learning_rate': 2.679511629948319e-07, 'fcm_dpo/beta': 0.001872203079983592, 'fcm_dpo/q_t': 0.3787139654159546, 'fcm_dpo/delta': -0.13209237158298492, 'fcm_dpo/margin': 280.12396240234375, 'margin_dpo/margin_mean': 280.12396240234375, 'margin_dpo/margin_std': 268.27490234375, 'logps/chosen': -349.3612060546875, 'logps/rejected': -676.4281005859375, 'logps/ref_chosen': -58.639060974121094, 'logps/ref_rejected': -105.58195495605469, 'KL/chosen_KL_mean': -290.72216796875, 'KL/rejected_KL_mean': -570.84619140625, 'KL/mean': -430.7841796875, 'KL/std': 250.39520263671875, 'logits/chosen': -0.3565158247947693, 'logits/rejected': -0.3644503951072693, 'epoch': 0.53} + 53%|█████▎ | 362/681 [15:21<13:53, 2.61s/it] 53%|█████▎ | 363/681 [15:24<13:22, 2.52s/it] {'loss': 0.9916, 'grad_norm': 22.140954971313477, 'learning_rate': 2.6667091441120816e-07, 'fcm_dpo/beta': 0.0018373643979430199, 'fcm_dpo/q_t': 0.37961655855178833, 'fcm_dpo/delta': -0.12650209665298462, 'fcm_dpo/margin': 283.072509765625, 'margin_dpo/margin_mean': 283.072509765625, 'margin_dpo/margin_std': 275.70379638671875, 'logps/chosen': -321.7727966308594, 'logps/rejected': -634.98193359375, 'logps/ref_chosen': -44.558380126953125, 'logps/ref_rejected': -74.69496154785156, 'KL/chosen_KL_mean': -277.21441650390625, 'KL/rejected_KL_mean': -560.2869262695312, 'KL/mean': -418.75067138671875, 'KL/std': 253.55523681640625, 'logits/chosen': -0.3845895528793335, 'logits/rejected': -0.3806605339050293, 'epoch': 0.53} + 53%|█████▎ | 363/681 [15:24<13:22, 2.52s/it] 53%|█████▎ | 364/681 [15:26<13:26, 2.54s/it] {'loss': 1.0981, 'grad_norm': 24.997861862182617, 'learning_rate': 2.6539022653348575e-07, 'fcm_dpo/beta': 0.0018285869155079126, 'fcm_dpo/q_t': 0.4090085029602051, 'fcm_dpo/delta': 0.006663650274276733, 'fcm_dpo/margin': 215.0734100341797, 'margin_dpo/margin_mean': 215.0734100341797, 'margin_dpo/margin_std': 289.23095703125, 'logps/chosen': -352.01715087890625, 'logps/rejected': -609.5916748046875, 'logps/ref_chosen': -48.894622802734375, 'logps/ref_rejected': -91.395751953125, 'KL/chosen_KL_mean': -303.1225280761719, 'KL/rejected_KL_mean': -518.1959228515625, 'KL/mean': -410.65924072265625, 'KL/std': 250.283447265625, 'logits/chosen': -0.4224643111228943, 'logits/rejected': -0.4338565468788147, 'epoch': 0.53} + 53%|█████▎ | 364/681 [15:26<13:26, 2.54s/it] 54%|█████▎ | 365/681 [15:29<13:30, 2.57s/it] {'loss': 1.0785, 'grad_norm': 24.999404907226562, 'learning_rate': 2.641091331089811e-07, 'fcm_dpo/beta': 0.0018205586820840836, 'fcm_dpo/q_t': 0.4073421359062195, 'fcm_dpo/delta': -0.005390607286244631, 'fcm_dpo/margin': 222.5535430908203, 'margin_dpo/margin_mean': 222.5535430908203, 'margin_dpo/margin_std': 272.5834655761719, 'logps/chosen': -346.01043701171875, 'logps/rejected': -609.77294921875, 'logps/ref_chosen': -51.49274444580078, 'logps/ref_rejected': -92.70166778564453, 'KL/chosen_KL_mean': -294.5177001953125, 'KL/rejected_KL_mean': -517.0712890625, 'KL/mean': -405.79449462890625, 'KL/std': 263.12286376953125, 'logits/chosen': -0.4111742377281189, 'logits/rejected': -0.4230782985687256, 'epoch': 0.54} + 54%|█████▎ | 365/681 [15:29<13:30, 2.57s/it] 54%|█████▎ | 366/681 [15:31<13:28, 2.57s/it] {'loss': 1.0815, 'grad_norm': 21.499807357788086, 'learning_rate': 2.6282766789569736e-07, 'fcm_dpo/beta': 0.001808905741199851, 'fcm_dpo/q_t': 0.4051057696342468, 'fcm_dpo/delta': -0.013614185154438019, 'fcm_dpo/margin': 228.12367248535156, 'margin_dpo/margin_mean': 228.12368774414062, 'margin_dpo/margin_std': 290.66448974609375, 'logps/chosen': -315.0681457519531, 'logps/rejected': -581.7816162109375, 'logps/ref_chosen': -44.7205696105957, 'logps/ref_rejected': -83.31040954589844, 'KL/chosen_KL_mean': -270.3475646972656, 'KL/rejected_KL_mean': -498.47125244140625, 'KL/mean': -384.409423828125, 'KL/std': 244.6814422607422, 'logits/chosen': -0.41397756338119507, 'logits/rejected': -0.4283139109611511, 'epoch': 0.54} + 54%|█████▎ | 366/681 [15:31<13:28, 2.57s/it] 54%|█████▍ | 367/681 [15:34<13:43, 2.62s/it] {'loss': 1.1247, 'grad_norm': 19.561819076538086, 'learning_rate': 2.615458646614349e-07, 'fcm_dpo/beta': 0.0018367799930274487, 'fcm_dpo/q_t': 0.4210050106048584, 'fcm_dpo/delta': 0.06687445938587189, 'fcm_dpo/margin': 182.48397827148438, 'margin_dpo/margin_mean': 182.48397827148438, 'margin_dpo/margin_std': 237.13613891601562, 'logps/chosen': -342.47174072265625, 'logps/rejected': -543.3016357421875, 'logps/ref_chosen': -58.405418395996094, 'logps/ref_rejected': -76.75132751464844, 'KL/chosen_KL_mean': -284.0663146972656, 'KL/rejected_KL_mean': -466.55029296875, 'KL/mean': -375.30828857421875, 'KL/std': 219.5314178466797, 'logits/chosen': -0.418517529964447, 'logits/rejected': -0.40183088183403015, 'epoch': 0.54} + 54%|█████▍ | 367/681 [15:34<13:43, 2.62s/it] 54%|█████▍ | 368/681 [15:37<13:39, 2.62s/it] {'loss': 0.9664, 'grad_norm': 45.23311233520508, 'learning_rate': 2.6026375718290083e-07, 'fcm_dpo/beta': 0.0018038455164059997, 'fcm_dpo/q_t': 0.37479937076568604, 'fcm_dpo/delta': -0.1335085779428482, 'fcm_dpo/margin': 291.8758850097656, 'margin_dpo/margin_mean': 291.8758544921875, 'margin_dpo/margin_std': 225.90078735351562, 'logps/chosen': -311.00830078125, 'logps/rejected': -656.9869384765625, 'logps/ref_chosen': -44.452518463134766, 'logps/ref_rejected': -98.55526733398438, 'KL/chosen_KL_mean': -266.5557861328125, 'KL/rejected_KL_mean': -558.431640625, 'KL/mean': -412.49371337890625, 'KL/std': 249.94715881347656, 'logits/chosen': -0.41935569047927856, 'logits/rejected': -0.4271644651889801, 'epoch': 0.54} + 54%|█████▍ | 368/681 [15:37<13:39, 2.62s/it] 54%|█████▍ | 369/681 [15:39<13:42, 2.64s/it] {'loss': 1.1876, 'grad_norm': 29.0583553314209, 'learning_rate': 2.589813792448196e-07, 'fcm_dpo/beta': 0.001830049091950059, 'fcm_dpo/q_t': 0.43473055958747864, 'fcm_dpo/delta': 0.12243049591779709, 'fcm_dpo/margin': 153.2336883544922, 'margin_dpo/margin_mean': 153.2336883544922, 'margin_dpo/margin_std': 278.4016418457031, 'logps/chosen': -422.2853698730469, 'logps/rejected': -595.433349609375, 'logps/ref_chosen': -71.38150024414062, 'logps/ref_rejected': -91.29582214355469, 'KL/chosen_KL_mean': -350.90386962890625, 'KL/rejected_KL_mean': -504.13751220703125, 'KL/mean': -427.52069091796875, 'KL/std': 248.12368774414062, 'logits/chosen': -0.43031615018844604, 'logits/rejected': -0.413091778755188, 'epoch': 0.54} + 54%|█████▍ | 369/681 [15:39<13:42, 2.64s/it] 54%|█████▍ | 370/681 [15:42<13:46, 2.66s/it] {'loss': 1.1953, 'grad_norm': 32.01681137084961, 'learning_rate': 2.5769876463904263e-07, 'fcm_dpo/beta': 0.001874544657766819, 'fcm_dpo/q_t': 0.43603307008743286, 'fcm_dpo/delta': 0.12432844936847687, 'fcm_dpo/margin': 148.69049072265625, 'margin_dpo/margin_mean': 148.69049072265625, 'margin_dpo/margin_std': 284.5626220703125, 'logps/chosen': -433.19775390625, 'logps/rejected': -607.54052734375, 'logps/ref_chosen': -71.60749816894531, 'logps/ref_rejected': -97.25978088378906, 'KL/chosen_KL_mean': -361.59027099609375, 'KL/rejected_KL_mean': -510.28070068359375, 'KL/mean': -435.93548583984375, 'KL/std': 257.38360595703125, 'logits/chosen': -0.4668412208557129, 'logits/rejected': -0.4614550471305847, 'epoch': 0.54} + 54%|█████▍ | 370/681 [15:42<13:46, 2.66s/it] 54%|█████▍ | 371/681 [15:45<13:47, 2.67s/it] {'loss': 1.0943, 'grad_norm': 31.476709365844727, 'learning_rate': 2.5641594716365744e-07, 'fcm_dpo/beta': 0.0018845757003873587, 'fcm_dpo/q_t': 0.4071364104747772, 'fcm_dpo/delta': -0.007901359349489212, 'fcm_dpo/margin': 216.20925903320312, 'margin_dpo/margin_mean': 216.20925903320312, 'margin_dpo/margin_std': 298.52569580078125, 'logps/chosen': -414.4024658203125, 'logps/rejected': -660.369384765625, 'logps/ref_chosen': -69.41448974609375, 'logps/ref_rejected': -99.17217254638672, 'KL/chosen_KL_mean': -344.98797607421875, 'KL/rejected_KL_mean': -561.197265625, 'KL/mean': -453.09259033203125, 'KL/std': 260.2284851074219, 'logits/chosen': -0.4888390302658081, 'logits/rejected': -0.4776480793952942, 'epoch': 0.54} + 54%|█████▍ | 371/681 [15:45<13:47, 2.67s/it] 55%|█████▍ | 372/681 [15:47<13:07, 2.55s/it] {'loss': 1.0516, 'grad_norm': 24.130836486816406, 'learning_rate': 2.551329606220976e-07, 'fcm_dpo/beta': 0.0018558462616056204, 'fcm_dpo/q_t': 0.39386242628097534, 'fcm_dpo/delta': -0.07873637974262238, 'fcm_dpo/margin': 255.91799926757812, 'margin_dpo/margin_mean': 255.91799926757812, 'margin_dpo/margin_std': 333.7495422363281, 'logps/chosen': -394.7161560058594, 'logps/rejected': -667.3556518554688, 'logps/ref_chosen': -61.8179931640625, 'logps/ref_rejected': -78.53948974609375, 'KL/chosen_KL_mean': -332.8981628417969, 'KL/rejected_KL_mean': -588.816162109375, 'KL/mean': -460.857177734375, 'KL/std': 296.18841552734375, 'logits/chosen': -0.44447630643844604, 'logits/rejected': -0.4267101287841797, 'epoch': 0.55} + 55%|█████▍ | 372/681 [15:47<13:07, 2.55s/it] 55%|█████▍ | 373/681 [15:50<13:10, 2.57s/it] {'loss': 1.0572, 'grad_norm': 27.912851333618164, 'learning_rate': 2.538498388222517e-07, 'fcm_dpo/beta': 0.0018471537623554468, 'fcm_dpo/q_t': 0.3971483111381531, 'fcm_dpo/delta': -0.04569406807422638, 'fcm_dpo/margin': 239.99903869628906, 'margin_dpo/margin_mean': 239.99905395507812, 'margin_dpo/margin_std': 276.5765075683594, 'logps/chosen': -426.01446533203125, 'logps/rejected': -687.7559814453125, 'logps/ref_chosen': -64.21713256835938, 'logps/ref_rejected': -85.95960998535156, 'KL/chosen_KL_mean': -361.79736328125, 'KL/rejected_KL_mean': -601.7963256835938, 'KL/mean': -481.796875, 'KL/std': 286.9070129394531, 'logits/chosen': -0.4145781695842743, 'logits/rejected': -0.3933746814727783, 'epoch': 0.55} + 55%|█████▍ | 373/681 [15:50<13:10, 2.57s/it] 55%|█████▍ | 374/681 [15:52<13:24, 2.62s/it] {'loss': 1.1167, 'grad_norm': 25.18126106262207, 'learning_rate': 2.525666155755725e-07, 'fcm_dpo/beta': 0.0018239655764773488, 'fcm_dpo/q_t': 0.41195404529571533, 'fcm_dpo/delta': 0.0010283365845680237, 'fcm_dpo/margin': 218.50360107421875, 'margin_dpo/margin_mean': 218.50360107421875, 'margin_dpo/margin_std': 344.5791015625, 'logps/chosen': -391.53271484375, 'logps/rejected': -633.0263061523438, 'logps/ref_chosen': -70.65018463134766, 'logps/ref_rejected': -93.64016723632812, 'KL/chosen_KL_mean': -320.88250732421875, 'KL/rejected_KL_mean': -539.3861083984375, 'KL/mean': -430.1343078613281, 'KL/std': 306.24932861328125, 'logits/chosen': -0.5014743208885193, 'logits/rejected': -0.4834766983985901, 'epoch': 0.55} + 55%|█████▍ | 374/681 [15:52<13:24, 2.62s/it] 55%|█████▌ | 375/681 [15:55<13:30, 2.65s/it] {'loss': 1.1046, 'grad_norm': 34.303585052490234, 'learning_rate': 2.512833246961859e-07, 'fcm_dpo/beta': 0.001825918909162283, 'fcm_dpo/q_t': 0.4101504981517792, 'fcm_dpo/delta': 0.011883806437253952, 'fcm_dpo/margin': 212.50088500976562, 'margin_dpo/margin_mean': 212.50088500976562, 'margin_dpo/margin_std': 290.35430908203125, 'logps/chosen': -385.558837890625, 'logps/rejected': -626.9177856445312, 'logps/ref_chosen': -60.080223083496094, 'logps/ref_rejected': -88.93830871582031, 'KL/chosen_KL_mean': -325.4786071777344, 'KL/rejected_KL_mean': -537.9794921875, 'KL/mean': -431.72906494140625, 'KL/std': 243.8807373046875, 'logits/chosen': -0.45803767442703247, 'logits/rejected': -0.4583319425582886, 'epoch': 0.55} + 55%|█████▌ | 375/681 [15:55<13:30, 2.65s/it] 55%|█████▌ | 376/681 [15:58<13:28, 2.65s/it] {'loss': 1.0361, 'grad_norm': 25.428913116455078, 'learning_rate': 2.5e-07, 'fcm_dpo/beta': 0.0018120380118489265, 'fcm_dpo/q_t': 0.3894526958465576, 'fcm_dpo/delta': -0.08552736043930054, 'fcm_dpo/margin': 265.6058654785156, 'margin_dpo/margin_mean': 265.6058654785156, 'margin_dpo/margin_std': 312.51824951171875, 'logps/chosen': -378.6025390625, 'logps/rejected': -687.07470703125, 'logps/ref_chosen': -62.660308837890625, 'logps/ref_rejected': -105.52660369873047, 'KL/chosen_KL_mean': -315.9422302246094, 'KL/rejected_KL_mean': -581.548095703125, 'KL/mean': -448.74517822265625, 'KL/std': 266.8795166015625, 'logits/chosen': -0.44534194469451904, 'logits/rejected': -0.4348585307598114, 'epoch': 0.55} + 55%|█████▌ | 376/681 [15:58<13:28, 2.65s/it] 55%|█████▌ | 377/681 [16:00<13:11, 2.60s/it] {'loss': 1.0485, 'grad_norm': 34.237945556640625, 'learning_rate': 2.487166753038141e-07, 'fcm_dpo/beta': 0.0017974915681406856, 'fcm_dpo/q_t': 0.3962140679359436, 'fcm_dpo/delta': -0.048895493149757385, 'fcm_dpo/margin': 248.51861572265625, 'margin_dpo/margin_mean': 248.5186004638672, 'margin_dpo/margin_std': 279.26947021484375, 'logps/chosen': -372.7728576660156, 'logps/rejected': -665.5160522460938, 'logps/ref_chosen': -54.478736877441406, 'logps/ref_rejected': -98.70335388183594, 'KL/chosen_KL_mean': -318.29412841796875, 'KL/rejected_KL_mean': -566.812744140625, 'KL/mean': -442.55340576171875, 'KL/std': 271.7928771972656, 'logits/chosen': -0.39339831471443176, 'logits/rejected': -0.39519575238227844, 'epoch': 0.55} + 55%|█████▌ | 377/681 [16:00<13:11, 2.60s/it] 56%|█████▌ | 378/681 [16:02<12:28, 2.47s/it] {'loss': 1.0276, 'grad_norm': 28.7508544921875, 'learning_rate': 2.4743338442442754e-07, 'fcm_dpo/beta': 0.001769623951986432, 'fcm_dpo/q_t': 0.39068034291267395, 'fcm_dpo/delta': -0.06735318899154663, 'fcm_dpo/margin': 262.3010559082031, 'margin_dpo/margin_mean': 262.3010559082031, 'margin_dpo/margin_std': 266.9130554199219, 'logps/chosen': -351.5135498046875, 'logps/rejected': -656.841064453125, 'logps/ref_chosen': -45.02053451538086, 'logps/ref_rejected': -88.0469741821289, 'KL/chosen_KL_mean': -306.4930114746094, 'KL/rejected_KL_mean': -568.7940673828125, 'KL/mean': -437.6435546875, 'KL/std': 253.0465087890625, 'logits/chosen': -0.4216500520706177, 'logits/rejected': -0.43784886598587036, 'epoch': 0.56} + 56%|█████▌ | 378/681 [16:02<12:28, 2.47s/it] 56%|█████▌ | 379/681 [16:05<12:24, 2.47s/it] {'loss': 1.0472, 'grad_norm': 26.82124137878418, 'learning_rate': 2.461501611777483e-07, 'fcm_dpo/beta': 0.0017398163909092546, 'fcm_dpo/q_t': 0.39443039894104004, 'fcm_dpo/delta': -0.06416111439466476, 'fcm_dpo/margin': 264.68035888671875, 'margin_dpo/margin_mean': 264.68035888671875, 'margin_dpo/margin_std': 312.6787109375, 'logps/chosen': -404.4954833984375, 'logps/rejected': -730.2939453125, 'logps/ref_chosen': -53.182098388671875, 'logps/ref_rejected': -114.3001708984375, 'KL/chosen_KL_mean': -351.31341552734375, 'KL/rejected_KL_mean': -615.9937744140625, 'KL/mean': -483.653564453125, 'KL/std': 259.1260986328125, 'logits/chosen': -0.41827017068862915, 'logits/rejected': -0.4405589699745178, 'epoch': 0.56} + 56%|█████▌ | 379/681 [16:05<12:24, 2.47s/it] 56%|█████▌ | 380/681 [16:07<12:10, 2.43s/it] {'loss': 1.0301, 'grad_norm': 27.675559997558594, 'learning_rate': 2.4486703937790243e-07, 'fcm_dpo/beta': 0.0017209737561643124, 'fcm_dpo/q_t': 0.3864797353744507, 'fcm_dpo/delta': -0.0916648805141449, 'fcm_dpo/margin': 283.14215087890625, 'margin_dpo/margin_mean': 283.14215087890625, 'margin_dpo/margin_std': 325.3090515136719, 'logps/chosen': -401.767578125, 'logps/rejected': -737.7484130859375, 'logps/ref_chosen': -51.3530387878418, 'logps/ref_rejected': -104.19169616699219, 'KL/chosen_KL_mean': -350.41455078125, 'KL/rejected_KL_mean': -633.5567626953125, 'KL/mean': -491.9856262207031, 'KL/std': 297.56414794921875, 'logits/chosen': -0.42042213678359985, 'logits/rejected': -0.447647362947464, 'epoch': 0.56} + 56%|█████▌ | 380/681 [16:07<12:10, 2.43s/it] 56%|█████▌ | 381/681 [16:10<12:13, 2.44s/it] {'loss': 1.148, 'grad_norm': 28.662925720214844, 'learning_rate': 2.435840528363426e-07, 'fcm_dpo/beta': 0.001720770844258368, 'fcm_dpo/q_t': 0.42006832361221313, 'fcm_dpo/delta': 0.05182623863220215, 'fcm_dpo/margin': 203.4054718017578, 'margin_dpo/margin_mean': 203.40545654296875, 'margin_dpo/margin_std': 341.2312927246094, 'logps/chosen': -430.34332275390625, 'logps/rejected': -655.1651611328125, 'logps/ref_chosen': -57.80306625366211, 'logps/ref_rejected': -79.21940612792969, 'KL/chosen_KL_mean': -372.5402526855469, 'KL/rejected_KL_mean': -575.9457397460938, 'KL/mean': -474.24298095703125, 'KL/std': 250.25369262695312, 'logits/chosen': -0.42481085658073425, 'logits/rejected': -0.4088062047958374, 'epoch': 0.56} + 56%|█████▌ | 381/681 [16:10<12:13, 2.44s/it] 56%|█████▌ | 382/681 [16:12<12:24, 2.49s/it] {'loss': 1.0487, 'grad_norm': 24.968591690063477, 'learning_rate': 2.4230123536095745e-07, 'fcm_dpo/beta': 0.0017197042470797896, 'fcm_dpo/q_t': 0.39874282479286194, 'fcm_dpo/delta': -0.03414086997509003, 'fcm_dpo/margin': 251.59266662597656, 'margin_dpo/margin_mean': 251.59266662597656, 'margin_dpo/margin_std': 264.68463134765625, 'logps/chosen': -434.68548583984375, 'logps/rejected': -730.968017578125, 'logps/ref_chosen': -66.02030181884766, 'logps/ref_rejected': -110.71016693115234, 'KL/chosen_KL_mean': -368.66522216796875, 'KL/rejected_KL_mean': -620.2578125, 'KL/mean': -494.4615173339844, 'KL/std': 243.38522338867188, 'logits/chosen': -0.4726359248161316, 'logits/rejected': -0.47993141412734985, 'epoch': 0.56} + 56%|█████▌ | 382/681 [16:12<12:24, 2.49s/it] 56%|█████▌ | 383/681 [16:15<12:38, 2.54s/it] {'loss': 1.1069, 'grad_norm': 30.020652770996094, 'learning_rate': 2.4101862075518037e-07, 'fcm_dpo/beta': 0.0017115201335400343, 'fcm_dpo/q_t': 0.4055634140968323, 'fcm_dpo/delta': -0.012677527032792568, 'fcm_dpo/margin': 240.79074096679688, 'margin_dpo/margin_mean': 240.79074096679688, 'margin_dpo/margin_std': 365.572021484375, 'logps/chosen': -446.63189697265625, 'logps/rejected': -730.7470703125, 'logps/ref_chosen': -50.39148712158203, 'logps/ref_rejected': -93.71589660644531, 'KL/chosen_KL_mean': -396.24041748046875, 'KL/rejected_KL_mean': -637.0311889648438, 'KL/mean': -516.6358032226562, 'KL/std': 278.3402099609375, 'logits/chosen': -0.4480942487716675, 'logits/rejected': -0.4583819806575775, 'epoch': 0.56} + 56%|█████▌ | 383/681 [16:15<12:38, 2.54s/it] 56%|█████▋ | 384/681 [16:17<12:35, 2.55s/it] {'loss': 1.1132, 'grad_norm': 25.613414764404297, 'learning_rate': 2.397362428170992e-07, 'fcm_dpo/beta': 0.0017325121443718672, 'fcm_dpo/q_t': 0.418599933385849, 'fcm_dpo/delta': 0.055945903062820435, 'fcm_dpo/margin': 199.46575927734375, 'margin_dpo/margin_mean': 199.46575927734375, 'margin_dpo/margin_std': 242.4035186767578, 'logps/chosen': -448.14892578125, 'logps/rejected': -681.3294677734375, 'logps/ref_chosen': -52.046104431152344, 'logps/ref_rejected': -85.76089477539062, 'KL/chosen_KL_mean': -396.1028137207031, 'KL/rejected_KL_mean': -595.568603515625, 'KL/mean': -495.835693359375, 'KL/std': 259.7392578125, 'logits/chosen': -0.5003800392150879, 'logits/rejected': -0.4956563413143158, 'epoch': 0.56} + 56%|█████▋ | 384/681 [16:18<12:35, 2.55s/it] 57%|█████▋ | 385/681 [16:20<12:28, 2.53s/it] {'loss': 1.054, 'grad_norm': 34.80295181274414, 'learning_rate': 2.3845413533856514e-07, 'fcm_dpo/beta': 0.0017262771725654602, 'fcm_dpo/q_t': 0.40087053179740906, 'fcm_dpo/delta': -0.019818957895040512, 'fcm_dpo/margin': 242.7109832763672, 'margin_dpo/margin_mean': 242.71096801757812, 'margin_dpo/margin_std': 247.6511993408203, 'logps/chosen': -446.891357421875, 'logps/rejected': -701.8780517578125, 'logps/ref_chosen': -65.55215454101562, 'logps/ref_rejected': -77.82792663574219, 'KL/chosen_KL_mean': -381.33917236328125, 'KL/rejected_KL_mean': -624.0501708984375, 'KL/mean': -502.6947021484375, 'KL/std': 228.0861053466797, 'logits/chosen': -0.5185421705245972, 'logits/rejected': -0.4977598786354065, 'epoch': 0.57} + 57%|█████▋ | 385/681 [16:20<12:28, 2.53s/it] 57%|█████▋ | 386/681 [16:23<12:33, 2.55s/it] {'loss': 1.0673, 'grad_norm': 34.90454864501953, 'learning_rate': 2.3717233210430254e-07, 'fcm_dpo/beta': 0.0017148086335510015, 'fcm_dpo/q_t': 0.39896559715270996, 'fcm_dpo/delta': -0.044420357793569565, 'fcm_dpo/margin': 258.03277587890625, 'margin_dpo/margin_mean': 258.03277587890625, 'margin_dpo/margin_std': 333.954833984375, 'logps/chosen': -449.3065490722656, 'logps/rejected': -741.4448852539062, 'logps/ref_chosen': -58.22185516357422, 'logps/ref_rejected': -92.32742309570312, 'KL/chosen_KL_mean': -391.0846862792969, 'KL/rejected_KL_mean': -649.117431640625, 'KL/mean': -520.10107421875, 'KL/std': 283.81964111328125, 'logits/chosen': -0.5139098167419434, 'logits/rejected': -0.5114161968231201, 'epoch': 0.57} + 57%|█████▋ | 386/681 [16:23<12:33, 2.55s/it] 57%|█████▋ | 387/681 [16:25<12:48, 2.61s/it] {'loss': 1.0981, 'grad_norm': 33.19843292236328, 'learning_rate': 2.3589086689101889e-07, 'fcm_dpo/beta': 0.0017092199996113777, 'fcm_dpo/q_t': 0.412253201007843, 'fcm_dpo/delta': 0.027111487463116646, 'fcm_dpo/margin': 218.617919921875, 'margin_dpo/margin_mean': 218.617919921875, 'margin_dpo/margin_std': 265.804443359375, 'logps/chosen': -475.0291748046875, 'logps/rejected': -719.3968505859375, 'logps/ref_chosen': -66.41944885253906, 'logps/ref_rejected': -92.16915893554688, 'KL/chosen_KL_mean': -408.6097412109375, 'KL/rejected_KL_mean': -627.2276611328125, 'KL/mean': -517.918701171875, 'KL/std': 263.1587219238281, 'logits/chosen': -0.5541732311248779, 'logits/rejected': -0.5390141010284424, 'epoch': 0.57} + 57%|█████▋ | 387/681 [16:25<12:48, 2.61s/it] 57%|█████▋ | 388/681 [16:28<12:23, 2.54s/it] {'loss': 1.0239, 'grad_norm': 31.615806579589844, 'learning_rate': 2.3460977346651428e-07, 'fcm_dpo/beta': 0.0016880175098776817, 'fcm_dpo/q_t': 0.387167364358902, 'fcm_dpo/delta': -0.10291901975870132, 'fcm_dpo/margin': 294.6571350097656, 'margin_dpo/margin_mean': 294.65716552734375, 'margin_dpo/margin_std': 341.442138671875, 'logps/chosen': -427.8636779785156, 'logps/rejected': -776.8244018554688, 'logps/ref_chosen': -50.129459381103516, 'logps/ref_rejected': -104.43305969238281, 'KL/chosen_KL_mean': -377.7342224121094, 'KL/rejected_KL_mean': -672.391357421875, 'KL/mean': -525.062744140625, 'KL/std': 302.99163818359375, 'logits/chosen': -0.49094468355178833, 'logits/rejected': -0.5020414590835571, 'epoch': 0.57} + 57%|█████▋ | 388/681 [16:28<12:23, 2.54s/it] 57%|█████▋ | 389/681 [16:30<12:09, 2.50s/it] {'loss': 1.083, 'grad_norm': 31.12624740600586, 'learning_rate': 2.3332908558879177e-07, 'fcm_dpo/beta': 0.001677666325122118, 'fcm_dpo/q_t': 0.4049556255340576, 'fcm_dpo/delta': -0.013942467980086803, 'fcm_dpo/margin': 246.35403442382812, 'margin_dpo/margin_mean': 246.35403442382812, 'margin_dpo/margin_std': 321.620361328125, 'logps/chosen': -460.9317626953125, 'logps/rejected': -727.293701171875, 'logps/ref_chosen': -57.906593322753906, 'logps/ref_rejected': -77.91454315185547, 'KL/chosen_KL_mean': -403.025146484375, 'KL/rejected_KL_mean': -649.379150390625, 'KL/mean': -526.2021484375, 'KL/std': 294.0400390625, 'logits/chosen': -0.5525184273719788, 'logits/rejected': -0.5475004315376282, 'epoch': 0.57} + 57%|█████▋ | 389/681 [16:30<12:09, 2.50s/it] 57%|█████▋ | 390/681 [16:33<12:07, 2.50s/it] {'loss': 1.1036, 'grad_norm': 22.825424194335938, 'learning_rate': 2.320488370051681e-07, 'fcm_dpo/beta': 0.0016733764205127954, 'fcm_dpo/q_t': 0.4098580479621887, 'fcm_dpo/delta': -0.00393829308450222, 'fcm_dpo/margin': 241.19834899902344, 'margin_dpo/margin_mean': 241.19834899902344, 'margin_dpo/margin_std': 353.98956298828125, 'logps/chosen': -431.4993896484375, 'logps/rejected': -709.0, 'logps/ref_chosen': -49.22591781616211, 'logps/ref_rejected': -85.5281982421875, 'KL/chosen_KL_mean': -382.27349853515625, 'KL/rejected_KL_mean': -623.4718017578125, 'KL/mean': -502.8726806640625, 'KL/std': 278.3486022949219, 'logits/chosen': -0.4611801207065582, 'logits/rejected': -0.45201367139816284, 'epoch': 0.57} + 57%|█████▋ | 390/681 [16:33<12:07, 2.50s/it] 57%|█████▋ | 391/681 [16:35<12:02, 2.49s/it] {'loss': 1.2116, 'grad_norm': 40.434242248535156, 'learning_rate': 2.3076906145138405e-07, 'fcm_dpo/beta': 0.0017185378819704056, 'fcm_dpo/q_t': 0.44371968507766724, 'fcm_dpo/delta': 0.1594843566417694, 'fcm_dpo/margin': 142.20448303222656, 'margin_dpo/margin_mean': 142.20448303222656, 'margin_dpo/margin_std': 274.74224853515625, 'logps/chosen': -438.2511291503906, 'logps/rejected': -602.8641357421875, 'logps/ref_chosen': -64.32965087890625, 'logps/ref_rejected': -86.73820495605469, 'KL/chosen_KL_mean': -373.9214782714844, 'KL/rejected_KL_mean': -516.1259765625, 'KL/mean': -445.02374267578125, 'KL/std': 262.6861572265625, 'logits/chosen': -0.47930610179901123, 'logits/rejected': -0.4707353711128235, 'epoch': 0.57} + 57%|█████▋ | 391/681 [16:35<12:02, 2.49s/it] 58%|█████▊ | 392/681 [16:38<12:15, 2.54s/it] {'loss': 1.0162, 'grad_norm': 23.396804809570312, 'learning_rate': 2.294897926507156e-07, 'fcm_dpo/beta': 0.0017193170497193933, 'fcm_dpo/q_t': 0.389517605304718, 'fcm_dpo/delta': -0.076762355864048, 'fcm_dpo/margin': 275.17437744140625, 'margin_dpo/margin_mean': 275.17437744140625, 'margin_dpo/margin_std': 261.0281066894531, 'logps/chosen': -363.20159912109375, 'logps/rejected': -687.2177734375, 'logps/ref_chosen': -53.50397872924805, 'logps/ref_rejected': -102.34584045410156, 'KL/chosen_KL_mean': -309.6976318359375, 'KL/rejected_KL_mean': -584.8719482421875, 'KL/mean': -447.2847900390625, 'KL/std': 268.35784912109375, 'logits/chosen': -0.4492862820625305, 'logits/rejected': -0.44523316621780396, 'epoch': 0.58} + 58%|█████▊ | 392/681 [16:38<12:15, 2.54s/it] 58%|█████▊ | 393/681 [16:40<11:58, 2.50s/it] {'loss': 1.1278, 'grad_norm': 21.774555206298828, 'learning_rate': 2.2821106431308543e-07, 'fcm_dpo/beta': 0.001711581600829959, 'fcm_dpo/q_t': 0.4182642698287964, 'fcm_dpo/delta': 0.03485105559229851, 'fcm_dpo/margin': 214.08511352539062, 'margin_dpo/margin_mean': 214.0851287841797, 'margin_dpo/margin_std': 337.0278015136719, 'logps/chosen': -351.63958740234375, 'logps/rejected': -591.2196044921875, 'logps/ref_chosen': -46.473915100097656, 'logps/ref_rejected': -71.96885681152344, 'KL/chosen_KL_mean': -305.1656494140625, 'KL/rejected_KL_mean': -519.2507934570312, 'KL/mean': -412.20819091796875, 'KL/std': 271.5501708984375, 'logits/chosen': -0.44074547290802, 'logits/rejected': -0.4389492869377136, 'epoch': 0.58} + 58%|█████▊ | 393/681 [16:40<11:58, 2.50s/it] 58%|█████▊ | 394/681 [16:43<12:14, 2.56s/it] {'loss': 1.0818, 'grad_norm': 26.771778106689453, 'learning_rate': 2.2693291013417452e-07, 'fcm_dpo/beta': 0.0017150124767795205, 'fcm_dpo/q_t': 0.4073120653629303, 'fcm_dpo/delta': -0.0024520214647054672, 'fcm_dpo/margin': 234.589599609375, 'margin_dpo/margin_mean': 234.589599609375, 'margin_dpo/margin_std': 290.7215576171875, 'logps/chosen': -387.1803283691406, 'logps/rejected': -659.6810302734375, 'logps/ref_chosen': -52.91154861450195, 'logps/ref_rejected': -90.8226318359375, 'KL/chosen_KL_mean': -334.2687683105469, 'KL/rejected_KL_mean': -568.8583984375, 'KL/mean': -451.5635681152344, 'KL/std': 285.47515869140625, 'logits/chosen': -0.44740962982177734, 'logits/rejected': -0.4476820230484009, 'epoch': 0.58} + 58%|█████▊ | 394/681 [16:43<12:14, 2.56s/it] 58%|█████▊ | 395/681 [16:45<11:53, 2.50s/it] {'loss': 1.0744, 'grad_norm': 27.568639755249023, 'learning_rate': 2.2565536379453404e-07, 'fcm_dpo/beta': 0.001704660477116704, 'fcm_dpo/q_t': 0.4029311537742615, 'fcm_dpo/delta': -0.025200337171554565, 'fcm_dpo/margin': 248.63189697265625, 'margin_dpo/margin_mean': 248.63189697265625, 'margin_dpo/margin_std': 315.91448974609375, 'logps/chosen': -393.2033386230469, 'logps/rejected': -663.07177734375, 'logps/ref_chosen': -62.546112060546875, 'logps/ref_rejected': -83.78262329101562, 'KL/chosen_KL_mean': -330.6572265625, 'KL/rejected_KL_mean': -579.2891845703125, 'KL/mean': -454.97314453125, 'KL/std': 274.084228515625, 'logits/chosen': -0.5028017163276672, 'logits/rejected': -0.49913692474365234, 'epoch': 0.58} + 58%|█████▊ | 395/681 [16:45<11:53, 2.50s/it] 58%|█████▊ | 396/681 [16:48<11:55, 2.51s/it] {'loss': 1.0763, 'grad_norm': 20.283159255981445, 'learning_rate': 2.2437845895869825e-07, 'fcm_dpo/beta': 0.0017089219763875008, 'fcm_dpo/q_t': 0.4084508419036865, 'fcm_dpo/delta': 0.010719288140535355, 'fcm_dpo/margin': 228.02923583984375, 'margin_dpo/margin_mean': 228.02923583984375, 'margin_dpo/margin_std': 249.16293334960938, 'logps/chosen': -408.6126708984375, 'logps/rejected': -656.2926025390625, 'logps/ref_chosen': -68.99594116210938, 'logps/ref_rejected': -88.64665985107422, 'KL/chosen_KL_mean': -339.6167297363281, 'KL/rejected_KL_mean': -567.64599609375, 'KL/mean': -453.63134765625, 'KL/std': 269.79595947265625, 'logits/chosen': -0.4966447353363037, 'logits/rejected': -0.4823087155818939, 'epoch': 0.58} + 58%|█████▊ | 396/681 [16:48<11:55, 2.51s/it] 58%|█████▊ | 397/681 [16:50<11:50, 2.50s/it] {'loss': 1.0206, 'grad_norm': 31.900684356689453, 'learning_rate': 2.2310222927429716e-07, 'fcm_dpo/beta': 0.0016858780290931463, 'fcm_dpo/q_t': 0.38912802934646606, 'fcm_dpo/delta': -0.07868388295173645, 'fcm_dpo/margin': 281.4169921875, 'margin_dpo/margin_mean': 281.4169921875, 'margin_dpo/margin_std': 282.707275390625, 'logps/chosen': -393.007080078125, 'logps/rejected': -716.2630004882812, 'logps/ref_chosen': -61.27716827392578, 'logps/ref_rejected': -103.11612701416016, 'KL/chosen_KL_mean': -331.72991943359375, 'KL/rejected_KL_mean': -613.1468505859375, 'KL/mean': -472.43841552734375, 'KL/std': 266.5982360839844, 'logits/chosen': -0.45149320363998413, 'logits/rejected': -0.45756763219833374, 'epoch': 0.58} + 58%|█████▊ | 397/681 [16:50<11:50, 2.50s/it] 58%|█████▊ | 398/681 [16:53<11:34, 2.45s/it] {'loss': 1.0615, 'grad_norm': 24.273387908935547, 'learning_rate': 2.2182670837116972e-07, 'fcm_dpo/beta': 0.0016720399726182222, 'fcm_dpo/q_t': 0.39866209030151367, 'fcm_dpo/delta': -0.04650367423892021, 'fcm_dpo/margin': 265.7738342285156, 'margin_dpo/margin_mean': 265.7738342285156, 'margin_dpo/margin_std': 334.2044677734375, 'logps/chosen': -424.8125915527344, 'logps/rejected': -730.95849609375, 'logps/ref_chosen': -68.15155029296875, 'logps/ref_rejected': -108.52360534667969, 'KL/chosen_KL_mean': -356.6610412597656, 'KL/rejected_KL_mean': -622.4349365234375, 'KL/mean': -489.5479736328125, 'KL/std': 285.5445556640625, 'logits/chosen': -0.5021190643310547, 'logits/rejected': -0.4984011650085449, 'epoch': 0.58} + 58%|█████▊ | 398/681 [16:53<11:34, 2.45s/it] 59%|█████▊ | 399/681 [16:55<11:37, 2.47s/it] {'loss': 1.1112, 'grad_norm': 26.480926513671875, 'learning_rate': 2.2055192986047804e-07, 'fcm_dpo/beta': 0.0016683805733919144, 'fcm_dpo/q_t': 0.4121752977371216, 'fcm_dpo/delta': 0.019710222259163857, 'fcm_dpo/margin': 228.27645874023438, 'margin_dpo/margin_mean': 228.27645874023438, 'margin_dpo/margin_std': 323.93865966796875, 'logps/chosen': -383.94122314453125, 'logps/rejected': -629.29345703125, 'logps/ref_chosen': -60.889801025390625, 'logps/ref_rejected': -77.965576171875, 'KL/chosen_KL_mean': -323.0514221191406, 'KL/rejected_KL_mean': -551.327880859375, 'KL/mean': -437.18963623046875, 'KL/std': 256.7247314453125, 'logits/chosen': -0.45461803674697876, 'logits/rejected': -0.4152987003326416, 'epoch': 0.59} + 59%|█████▊ | 399/681 [16:55<11:37, 2.47s/it] 59%|█████▊ | 400/681 [16:58<11:39, 2.49s/it] {'loss': 0.9622, 'grad_norm': 19.69918441772461, 'learning_rate': 2.192779273338215e-07, 'fcm_dpo/beta': 0.0016302517615258694, 'fcm_dpo/q_t': 0.3687145709991455, 'fcm_dpo/delta': -0.17095670104026794, 'fcm_dpo/margin': 344.0451354980469, 'margin_dpo/margin_mean': 344.04510498046875, 'margin_dpo/margin_std': 313.25408935546875, 'logps/chosen': -379.91363525390625, 'logps/rejected': -765.5678100585938, 'logps/ref_chosen': -63.64359664916992, 'logps/ref_rejected': -105.252685546875, 'KL/chosen_KL_mean': -316.2700500488281, 'KL/rejected_KL_mean': -660.3151245117188, 'KL/mean': -488.2925720214844, 'KL/std': 281.55780029296875, 'logits/chosen': -0.43350642919540405, 'logits/rejected': -0.4296361804008484, 'epoch': 0.59} + 59%|█████▊ | 400/681 [16:58<11:39, 2.49s/it] 59%|█████▉ | 401/681 [17:00<11:43, 2.51s/it] {'loss': 1.1887, 'grad_norm': 24.62172508239746, 'learning_rate': 2.1800473436235136e-07, 'fcm_dpo/beta': 0.00163645064458251, 'fcm_dpo/q_t': 0.42967379093170166, 'fcm_dpo/delta': 0.08519099652767181, 'fcm_dpo/margin': 194.07742309570312, 'margin_dpo/margin_mean': 194.07742309570312, 'margin_dpo/margin_std': 388.0357666015625, 'logps/chosen': -421.0069580078125, 'logps/rejected': -641.7138671875, 'logps/ref_chosen': -57.16303253173828, 'logps/ref_rejected': -83.79249572753906, 'KL/chosen_KL_mean': -363.84393310546875, 'KL/rejected_KL_mean': -557.92138671875, 'KL/mean': -460.88262939453125, 'KL/std': 290.2945861816406, 'logits/chosen': -0.4492917060852051, 'logits/rejected': -0.44258540868759155, 'epoch': 0.59} + 59%|█████▉ | 401/681 [17:00<11:43, 2.51s/it] 59%|█████▉ | 402/681 [17:02<11:22, 2.45s/it] {'loss': 0.9533, 'grad_norm': 21.816125869750977, 'learning_rate': 2.1673238449588665e-07, 'fcm_dpo/beta': 0.0016029919497668743, 'fcm_dpo/q_t': 0.36773985624313354, 'fcm_dpo/delta': -0.18321484327316284, 'fcm_dpo/margin': 357.2535400390625, 'margin_dpo/margin_mean': 357.2535400390625, 'margin_dpo/margin_std': 325.81842041015625, 'logps/chosen': -328.19024658203125, 'logps/rejected': -715.74951171875, 'logps/ref_chosen': -50.74037170410156, 'logps/ref_rejected': -81.0460433959961, 'KL/chosen_KL_mean': -277.44989013671875, 'KL/rejected_KL_mean': -634.7034301757812, 'KL/mean': -456.07666015625, 'KL/std': 316.1929016113281, 'logits/chosen': -0.4676339626312256, 'logits/rejected': -0.4571627974510193, 'epoch': 0.59} + 59%|█████▉ | 402/681 [17:02<11:22, 2.45s/it] 59%|█████▉ | 403/681 [17:05<11:18, 2.44s/it] {'loss': 1.0603, 'grad_norm': 27.178544998168945, 'learning_rate': 2.154609112620295e-07, 'fcm_dpo/beta': 0.0015804520808160305, 'fcm_dpo/q_t': 0.4006895124912262, 'fcm_dpo/delta': -0.029699519276618958, 'fcm_dpo/margin': 271.06561279296875, 'margin_dpo/margin_mean': 271.06561279296875, 'margin_dpo/margin_std': 310.4146423339844, 'logps/chosen': -361.19696044921875, 'logps/rejected': -662.3818359375, 'logps/ref_chosen': -47.14731216430664, 'logps/ref_rejected': -77.2666015625, 'KL/chosen_KL_mean': -314.04962158203125, 'KL/rejected_KL_mean': -585.115234375, 'KL/mean': -449.58245849609375, 'KL/std': 293.69842529296875, 'logits/chosen': -0.4410826563835144, 'logits/rejected': -0.4390965700149536, 'epoch': 0.59} + 59%|█████▉ | 403/681 [17:05<11:18, 2.44s/it] 59%|█████▉ | 404/681 [17:07<11:20, 2.45s/it] {'loss': 1.0903, 'grad_norm': 29.41777992248535, 'learning_rate': 2.1419034816528218e-07, 'fcm_dpo/beta': 0.0015735691413283348, 'fcm_dpo/q_t': 0.40503576397895813, 'fcm_dpo/delta': -0.015212337486445904, 'fcm_dpo/margin': 263.43988037109375, 'margin_dpo/margin_mean': 263.43988037109375, 'margin_dpo/margin_std': 362.440673828125, 'logps/chosen': -397.69647216796875, 'logps/rejected': -690.4160766601562, 'logps/ref_chosen': -47.875274658203125, 'logps/ref_rejected': -77.15499877929688, 'KL/chosen_KL_mean': -349.8211975097656, 'KL/rejected_KL_mean': -613.2611083984375, 'KL/mean': -481.5411376953125, 'KL/std': 281.17803955078125, 'logits/chosen': -0.46026161313056946, 'logits/rejected': -0.45394134521484375, 'epoch': 0.59} + 59%|█████▉ | 404/681 [17:07<11:20, 2.45s/it] 59%|█████▉ | 405/681 [17:10<11:04, 2.41s/it] {'loss': 1.1669, 'grad_norm': 35.01424789428711, 'learning_rate': 2.129207286861638e-07, 'fcm_dpo/beta': 0.0015670396387577057, 'fcm_dpo/q_t': 0.4239059090614319, 'fcm_dpo/delta': -0.04510403424501419, 'fcm_dpo/margin': 217.84002685546875, 'margin_dpo/margin_mean': 217.84002685546875, 'margin_dpo/margin_std': 392.323486328125, 'logps/chosen': -461.0186767578125, 'logps/rejected': -700.882568359375, 'logps/ref_chosen': -65.16290283203125, 'logps/ref_rejected': -87.18678283691406, 'KL/chosen_KL_mean': -395.85577392578125, 'KL/rejected_KL_mean': -613.69580078125, 'KL/mean': -504.7757873535156, 'KL/std': 314.71160888671875, 'logits/chosen': -0.4418267011642456, 'logits/rejected': -0.43295902013778687, 'epoch': 0.59} + 59%|█████▉ | 405/681 [17:10<11:04, 2.41s/it] 60%|█████▉ | 406/681 [17:12<11:06, 2.42s/it] {'loss': 1.0625, 'grad_norm': 30.385452270507812, 'learning_rate': 2.1165208628032861e-07, 'fcm_dpo/beta': 0.0015584398061037064, 'fcm_dpo/q_t': 0.40036964416503906, 'fcm_dpo/delta': -0.03671257197856903, 'fcm_dpo/margin': 279.05023193359375, 'margin_dpo/margin_mean': 279.05023193359375, 'margin_dpo/margin_std': 336.1007995605469, 'logps/chosen': -403.7601623535156, 'logps/rejected': -725.148193359375, 'logps/ref_chosen': -49.740814208984375, 'logps/ref_rejected': -92.07862854003906, 'KL/chosen_KL_mean': -354.01934814453125, 'KL/rejected_KL_mean': -633.069580078125, 'KL/mean': -493.54443359375, 'KL/std': 308.9290771484375, 'logits/chosen': -0.46909600496292114, 'logits/rejected': -0.47889643907546997, 'epoch': 0.6} + 60%|█████▉ | 406/681 [17:12<11:06, 2.42s/it] 60%|█████▉ | 407/681 [17:15<11:09, 2.44s/it] {'loss': 1.1926, 'grad_norm': 31.04163360595703, 'learning_rate': 2.1038445437768375e-07, 'fcm_dpo/beta': 0.001549946959130466, 'fcm_dpo/q_t': 0.4347270131111145, 'fcm_dpo/delta': 0.008951360359787941, 'fcm_dpo/margin': 178.3841552734375, 'margin_dpo/margin_mean': 178.38414001464844, 'margin_dpo/margin_std': 325.7279968261719, 'logps/chosen': -433.6413269042969, 'logps/rejected': -633.2068481445312, 'logps/ref_chosen': -56.33069610595703, 'logps/ref_rejected': -77.51209259033203, 'KL/chosen_KL_mean': -377.31060791015625, 'KL/rejected_KL_mean': -555.6947631835938, 'KL/mean': -466.502685546875, 'KL/std': 247.73455810546875, 'logits/chosen': -0.4896223545074463, 'logits/rejected': -0.46283426880836487, 'epoch': 0.6} + 60%|█████▉ | 407/681 [17:15<11:09, 2.44s/it] 60%|█████▉ | 408/681 [17:17<11:26, 2.52s/it] {'loss': 1.1374, 'grad_norm': 24.999143600463867, 'learning_rate': 2.0911786638150872e-07, 'fcm_dpo/beta': 0.0015726467827335, 'fcm_dpo/q_t': 0.42552345991134644, 'fcm_dpo/delta': 0.0853329598903656, 'fcm_dpo/margin': 201.75115966796875, 'margin_dpo/margin_mean': 201.75115966796875, 'margin_dpo/margin_std': 270.3141784667969, 'logps/chosen': -451.62713623046875, 'logps/rejected': -673.6859130859375, 'logps/ref_chosen': -69.789306640625, 'logps/ref_rejected': -90.09693908691406, 'KL/chosen_KL_mean': -381.83782958984375, 'KL/rejected_KL_mean': -583.5889892578125, 'KL/mean': -482.71337890625, 'KL/std': 239.08734130859375, 'logits/chosen': -0.4955484867095947, 'logits/rejected': -0.47467708587646484, 'epoch': 0.6} + 60%|█████▉ | 408/681 [17:17<11:26, 2.52s/it] 60%|██████ | 409/681 [17:20<11:37, 2.56s/it] {'loss': 1.145, 'grad_norm': 29.315763473510742, 'learning_rate': 2.0785235566757517e-07, 'fcm_dpo/beta': 0.0015998759772628546, 'fcm_dpo/q_t': 0.4262683391571045, 'fcm_dpo/delta': 0.0850619375705719, 'fcm_dpo/margin': 198.47232055664062, 'margin_dpo/margin_mean': 198.47232055664062, 'margin_dpo/margin_std': 289.51141357421875, 'logps/chosen': -431.96002197265625, 'logps/rejected': -648.0191650390625, 'logps/ref_chosen': -67.31744384765625, 'logps/ref_rejected': -84.904296875, 'KL/chosen_KL_mean': -364.642578125, 'KL/rejected_KL_mean': -563.1149291992188, 'KL/mean': -463.87872314453125, 'KL/std': 257.52569580078125, 'logits/chosen': -0.4769352376461029, 'logits/rejected': -0.4611578583717346, 'epoch': 0.6} + 60%|██████ | 409/681 [17:20<11:37, 2.56s/it] 60%|██████ | 410/681 [17:23<11:40, 2.59s/it] {'loss': 1.0976, 'grad_norm': 26.693159103393555, 'learning_rate': 2.065879555832674e-07, 'fcm_dpo/beta': 0.0016098904889076948, 'fcm_dpo/q_t': 0.41175514459609985, 'fcm_dpo/delta': 0.027035847306251526, 'fcm_dpo/margin': 232.30197143554688, 'margin_dpo/margin_mean': 232.30198669433594, 'margin_dpo/margin_std': 286.159912109375, 'logps/chosen': -390.648681640625, 'logps/rejected': -654.684326171875, 'logps/ref_chosen': -51.465354919433594, 'logps/ref_rejected': -83.198974609375, 'KL/chosen_KL_mean': -339.183349609375, 'KL/rejected_KL_mean': -571.4853515625, 'KL/mean': -455.3343505859375, 'KL/std': 257.6640319824219, 'logits/chosen': -0.5109409093856812, 'logits/rejected': -0.5129973888397217, 'epoch': 0.6} + 60%|██████ | 410/681 [17:23<11:40, 2.59s/it] 60%|██████ | 411/681 [17:25<11:17, 2.51s/it] {'loss': 1.1053, 'grad_norm': 24.12554931640625, 'learning_rate': 2.0532469944670343e-07, 'fcm_dpo/beta': 0.0016047862591221929, 'fcm_dpo/q_t': 0.41245776414871216, 'fcm_dpo/delta': 0.015297271311283112, 'fcm_dpo/margin': 239.392578125, 'margin_dpo/margin_mean': 239.392578125, 'margin_dpo/margin_std': 321.0933532714844, 'logps/chosen': -406.6169738769531, 'logps/rejected': -674.397216796875, 'logps/ref_chosen': -52.30727005004883, 'logps/ref_rejected': -80.69495391845703, 'KL/chosen_KL_mean': -354.3096923828125, 'KL/rejected_KL_mean': -593.7022705078125, 'KL/mean': -474.0059814453125, 'KL/std': 283.52850341796875, 'logits/chosen': -0.5120722055435181, 'logits/rejected': -0.520818293094635, 'epoch': 0.6} + 60%|██████ | 411/681 [17:25<11:17, 2.51s/it] 60%|██████ | 412/681 [17:27<11:07, 2.48s/it] {'loss': 1.0918, 'grad_norm': 33.52204132080078, 'learning_rate': 2.0406262054585738e-07, 'fcm_dpo/beta': 0.0016203692648559809, 'fcm_dpo/q_t': 0.40797942876815796, 'fcm_dpo/delta': 0.0024842238053679466, 'fcm_dpo/margin': 245.37954711914062, 'margin_dpo/margin_mean': 245.3795623779297, 'margin_dpo/margin_std': 322.3990173339844, 'logps/chosen': -414.4621887207031, 'logps/rejected': -706.7584228515625, 'logps/ref_chosen': -53.144126892089844, 'logps/ref_rejected': -100.0608139038086, 'KL/chosen_KL_mean': -361.31805419921875, 'KL/rejected_KL_mean': -606.6976318359375, 'KL/mean': -484.0078125, 'KL/std': 272.78863525390625, 'logits/chosen': -0.5373940467834473, 'logits/rejected': -0.5674378275871277, 'epoch': 0.6} + 60%|██████ | 412/681 [17:27<11:07, 2.48s/it] 61%|██████ | 413/681 [17:30<11:21, 2.54s/it] {'loss': 1.081, 'grad_norm': 29.867643356323242, 'learning_rate': 2.0280175213768205e-07, 'fcm_dpo/beta': 0.001623795717023313, 'fcm_dpo/q_t': 0.4057984948158264, 'fcm_dpo/delta': 0.0013288334012031555, 'fcm_dpo/margin': 245.4788055419922, 'margin_dpo/margin_mean': 245.4788055419922, 'margin_dpo/margin_std': 293.564697265625, 'logps/chosen': -440.1754150390625, 'logps/rejected': -723.545654296875, 'logps/ref_chosen': -61.58196258544922, 'logps/ref_rejected': -99.47340393066406, 'KL/chosen_KL_mean': -378.59344482421875, 'KL/rejected_KL_mean': -624.072265625, 'KL/mean': -501.33282470703125, 'KL/std': 269.05731201171875, 'logits/chosen': -0.5124090909957886, 'logits/rejected': -0.5228337049484253, 'epoch': 0.61} + 61%|██████ | 413/681 [17:30<11:21, 2.54s/it] 61%|██████ | 414/681 [17:33<11:19, 2.54s/it] {'loss': 1.0618, 'grad_norm': 43.11936950683594, 'learning_rate': 2.0154212744723247e-07, 'fcm_dpo/beta': 0.0016230610199272633, 'fcm_dpo/q_t': 0.3998476266860962, 'fcm_dpo/delta': -0.030179578810930252, 'fcm_dpo/margin': 263.87225341796875, 'margin_dpo/margin_mean': 263.87225341796875, 'margin_dpo/margin_std': 297.9873046875, 'logps/chosen': -390.5539245605469, 'logps/rejected': -695.441162109375, 'logps/ref_chosen': -46.63148498535156, 'logps/ref_rejected': -87.64653015136719, 'KL/chosen_KL_mean': -343.92242431640625, 'KL/rejected_KL_mean': -607.794677734375, 'KL/mean': -475.8585510253906, 'KL/std': 259.8440856933594, 'logits/chosen': -0.45165306329727173, 'logits/rejected': -0.4454384446144104, 'epoch': 0.61} + 61%|██████ | 414/681 [17:33<11:19, 2.54s/it] 61%|██████ | 415/681 [17:35<11:32, 2.60s/it] {'loss': 1.1367, 'grad_norm': 27.217483520507812, 'learning_rate': 2.002837796667909e-07, 'fcm_dpo/beta': 0.001621844945475459, 'fcm_dpo/q_t': 0.42238879203796387, 'fcm_dpo/delta': 0.0709480568766594, 'fcm_dpo/margin': 204.3395233154297, 'margin_dpo/margin_mean': 204.3395233154297, 'margin_dpo/margin_std': 292.1011962890625, 'logps/chosen': -460.90667724609375, 'logps/rejected': -687.10546875, 'logps/ref_chosen': -78.6182861328125, 'logps/ref_rejected': -100.47752380371094, 'KL/chosen_KL_mean': -382.28839111328125, 'KL/rejected_KL_mean': -586.6279296875, 'KL/mean': -484.4581604003906, 'KL/std': 261.7178649902344, 'logits/chosen': -0.5291392207145691, 'logits/rejected': -0.5265468955039978, 'epoch': 0.61} + 61%|██████ | 415/681 [17:35<11:32, 2.60s/it] 61%|██████ | 416/681 [17:38<11:26, 2.59s/it] {'loss': 0.9928, 'grad_norm': 35.17634582519531, 'learning_rate': 1.990267419549914e-07, 'fcm_dpo/beta': 0.0016112902667373419, 'fcm_dpo/q_t': 0.3814007043838501, 'fcm_dpo/delta': -0.11248860508203506, 'fcm_dpo/margin': 314.61083984375, 'margin_dpo/margin_mean': 314.61077880859375, 'margin_dpo/margin_std': 286.4470520019531, 'logps/chosen': -411.85089111328125, 'logps/rejected': -758.7512817382812, 'logps/ref_chosen': -58.27912521362305, 'logps/ref_rejected': -90.56871795654297, 'KL/chosen_KL_mean': -353.57177734375, 'KL/rejected_KL_mean': -668.1825561523438, 'KL/mean': -510.877197265625, 'KL/std': 296.62371826171875, 'logits/chosen': -0.5343978404998779, 'logits/rejected': -0.5423879623413086, 'epoch': 0.61} + 61%|██████ | 416/681 [17:38<11:26, 2.59s/it] 61%|██████ | 417/681 [17:40<11:06, 2.52s/it] {'loss': 1.0501, 'grad_norm': 30.77235984802246, 'learning_rate': 1.9777104743594686e-07, 'fcm_dpo/beta': 0.0015942594036459923, 'fcm_dpo/q_t': 0.4002404808998108, 'fcm_dpo/delta': -0.023848645389080048, 'fcm_dpo/margin': 265.21044921875, 'margin_dpo/margin_mean': 265.21044921875, 'margin_dpo/margin_std': 265.76727294921875, 'logps/chosen': -407.540771484375, 'logps/rejected': -690.704345703125, 'logps/ref_chosen': -50.1987190246582, 'logps/ref_rejected': -68.15184020996094, 'KL/chosen_KL_mean': -357.342041015625, 'KL/rejected_KL_mean': -622.552490234375, 'KL/mean': -489.947265625, 'KL/std': 266.30548095703125, 'logits/chosen': -0.5373271703720093, 'logits/rejected': -0.5231212377548218, 'epoch': 0.61} + 61%|██████ | 417/681 [17:40<11:06, 2.52s/it] 61%|██████▏ | 418/681 [17:43<11:14, 2.56s/it] {'loss': 1.0779, 'grad_norm': 22.210895538330078, 'learning_rate': 1.965167291983757e-07, 'fcm_dpo/beta': 0.001592871267348528, 'fcm_dpo/q_t': 0.4023270905017853, 'fcm_dpo/delta': -0.03536780923604965, 'fcm_dpo/margin': 271.8375244140625, 'margin_dpo/margin_mean': 271.8375244140625, 'margin_dpo/margin_std': 355.95672607421875, 'logps/chosen': -464.7359619140625, 'logps/rejected': -759.2864990234375, 'logps/ref_chosen': -81.97846984863281, 'logps/ref_rejected': -104.69148254394531, 'KL/chosen_KL_mean': -382.75750732421875, 'KL/rejected_KL_mean': -654.5950317382812, 'KL/mean': -518.67626953125, 'KL/std': 304.6961669921875, 'logits/chosen': -0.6004199385643005, 'logits/rejected': -0.5827990174293518, 'epoch': 0.61} + 61%|██████▏ | 418/681 [17:43<11:14, 2.56s/it] 62%|██████▏ | 419/681 [17:45<11:09, 2.56s/it] {'loss': 1.0314, 'grad_norm': 28.61322784423828, 'learning_rate': 1.9526382029472988e-07, 'fcm_dpo/beta': 0.001561171025969088, 'fcm_dpo/q_t': 0.39019423723220825, 'fcm_dpo/delta': -0.07300984114408493, 'fcm_dpo/margin': 300.7904052734375, 'margin_dpo/margin_mean': 300.7904052734375, 'margin_dpo/margin_std': 325.02825927734375, 'logps/chosen': -412.8028869628906, 'logps/rejected': -752.2276611328125, 'logps/ref_chosen': -52.948646545410156, 'logps/ref_rejected': -91.58309936523438, 'KL/chosen_KL_mean': -359.854248046875, 'KL/rejected_KL_mean': -660.6445922851562, 'KL/mean': -510.2494201660156, 'KL/std': 277.7416076660156, 'logits/chosen': -0.5227010846138, 'logits/rejected': -0.5222500562667847, 'epoch': 0.62} + 62%|██████▏ | 419/681 [17:45<11:09, 2.56s/it] 62%|██████▏ | 420/681 [17:48<11:04, 2.55s/it] {'loss': 1.2047, 'grad_norm': 54.64779281616211, 'learning_rate': 1.9401235374032425e-07, 'fcm_dpo/beta': 0.0015826968010514975, 'fcm_dpo/q_t': 0.4339344799518585, 'fcm_dpo/delta': 0.1057576984167099, 'fcm_dpo/margin': 187.75083923339844, 'margin_dpo/margin_mean': 187.75082397460938, 'margin_dpo/margin_std': 400.8773193359375, 'logps/chosen': -530.2868041992188, 'logps/rejected': -709.5875244140625, 'logps/ref_chosen': -77.7699203491211, 'logps/ref_rejected': -69.31985473632812, 'KL/chosen_KL_mean': -452.51690673828125, 'KL/rejected_KL_mean': -640.2677001953125, 'KL/mean': -546.392333984375, 'KL/std': 296.0887145996094, 'logits/chosen': -0.5721093416213989, 'logits/rejected': -0.5451463460922241, 'epoch': 0.62} + 62%|██████▏ | 420/681 [17:48<11:04, 2.55s/it] 62%|██████▏ | 421/681 [17:51<11:15, 2.60s/it] {'loss': 1.1373, 'grad_norm': 27.371572494506836, 'learning_rate': 1.9276236251246653e-07, 'fcm_dpo/beta': 0.0016100335633382201, 'fcm_dpo/q_t': 0.4196345806121826, 'fcm_dpo/delta': 0.0610845573246479, 'fcm_dpo/margin': 211.4017333984375, 'margin_dpo/margin_mean': 211.4017333984375, 'margin_dpo/margin_std': 309.2540283203125, 'logps/chosen': -433.17120361328125, 'logps/rejected': -680.0885009765625, 'logps/ref_chosen': -53.765865325927734, 'logps/ref_rejected': -89.28144836425781, 'KL/chosen_KL_mean': -379.40533447265625, 'KL/rejected_KL_mean': -590.8070068359375, 'KL/mean': -485.106201171875, 'KL/std': 287.34088134765625, 'logits/chosen': -0.5819834470748901, 'logits/rejected': -0.5714644193649292, 'epoch': 0.62} + 62%|██████▏ | 421/681 [17:51<11:15, 2.60s/it] 62%|██████▏ | 422/681 [17:54<11:31, 2.67s/it] {'loss': 1.1085, 'grad_norm': 33.85724639892578, 'learning_rate': 1.9151387954958792e-07, 'fcm_dpo/beta': 0.0016107236733660102, 'fcm_dpo/q_t': 0.4098985493183136, 'fcm_dpo/delta': 0.013142132200300694, 'fcm_dpo/margin': 240.49374389648438, 'margin_dpo/margin_mean': 240.49374389648438, 'margin_dpo/margin_std': 345.5645446777344, 'logps/chosen': -492.536865234375, 'logps/rejected': -752.2603759765625, 'logps/ref_chosen': -68.6337661743164, 'logps/ref_rejected': -87.86351013183594, 'KL/chosen_KL_mean': -423.903076171875, 'KL/rejected_KL_mean': -664.3968505859375, 'KL/mean': -544.1499633789062, 'KL/std': 294.40728759765625, 'logits/chosen': -0.6141137480735779, 'logits/rejected': -0.6188079118728638, 'epoch': 0.62} + 62%|██████▏ | 422/681 [17:54<11:31, 2.67s/it] 62%|██████▏ | 423/681 [17:56<11:07, 2.59s/it] {'loss': 1.072, 'grad_norm': 31.080442428588867, 'learning_rate': 1.902669377503756e-07, 'fcm_dpo/beta': 0.0016070720739662647, 'fcm_dpo/q_t': 0.4035068154335022, 'fcm_dpo/delta': -0.016826242208480835, 'fcm_dpo/margin': 258.9209899902344, 'margin_dpo/margin_mean': 258.9209899902344, 'margin_dpo/margin_std': 313.36004638671875, 'logps/chosen': -459.6838073730469, 'logps/rejected': -749.9210205078125, 'logps/ref_chosen': -54.99030303955078, 'logps/ref_rejected': -86.30654907226562, 'KL/chosen_KL_mean': -404.6935119628906, 'KL/rejected_KL_mean': -663.6144409179688, 'KL/mean': -534.154052734375, 'KL/std': 282.32171630859375, 'logits/chosen': -0.5942707061767578, 'logits/rejected': -0.6040855646133423, 'epoch': 0.62} + 62%|██████▏ | 423/681 [17:56<11:07, 2.59s/it] 62%|██████▏ | 424/681 [17:59<11:10, 2.61s/it] {'loss': 1.0919, 'grad_norm': 33.83623123168945, 'learning_rate': 1.890215699729057e-07, 'fcm_dpo/beta': 0.0015995125286281109, 'fcm_dpo/q_t': 0.40843045711517334, 'fcm_dpo/delta': -0.003478415310382843, 'fcm_dpo/margin': 251.95314025878906, 'margin_dpo/margin_mean': 251.95315551757812, 'margin_dpo/margin_std': 339.11346435546875, 'logps/chosen': -421.3399658203125, 'logps/rejected': -683.7601928710938, 'logps/ref_chosen': -56.01192092895508, 'logps/ref_rejected': -66.47896575927734, 'KL/chosen_KL_mean': -365.32806396484375, 'KL/rejected_KL_mean': -617.28125, 'KL/mean': -491.30462646484375, 'KL/std': 282.08697509765625, 'logits/chosen': -0.577785313129425, 'logits/rejected': -0.555591881275177, 'epoch': 0.62} + 62%|██████▏ | 424/681 [17:59<11:10, 2.61s/it] 62%|██████▏ | 425/681 [18:01<11:00, 2.58s/it] {'loss': 1.1673, 'grad_norm': 31.754074096679688, 'learning_rate': 1.8777780903377732e-07, 'fcm_dpo/beta': 0.001631980761885643, 'fcm_dpo/q_t': 0.42644861340522766, 'fcm_dpo/delta': 0.09120546281337738, 'fcm_dpo/margin': 190.730224609375, 'margin_dpo/margin_mean': 190.730224609375, 'margin_dpo/margin_std': 325.1813049316406, 'logps/chosen': -451.20458984375, 'logps/rejected': -690.9912719726562, 'logps/ref_chosen': -46.86899948120117, 'logps/ref_rejected': -95.92545318603516, 'KL/chosen_KL_mean': -404.3356018066406, 'KL/rejected_KL_mean': -595.0657958984375, 'KL/mean': -499.7007141113281, 'KL/std': 263.603759765625, 'logits/chosen': -0.5840317606925964, 'logits/rejected': -0.5841487646102905, 'epoch': 0.62} + 62%|██████▏ | 425/681 [18:01<11:00, 2.58s/it] 63%|██████▎ | 426/681 [18:04<11:04, 2.61s/it] {'loss': 1.0805, 'grad_norm': 29.338314056396484, 'learning_rate': 1.8653568770724803e-07, 'fcm_dpo/beta': 0.001641254872083664, 'fcm_dpo/q_t': 0.40564680099487305, 'fcm_dpo/delta': -0.004648171365261078, 'fcm_dpo/margin': 246.22134399414062, 'margin_dpo/margin_mean': 246.22132873535156, 'margin_dpo/margin_std': 294.9763488769531, 'logps/chosen': -452.35687255859375, 'logps/rejected': -703.26123046875, 'logps/ref_chosen': -76.58354187011719, 'logps/ref_rejected': -81.26658630371094, 'KL/chosen_KL_mean': -375.7733154296875, 'KL/rejected_KL_mean': -621.99462890625, 'KL/mean': -498.88397216796875, 'KL/std': 270.572509765625, 'logits/chosen': -0.5852512121200562, 'logits/rejected': -0.5579032897949219, 'epoch': 0.63} + 63%|██████▎ | 426/681 [18:04<11:04, 2.61s/it] 63%|██████▎ | 427/681 [18:06<11:02, 2.61s/it] {'loss': 1.1492, 'grad_norm': 25.174968719482422, 'learning_rate': 1.8529523872436977e-07, 'fcm_dpo/beta': 0.0016534591559320688, 'fcm_dpo/q_t': 0.4280179440975189, 'fcm_dpo/delta': 0.09219777584075928, 'fcm_dpo/margin': 187.9479217529297, 'margin_dpo/margin_mean': 187.94793701171875, 'margin_dpo/margin_std': 274.6386413574219, 'logps/chosen': -404.7421875, 'logps/rejected': -606.4022216796875, 'logps/ref_chosen': -64.8538818359375, 'logps/ref_rejected': -78.5660171508789, 'KL/chosen_KL_mean': -339.8883361816406, 'KL/rejected_KL_mean': -527.8362426757812, 'KL/mean': -433.8622741699219, 'KL/std': 233.69332885742188, 'logits/chosen': -0.6304788589477539, 'logits/rejected': -0.6161661744117737, 'epoch': 0.63} + 63%|██████▎ | 427/681 [18:06<11:02, 2.61s/it] 63%|██████▎ | 428/681 [18:09<11:04, 2.63s/it] {'loss': 1.1009, 'grad_norm': 36.647064208984375, 'learning_rate': 1.8405649477212697e-07, 'fcm_dpo/beta': 0.0016524514649063349, 'fcm_dpo/q_t': 0.4040681719779968, 'fcm_dpo/delta': -0.022071223706007004, 'fcm_dpo/margin': 254.64175415039062, 'margin_dpo/margin_mean': 254.64175415039062, 'margin_dpo/margin_std': 378.00115966796875, 'logps/chosen': -483.7822265625, 'logps/rejected': -779.0691528320312, 'logps/ref_chosen': -62.63666534423828, 'logps/ref_rejected': -103.28181457519531, 'KL/chosen_KL_mean': -421.1455383300781, 'KL/rejected_KL_mean': -675.787353515625, 'KL/mean': -548.4664306640625, 'KL/std': 309.02484130859375, 'logits/chosen': -0.5900696516036987, 'logits/rejected': -0.5930036306381226, 'epoch': 0.63} + 63%|██████▎ | 428/681 [18:09<11:04, 2.63s/it] 63%|██████▎ | 429/681 [18:12<10:59, 2.62s/it] {'loss': 1.171, 'grad_norm': 34.380889892578125, 'learning_rate': 1.828194884925749e-07, 'fcm_dpo/beta': 0.0016535113099962473, 'fcm_dpo/q_t': 0.42727112770080566, 'fcm_dpo/delta': -0.020081549882888794, 'fcm_dpo/margin': 190.3538818359375, 'margin_dpo/margin_mean': 190.3538818359375, 'margin_dpo/margin_std': 328.217041015625, 'logps/chosen': -493.3191223144531, 'logps/rejected': -694.23388671875, 'logps/ref_chosen': -81.23401641845703, 'logps/ref_rejected': -91.79493713378906, 'KL/chosen_KL_mean': -412.0851135253906, 'KL/rejected_KL_mean': -602.43896484375, 'KL/mean': -507.26202392578125, 'KL/std': 265.12261962890625, 'logits/chosen': -0.5973831415176392, 'logits/rejected': -0.5831949710845947, 'epoch': 0.63} + 63%|██████▎ | 429/681 [18:12<10:59, 2.62s/it] 63%|██████▎ | 430/681 [18:14<11:06, 2.66s/it] {'loss': 1.1216, 'grad_norm': 28.673410415649414, 'learning_rate': 1.8158425248197928e-07, 'fcm_dpo/beta': 0.0016685712616890669, 'fcm_dpo/q_t': 0.42112964391708374, 'fcm_dpo/delta': 0.06419498473405838, 'fcm_dpo/margin': 202.4704132080078, 'margin_dpo/margin_mean': 202.4704132080078, 'margin_dpo/margin_std': 261.1304016113281, 'logps/chosen': -385.0269775390625, 'logps/rejected': -630.9998779296875, 'logps/ref_chosen': -60.920326232910156, 'logps/ref_rejected': -104.42280578613281, 'KL/chosen_KL_mean': -324.106689453125, 'KL/rejected_KL_mean': -526.5770263671875, 'KL/mean': -425.34185791015625, 'KL/std': 241.1708221435547, 'logits/chosen': -0.5664153099060059, 'logits/rejected': -0.562206506729126, 'epoch': 0.63} + 63%|██████▎ | 430/681 [18:14<11:06, 2.66s/it] 63%|██████▎ | 431/681 [18:17<11:13, 2.69s/it] {'loss': 1.01, 'grad_norm': 27.440969467163086, 'learning_rate': 1.8035081928995788e-07, 'fcm_dpo/beta': 0.001646057702600956, 'fcm_dpo/q_t': 0.38535940647125244, 'fcm_dpo/delta': -0.09401103109121323, 'fcm_dpo/margin': 297.22686767578125, 'margin_dpo/margin_mean': 297.2269287109375, 'margin_dpo/margin_std': 285.8399658203125, 'logps/chosen': -357.62249755859375, 'logps/rejected': -690.3408203125, 'logps/ref_chosen': -57.34874725341797, 'logps/ref_rejected': -92.84022521972656, 'KL/chosen_KL_mean': -300.27374267578125, 'KL/rejected_KL_mean': -597.5006103515625, 'KL/mean': -448.88720703125, 'KL/std': 266.6134338378906, 'logits/chosen': -0.53639817237854, 'logits/rejected': -0.5400429964065552, 'epoch': 0.63} + 63%|██████▎ | 431/681 [18:17<11:13, 2.69s/it] 63%|██████▎ | 432/681 [18:20<11:14, 2.71s/it] {'loss': 1.0284, 'grad_norm': 43.64820098876953, 'learning_rate': 1.791192214186223e-07, 'fcm_dpo/beta': 0.0016348997596651316, 'fcm_dpo/q_t': 0.3924236297607422, 'fcm_dpo/delta': -0.06477323174476624, 'fcm_dpo/margin': 282.22406005859375, 'margin_dpo/margin_mean': 282.2240295410156, 'margin_dpo/margin_std': 273.5948181152344, 'logps/chosen': -380.5098876953125, 'logps/rejected': -690.2386474609375, 'logps/ref_chosen': -71.07479095458984, 'logps/ref_rejected': -98.57952880859375, 'KL/chosen_KL_mean': -309.4350891113281, 'KL/rejected_KL_mean': -591.6591186523438, 'KL/mean': -450.547119140625, 'KL/std': 265.4091796875, 'logits/chosen': -0.502853274345398, 'logits/rejected': -0.4937119781970978, 'epoch': 0.63} + 63%|██████▎ | 432/681 [18:20<11:14, 2.71s/it] 64%|██████▎ | 433/681 [18:23<11:07, 2.69s/it] {'loss': 1.1645, 'grad_norm': 37.26985549926758, 'learning_rate': 1.7788949132172193e-07, 'fcm_dpo/beta': 0.0016442297492176294, 'fcm_dpo/q_t': 0.42625609040260315, 'fcm_dpo/delta': 0.09210029989480972, 'fcm_dpo/margin': 188.88734436035156, 'margin_dpo/margin_mean': 188.88734436035156, 'margin_dpo/margin_std': 314.8270263671875, 'logps/chosen': -462.4809265136719, 'logps/rejected': -689.0460205078125, 'logps/ref_chosen': -58.273193359375, 'logps/ref_rejected': -95.95089721679688, 'KL/chosen_KL_mean': -404.2077331542969, 'KL/rejected_KL_mean': -593.0950927734375, 'KL/mean': -498.65142822265625, 'KL/std': 266.9083251953125, 'logits/chosen': -0.5648001432418823, 'logits/rejected': -0.5520174503326416, 'epoch': 0.64} + 64%|██████▎ | 433/681 [18:23<11:07, 2.69s/it] 64%|██████▎ | 434/681 [18:25<11:00, 2.67s/it] {'loss': 1.1083, 'grad_norm': 22.998327255249023, 'learning_rate': 1.7666166140378853e-07, 'fcm_dpo/beta': 0.001646613236516714, 'fcm_dpo/q_t': 0.4145994186401367, 'fcm_dpo/delta': 0.010008249431848526, 'fcm_dpo/margin': 236.9807586669922, 'margin_dpo/margin_mean': 236.98074340820312, 'margin_dpo/margin_std': 357.0912780761719, 'logps/chosen': -420.76861572265625, 'logps/rejected': -674.2742919921875, 'logps/ref_chosen': -61.97370147705078, 'logps/ref_rejected': -78.49861145019531, 'KL/chosen_KL_mean': -358.794921875, 'KL/rejected_KL_mean': -595.7756958007812, 'KL/mean': -477.2853088378906, 'KL/std': 286.1195068359375, 'logits/chosen': -0.5716849565505981, 'logits/rejected': -0.5764377117156982, 'epoch': 0.64} + 64%|██████▎ | 434/681 [18:25<11:00, 2.67s/it] 64%|██████▍ | 435/681 [18:27<10:25, 2.54s/it] {'loss': 1.0804, 'grad_norm': 30.54098892211914, 'learning_rate': 1.7543576401928218e-07, 'fcm_dpo/beta': 0.0016480737831443548, 'fcm_dpo/q_t': 0.40409788489341736, 'fcm_dpo/delta': -0.01371398288756609, 'fcm_dpo/margin': 250.6724853515625, 'margin_dpo/margin_mean': 250.6724853515625, 'margin_dpo/margin_std': 310.9295654296875, 'logps/chosen': -380.8844299316406, 'logps/rejected': -667.6217651367188, 'logps/ref_chosen': -51.502052307128906, 'logps/ref_rejected': -87.56689453125, 'KL/chosen_KL_mean': -329.38238525390625, 'KL/rejected_KL_mean': -580.0548706054688, 'KL/mean': -454.7186279296875, 'KL/std': 273.7630615234375, 'logits/chosen': -0.5849795937538147, 'logits/rejected': -0.5741031169891357, 'epoch': 0.64} + 64%|██████▍ | 435/681 [18:27<10:25, 2.54s/it] 64%|██████▍ | 436/681 [18:30<10:29, 2.57s/it] {'loss': 1.1082, 'grad_norm': 44.92102813720703, 'learning_rate': 1.742118314717391e-07, 'fcm_dpo/beta': 0.0016525493701919913, 'fcm_dpo/q_t': 0.41519662737846375, 'fcm_dpo/delta': 0.03764678165316582, 'fcm_dpo/margin': 220.05868530273438, 'margin_dpo/margin_mean': 220.05868530273438, 'margin_dpo/margin_std': 284.269287109375, 'logps/chosen': -422.1286315917969, 'logps/rejected': -653.5113525390625, 'logps/ref_chosen': -71.40371704101562, 'logps/ref_rejected': -82.72775268554688, 'KL/chosen_KL_mean': -350.72491455078125, 'KL/rejected_KL_mean': -570.7836303710938, 'KL/mean': -460.7542724609375, 'KL/std': 239.6090087890625, 'logits/chosen': -0.5648950934410095, 'logits/rejected': -0.5372939109802246, 'epoch': 0.64} + 64%|██████▍ | 436/681 [18:30<10:29, 2.57s/it] 64%|██████▍ | 437/681 [18:33<10:37, 2.61s/it] {'loss': 1.1003, 'grad_norm': 30.965953826904297, 'learning_rate': 1.7298989601292036e-07, 'fcm_dpo/beta': 0.0016638417728245258, 'fcm_dpo/q_t': 0.41194066405296326, 'fcm_dpo/delta': 0.02482348121702671, 'fcm_dpo/margin': 226.03802490234375, 'margin_dpo/margin_mean': 226.03802490234375, 'margin_dpo/margin_std': 288.8181457519531, 'logps/chosen': -422.13812255859375, 'logps/rejected': -665.4754638671875, 'logps/ref_chosen': -64.7442626953125, 'logps/ref_rejected': -82.04356384277344, 'KL/chosen_KL_mean': -357.39385986328125, 'KL/rejected_KL_mean': -583.431884765625, 'KL/mean': -470.41290283203125, 'KL/std': 238.08837890625, 'logits/chosen': -0.5961349010467529, 'logits/rejected': -0.575666606426239, 'epoch': 0.64} + 64%|██████▍ | 437/681 [18:33<10:37, 2.61s/it] 64%|██████▍ | 438/681 [18:35<10:19, 2.55s/it] {'loss': 1.0646, 'grad_norm': 31.922332763671875, 'learning_rate': 1.7176998984196144e-07, 'fcm_dpo/beta': 0.0016566277481615543, 'fcm_dpo/q_t': 0.40168195962905884, 'fcm_dpo/delta': -0.02283564768731594, 'fcm_dpo/margin': 254.41964721679688, 'margin_dpo/margin_mean': 254.41964721679688, 'margin_dpo/margin_std': 291.51605224609375, 'logps/chosen': -427.9560546875, 'logps/rejected': -706.433837890625, 'logps/ref_chosen': -59.0186653137207, 'logps/ref_rejected': -83.07682800292969, 'KL/chosen_KL_mean': -368.9373779296875, 'KL/rejected_KL_mean': -623.3570556640625, 'KL/mean': -496.147216796875, 'KL/std': 269.57830810546875, 'logits/chosen': -0.6157523393630981, 'logits/rejected': -0.6025946736335754, 'epoch': 0.64} + 64%|██████▍ | 438/681 [18:35<10:19, 2.55s/it] 64%|██████▍ | 439/681 [18:38<10:07, 2.51s/it] {'loss': 1.1382, 'grad_norm': 29.01395606994629, 'learning_rate': 1.7055214510452458e-07, 'fcm_dpo/beta': 0.0016382005997002125, 'fcm_dpo/q_t': 0.420367956161499, 'fcm_dpo/delta': -0.06990180164575577, 'fcm_dpo/margin': 210.9752655029297, 'margin_dpo/margin_mean': 210.97525024414062, 'margin_dpo/margin_std': 309.2723388671875, 'logps/chosen': -439.52783203125, 'logps/rejected': -680.7044677734375, 'logps/ref_chosen': -53.78407669067383, 'logps/ref_rejected': -83.98545837402344, 'KL/chosen_KL_mean': -385.7437744140625, 'KL/rejected_KL_mean': -596.718994140625, 'KL/mean': -491.2313537597656, 'KL/std': 272.39862060546875, 'logits/chosen': -0.5831667184829712, 'logits/rejected': -0.5851659774780273, 'epoch': 0.64} + 64%|██████▍ | 439/681 [18:38<10:07, 2.51s/it] 65%|██████▍ | 440/681 [18:40<09:55, 2.47s/it] {'loss': 1.0978, 'grad_norm': 30.73674201965332, 'learning_rate': 1.6933639389195134e-07, 'fcm_dpo/beta': 0.0016412187833338976, 'fcm_dpo/q_t': 0.41035932302474976, 'fcm_dpo/delta': -0.0038700848817825317, 'fcm_dpo/margin': 245.9200897216797, 'margin_dpo/margin_mean': 245.92010498046875, 'margin_dpo/margin_std': 351.30108642578125, 'logps/chosen': -504.8084716796875, 'logps/rejected': -768.65966796875, 'logps/ref_chosen': -78.56671905517578, 'logps/ref_rejected': -96.49775695800781, 'KL/chosen_KL_mean': -426.24176025390625, 'KL/rejected_KL_mean': -672.161865234375, 'KL/mean': -549.2017822265625, 'KL/std': 338.7626953125, 'logits/chosen': -0.6162642240524292, 'logits/rejected': -0.6109949946403503, 'epoch': 0.65} + 65%|██████▍ | 440/681 [18:40<09:55, 2.47s/it] 65%|██████▍ | 441/681 [18:43<10:04, 2.52s/it] {'loss': 1.1403, 'grad_norm': 38.487876892089844, 'learning_rate': 1.681227682404166e-07, 'fcm_dpo/beta': 0.0016440332401543856, 'fcm_dpo/q_t': 0.4138296842575073, 'fcm_dpo/delta': 0.006565794348716736, 'fcm_dpo/margin': 239.3212127685547, 'margin_dpo/margin_mean': 239.3212127685547, 'margin_dpo/margin_std': 414.8189392089844, 'logps/chosen': -536.0440673828125, 'logps/rejected': -811.0116577148438, 'logps/ref_chosen': -60.824440002441406, 'logps/ref_rejected': -96.47080993652344, 'KL/chosen_KL_mean': -475.21966552734375, 'KL/rejected_KL_mean': -714.5408935546875, 'KL/mean': -594.8802490234375, 'KL/std': 347.33465576171875, 'logits/chosen': -0.6530791521072388, 'logits/rejected': -0.6417888402938843, 'epoch': 0.65} + 65%|██████▍ | 441/681 [18:43<10:04, 2.52s/it] 65%|██████▍ | 442/681 [18:45<09:51, 2.48s/it] {'loss': 1.0621, 'grad_norm': 32.10857391357422, 'learning_rate': 1.669113001300851e-07, 'fcm_dpo/beta': 0.0016326969489455223, 'fcm_dpo/q_t': 0.39594757556915283, 'fcm_dpo/delta': -0.07312282174825668, 'fcm_dpo/margin': 287.401123046875, 'margin_dpo/margin_mean': 287.401123046875, 'margin_dpo/margin_std': 378.95294189453125, 'logps/chosen': -457.77447509765625, 'logps/rejected': -774.7036743164062, 'logps/ref_chosen': -47.01121520996094, 'logps/ref_rejected': -76.53926086425781, 'KL/chosen_KL_mean': -410.76324462890625, 'KL/rejected_KL_mean': -698.1644287109375, 'KL/mean': -554.4638671875, 'KL/std': 337.224853515625, 'logits/chosen': -0.6253660321235657, 'logits/rejected': -0.6190581321716309, 'epoch': 0.65} + 65%|██████▍ | 442/681 [18:45<09:51, 2.48s/it] 65%|██████▌ | 443/681 [18:48<10:04, 2.54s/it] {'loss': 1.2045, 'grad_norm': 36.12958526611328, 'learning_rate': 1.6570202148426815e-07, 'fcm_dpo/beta': 0.0016142401145771146, 'fcm_dpo/q_t': 0.43172866106033325, 'fcm_dpo/delta': -0.0032433748710900545, 'fcm_dpo/margin': 189.53274536132812, 'margin_dpo/margin_mean': 189.53274536132812, 'margin_dpo/margin_std': 403.9544677734375, 'logps/chosen': -530.1447143554688, 'logps/rejected': -735.0844116210938, 'logps/ref_chosen': -71.27301788330078, 'logps/ref_rejected': -86.679931640625, 'KL/chosen_KL_mean': -458.8716735839844, 'KL/rejected_KL_mean': -648.4044799804688, 'KL/mean': -553.6380615234375, 'KL/std': 334.3310546875, 'logits/chosen': -0.6477606296539307, 'logits/rejected': -0.6300950050354004, 'epoch': 0.65} + 65%|██████▌ | 443/681 [18:48<10:04, 2.54s/it] 65%|██████▌ | 444/681 [18:50<09:58, 2.53s/it] {'loss': 1.0518, 'grad_norm': 33.08613586425781, 'learning_rate': 1.6449496416858282e-07, 'fcm_dpo/beta': 0.0015917312121018767, 'fcm_dpo/q_t': 0.39380595088005066, 'fcm_dpo/delta': -0.0749378427863121, 'fcm_dpo/margin': 295.9281921386719, 'margin_dpo/margin_mean': 295.92822265625, 'margin_dpo/margin_std': 379.2266845703125, 'logps/chosen': -509.6728515625, 'logps/rejected': -845.6422119140625, 'logps/ref_chosen': -57.213706970214844, 'logps/ref_rejected': -97.25489807128906, 'KL/chosen_KL_mean': -452.4591064453125, 'KL/rejected_KL_mean': -748.3873291015625, 'KL/mean': -600.4232177734375, 'KL/std': 348.21685791015625, 'logits/chosen': -0.6086077690124512, 'logits/rejected': -0.6182563900947571, 'epoch': 0.65} + 65%|██████▌ | 444/681 [18:50<09:58, 2.53s/it] 65%|██████▌ | 445/681 [18:53<10:14, 2.60s/it] {'loss': 1.0577, 'grad_norm': 30.974002838134766, 'learning_rate': 1.6329015999011182e-07, 'fcm_dpo/beta': 0.0015803833957761526, 'fcm_dpo/q_t': 0.39755940437316895, 'fcm_dpo/delta': -0.05051477625966072, 'fcm_dpo/margin': 283.6526794433594, 'margin_dpo/margin_mean': 283.6527099609375, 'margin_dpo/margin_std': 343.00897216796875, 'logps/chosen': -464.0806579589844, 'logps/rejected': -773.1162109375, 'logps/ref_chosen': -67.29979705810547, 'logps/ref_rejected': -92.68267059326172, 'KL/chosen_KL_mean': -396.7808532714844, 'KL/rejected_KL_mean': -680.43359375, 'KL/mean': -538.607177734375, 'KL/std': 282.3050537109375, 'logits/chosen': -0.6230882406234741, 'logits/rejected': -0.6196198463439941, 'epoch': 0.65} + 65%|██████▌ | 445/681 [18:53<10:14, 2.60s/it] 65%|██████▌ | 446/681 [18:56<10:12, 2.61s/it] {'loss': 1.0322, 'grad_norm': 34.6982421875, 'learning_rate': 1.6208764069656578e-07, 'fcm_dpo/beta': 0.0015668668784201145, 'fcm_dpo/q_t': 0.39133375883102417, 'fcm_dpo/delta': -0.06862294673919678, 'fcm_dpo/margin': 296.90032958984375, 'margin_dpo/margin_mean': 296.90032958984375, 'margin_dpo/margin_std': 308.796875, 'logps/chosen': -421.77020263671875, 'logps/rejected': -760.836181640625, 'logps/ref_chosen': -59.098487854003906, 'logps/ref_rejected': -101.26419067382812, 'KL/chosen_KL_mean': -362.6717529296875, 'KL/rejected_KL_mean': -659.572021484375, 'KL/mean': -511.12188720703125, 'KL/std': 300.0148620605469, 'logits/chosen': -0.6164995431900024, 'logits/rejected': -0.6332226991653442, 'epoch': 0.65} + 65%|██████▌ | 446/681 [18:56<10:12, 2.61s/it] 66%|██████▌ | 447/681 [18:58<09:52, 2.53s/it] {'loss': 1.0422, 'grad_norm': 34.17955780029297, 'learning_rate': 1.608874379754465e-07, 'fcm_dpo/beta': 0.0015306383138522506, 'fcm_dpo/q_t': 0.3929889500141144, 'fcm_dpo/delta': -0.07781445980072021, 'fcm_dpo/margin': 309.5242004394531, 'margin_dpo/margin_mean': 309.5242004394531, 'margin_dpo/margin_std': 381.69482421875, 'logps/chosen': -430.107421875, 'logps/rejected': -782.2510986328125, 'logps/ref_chosen': -56.07533264160156, 'logps/ref_rejected': -98.69475555419922, 'KL/chosen_KL_mean': -374.0321044921875, 'KL/rejected_KL_mean': -683.5562744140625, 'KL/mean': -528.794189453125, 'KL/std': 340.5907897949219, 'logits/chosen': -0.6846290826797485, 'logits/rejected': -0.6984615325927734, 'epoch': 0.66} + 66%|██████▌ | 447/681 [18:58<09:52, 2.53s/it] 66%|██████▌ | 448/681 [19:00<09:52, 2.54s/it] {'loss': 1.0631, 'grad_norm': 42.507076263427734, 'learning_rate': 1.5968958345321177e-07, 'fcm_dpo/beta': 0.0015249757561832666, 'fcm_dpo/q_t': 0.3992360234260559, 'fcm_dpo/delta': -0.040184423327445984, 'fcm_dpo/margin': 287.4287414550781, 'margin_dpo/margin_mean': 287.42877197265625, 'margin_dpo/margin_std': 350.75927734375, 'logps/chosen': -482.5364074707031, 'logps/rejected': -812.2259521484375, 'logps/ref_chosen': -60.00384521484375, 'logps/ref_rejected': -102.26465606689453, 'KL/chosen_KL_mean': -422.5325622558594, 'KL/rejected_KL_mean': -709.9613037109375, 'KL/mean': -566.2469482421875, 'KL/std': 306.77191162109375, 'logits/chosen': -0.579893171787262, 'logits/rejected': -0.5896936655044556, 'epoch': 0.66} + 66%|██████▌ | 448/681 [19:01<09:52, 2.54s/it] 66%|██████▌ | 449/681 [19:03<09:48, 2.54s/it] {'loss': 1.0819, 'grad_norm': 32.49612808227539, 'learning_rate': 1.584941086944423e-07, 'fcm_dpo/beta': 0.0015035069081932306, 'fcm_dpo/q_t': 0.40181848406791687, 'fcm_dpo/delta': -0.048243433237075806, 'fcm_dpo/margin': 296.62786865234375, 'margin_dpo/margin_mean': 296.62786865234375, 'margin_dpo/margin_std': 435.5914306640625, 'logps/chosen': -489.9390869140625, 'logps/rejected': -807.63720703125, 'logps/ref_chosen': -67.52661895751953, 'logps/ref_rejected': -88.59690856933594, 'KL/chosen_KL_mean': -422.4124755859375, 'KL/rejected_KL_mean': -719.040283203125, 'KL/mean': -570.7263793945312, 'KL/std': 365.52325439453125, 'logits/chosen': -0.6347248554229736, 'logits/rejected': -0.6339551210403442, 'epoch': 0.66} + 66%|██████▌ | 449/681 [19:03<09:48, 2.54s/it] 66%|██████▌ | 450/681 [19:06<09:45, 2.53s/it] {'loss': 1.013, 'grad_norm': 61.81324005126953, 'learning_rate': 1.573010452010098e-07, 'fcm_dpo/beta': 0.0014846834819763899, 'fcm_dpo/q_t': 0.3863303065299988, 'fcm_dpo/delta': -0.08808425813913345, 'fcm_dpo/margin': 325.9302978515625, 'margin_dpo/margin_mean': 325.9302978515625, 'margin_dpo/margin_std': 321.34356689453125, 'logps/chosen': -400.1941833496094, 'logps/rejected': -771.7713623046875, 'logps/ref_chosen': -57.10811996459961, 'logps/ref_rejected': -102.75494384765625, 'KL/chosen_KL_mean': -343.0860595703125, 'KL/rejected_KL_mean': -669.016357421875, 'KL/mean': -506.05120849609375, 'KL/std': 322.73223876953125, 'logits/chosen': -0.6573776006698608, 'logits/rejected': -0.6733522415161133, 'epoch': 0.66} + 66%|██████▌ | 450/681 [19:06<09:45, 2.53s/it] 66%|██████▌ | 451/681 [19:08<09:20, 2.44s/it] {'loss': 1.153, 'grad_norm': 32.96580505371094, 'learning_rate': 1.5611042441124687e-07, 'fcm_dpo/beta': 0.001488700625486672, 'fcm_dpo/q_t': 0.4151974320411682, 'fcm_dpo/delta': 0.04048318788409233, 'fcm_dpo/margin': 242.31259155273438, 'margin_dpo/margin_mean': 242.31259155273438, 'margin_dpo/margin_std': 429.1678466796875, 'logps/chosen': -528.6172485351562, 'logps/rejected': -785.390380859375, 'logps/ref_chosen': -58.46883010864258, 'logps/ref_rejected': -72.92941284179688, 'KL/chosen_KL_mean': -470.1484375, 'KL/rejected_KL_mean': -712.4609985351562, 'KL/mean': -591.3046875, 'KL/std': 378.45947265625, 'logits/chosen': -0.7209557294845581, 'logits/rejected': -0.7045374512672424, 'epoch': 0.66} + 66%|██████▌ | 451/681 [19:08<09:20, 2.44s/it] 66%|██████▋ | 452/681 [19:10<09:18, 2.44s/it] {'loss': 1.0531, 'grad_norm': 22.152088165283203, 'learning_rate': 1.549222776991186e-07, 'fcm_dpo/beta': 0.001479277154430747, 'fcm_dpo/q_t': 0.39996248483657837, 'fcm_dpo/delta': -0.024412650614976883, 'fcm_dpo/margin': 286.115966796875, 'margin_dpo/margin_mean': 286.115966796875, 'margin_dpo/margin_std': 296.19451904296875, 'logps/chosen': -387.5494384765625, 'logps/rejected': -721.0462646484375, 'logps/ref_chosen': -50.39055252075195, 'logps/ref_rejected': -97.77142333984375, 'KL/chosen_KL_mean': -337.15887451171875, 'KL/rejected_KL_mean': -623.2747802734375, 'KL/mean': -480.21685791015625, 'KL/std': 290.8729248046875, 'logits/chosen': -0.6340690851211548, 'logits/rejected': -0.6552602052688599, 'epoch': 0.66} + 66%|██████▋ | 452/681 [19:10<09:18, 2.44s/it] 67%|██████▋ | 453/681 [19:12<09:06, 2.40s/it] {'loss': 1.0938, 'grad_norm': 30.73015785217285, 'learning_rate': 1.5373663637339584e-07, 'fcm_dpo/beta': 0.0014776124153286219, 'fcm_dpo/q_t': 0.4101276695728302, 'fcm_dpo/delta': 0.004782242700457573, 'fcm_dpo/margin': 267.510986328125, 'margin_dpo/margin_mean': 267.510986328125, 'margin_dpo/margin_std': 357.81243896484375, 'logps/chosen': -450.06341552734375, 'logps/rejected': -742.0669555664062, 'logps/ref_chosen': -57.71485137939453, 'logps/ref_rejected': -82.20741271972656, 'KL/chosen_KL_mean': -392.34857177734375, 'KL/rejected_KL_mean': -659.8594970703125, 'KL/mean': -526.1040649414062, 'KL/std': 298.7330322265625, 'logits/chosen': -0.6592116355895996, 'logits/rejected': -0.6468891501426697, 'epoch': 0.67} + 67%|██████▋ | 453/681 [19:13<09:06, 2.40s/it] 67%|██████▋ | 454/681 [19:15<09:09, 2.42s/it] {'loss': 1.0603, 'grad_norm': 27.640525817871094, 'learning_rate': 1.5255353167683017e-07, 'fcm_dpo/beta': 0.0014682337641716003, 'fcm_dpo/q_t': 0.3982999324798584, 'fcm_dpo/delta': -0.044936180114746094, 'fcm_dpo/margin': 301.52032470703125, 'margin_dpo/margin_mean': 301.52032470703125, 'margin_dpo/margin_std': 374.39154052734375, 'logps/chosen': -533.7886962890625, 'logps/rejected': -859.3141479492188, 'logps/ref_chosen': -60.945648193359375, 'logps/ref_rejected': -84.95079040527344, 'KL/chosen_KL_mean': -472.8430480957031, 'KL/rejected_KL_mean': -774.3634033203125, 'KL/mean': -623.6031494140625, 'KL/std': 340.2906494140625, 'logits/chosen': -0.7141397595405579, 'logits/rejected': -0.707220196723938, 'epoch': 0.67} + 67%|██████▋ | 454/681 [19:15<09:09, 2.42s/it] 67%|██████▋ | 455/681 [19:17<09:04, 2.41s/it] {'loss': 1.0177, 'grad_norm': 39.29196548461914, 'learning_rate': 1.5137299478533064e-07, 'fcm_dpo/beta': 0.0014530689222738147, 'fcm_dpo/q_t': 0.38350850343704224, 'fcm_dpo/delta': -0.12079726159572601, 'fcm_dpo/margin': 354.0106201171875, 'margin_dpo/margin_mean': 354.0106201171875, 'margin_dpo/margin_std': 396.34912109375, 'logps/chosen': -451.7264709472656, 'logps/rejected': -876.15185546875, 'logps/ref_chosen': -44.88671112060547, 'logps/ref_rejected': -115.30147552490234, 'KL/chosen_KL_mean': -406.83978271484375, 'KL/rejected_KL_mean': -760.850341796875, 'KL/mean': -583.8450927734375, 'KL/std': 351.13433837890625, 'logits/chosen': -0.6605424284934998, 'logits/rejected': -0.683269202709198, 'epoch': 0.67} + 67%|██████▋ | 455/681 [19:17<09:04, 2.41s/it] 67%|██████▋ | 456/681 [19:20<09:22, 2.50s/it] {'loss': 1.0156, 'grad_norm': 29.1214599609375, 'learning_rate': 1.5019505680714232e-07, 'fcm_dpo/beta': 0.0014169735368341208, 'fcm_dpo/q_t': 0.3890087902545929, 'fcm_dpo/delta': -0.08517496287822723, 'fcm_dpo/margin': 339.55279541015625, 'margin_dpo/margin_mean': 339.552734375, 'margin_dpo/margin_std': 345.1535949707031, 'logps/chosen': -480.4209289550781, 'logps/rejected': -868.1547241210938, 'logps/ref_chosen': -57.036781311035156, 'logps/ref_rejected': -105.21784210205078, 'KL/chosen_KL_mean': -423.3841552734375, 'KL/rejected_KL_mean': -762.9368896484375, 'KL/mean': -593.1605224609375, 'KL/std': 356.31634521484375, 'logits/chosen': -0.6683529615402222, 'logits/rejected': -0.6928262710571289, 'epoch': 0.67} + 67%|██████▋ | 456/681 [19:20<09:22, 2.50s/it] 67%|██████▋ | 457/681 [19:23<09:32, 2.56s/it] {'loss': 1.0138, 'grad_norm': 29.03094482421875, 'learning_rate': 1.4901974878202627e-07, 'fcm_dpo/beta': 0.0013881283812224865, 'fcm_dpo/q_t': 0.3869348466396332, 'fcm_dpo/delta': -0.08688442409038544, 'fcm_dpo/margin': 347.5188903808594, 'margin_dpo/margin_mean': 347.5188903808594, 'margin_dpo/margin_std': 340.45025634765625, 'logps/chosen': -465.582275390625, 'logps/rejected': -843.9682006835938, 'logps/ref_chosen': -54.24253845214844, 'logps/ref_rejected': -85.10956573486328, 'KL/chosen_KL_mean': -411.3397216796875, 'KL/rejected_KL_mean': -758.858642578125, 'KL/mean': -585.0991821289062, 'KL/std': 348.389892578125, 'logits/chosen': -0.6927535533905029, 'logits/rejected': -0.6937886476516724, 'epoch': 0.67} + 67%|██████▋ | 457/681 [19:23<09:32, 2.56s/it] 67%|██████▋ | 458/681 [19:25<09:27, 2.55s/it] {'loss': 1.0625, 'grad_norm': 23.270376205444336, 'learning_rate': 1.4784710168044212e-07, 'fcm_dpo/beta': 0.0013727301266044378, 'fcm_dpo/q_t': 0.401597797870636, 'fcm_dpo/delta': -0.030638840049505234, 'fcm_dpo/margin': 312.3773193359375, 'margin_dpo/margin_mean': 312.3773193359375, 'margin_dpo/margin_std': 366.5815734863281, 'logps/chosen': -469.21588134765625, 'logps/rejected': -823.8675537109375, 'logps/ref_chosen': -55.40888214111328, 'logps/ref_rejected': -97.68325805664062, 'KL/chosen_KL_mean': -413.8070068359375, 'KL/rejected_KL_mean': -726.184326171875, 'KL/mean': -569.9956665039062, 'KL/std': 318.3636474609375, 'logits/chosen': -0.7083392143249512, 'logits/rejected': -0.7040765285491943, 'epoch': 0.67} + 67%|██████▋ | 458/681 [19:25<09:27, 2.55s/it] 67%|██████▋ | 459/681 [19:28<09:28, 2.56s/it] {'loss': 1.0548, 'grad_norm': 29.07042121887207, 'learning_rate': 1.466771464027316e-07, 'fcm_dpo/beta': 0.0013611916219815612, 'fcm_dpo/q_t': 0.3961183726787567, 'fcm_dpo/delta': -0.05394328758120537, 'fcm_dpo/margin': 331.42340087890625, 'margin_dpo/margin_mean': 331.4234313964844, 'margin_dpo/margin_std': 397.4347839355469, 'logps/chosen': -507.44677734375, 'logps/rejected': -878.4812622070312, 'logps/ref_chosen': -46.55748748779297, 'logps/ref_rejected': -86.16854095458984, 'KL/chosen_KL_mean': -460.8892822265625, 'KL/rejected_KL_mean': -792.312744140625, 'KL/mean': -626.6010131835938, 'KL/std': 359.20355224609375, 'logits/chosen': -0.690535306930542, 'logits/rejected': -0.708480954170227, 'epoch': 0.67} + 67%|██████▋ | 459/681 [19:28<09:28, 2.56s/it] 68%|██████▊ | 460/681 [19:31<09:32, 2.59s/it] {'loss': 1.0251, 'grad_norm': 34.67963790893555, 'learning_rate': 1.4550991377830423e-07, 'fcm_dpo/beta': 0.001346941338852048, 'fcm_dpo/q_t': 0.3894132971763611, 'fcm_dpo/delta': -0.08341852575540543, 'fcm_dpo/margin': 355.984619140625, 'margin_dpo/margin_mean': 355.984619140625, 'margin_dpo/margin_std': 386.4249572753906, 'logps/chosen': -550.154052734375, 'logps/rejected': -958.6231689453125, 'logps/ref_chosen': -51.63489532470703, 'logps/ref_rejected': -104.11935424804688, 'KL/chosen_KL_mean': -498.5191650390625, 'KL/rejected_KL_mean': -854.5037841796875, 'KL/mean': -676.511474609375, 'KL/std': 354.446533203125, 'logits/chosen': -0.7603031396865845, 'logits/rejected': -0.7895260453224182, 'epoch': 0.68} + 68%|██████▊ | 460/681 [19:31<09:32, 2.59s/it] 68%|██████▊ | 461/681 [19:33<09:30, 2.59s/it] {'loss': 1.1057, 'grad_norm': 28.388534545898438, 'learning_rate': 1.4434543456482518e-07, 'fcm_dpo/beta': 0.001346740871667862, 'fcm_dpo/q_t': 0.4133082628250122, 'fcm_dpo/delta': 0.02132502943277359, 'fcm_dpo/margin': 281.6662902832031, 'margin_dpo/margin_mean': 281.666259765625, 'margin_dpo/margin_std': 388.0255432128906, 'logps/chosen': -582.9965209960938, 'logps/rejected': -895.957763671875, 'logps/ref_chosen': -55.18195724487305, 'logps/ref_rejected': -86.47689819335938, 'KL/chosen_KL_mean': -527.8145751953125, 'KL/rejected_KL_mean': -809.4808349609375, 'KL/mean': -668.647705078125, 'KL/std': 355.2934265136719, 'logits/chosen': -0.7709100842475891, 'logits/rejected': -0.7842754125595093, 'epoch': 0.68} + 68%|██████▊ | 461/681 [19:33<09:30, 2.59s/it] 68%|██████▊ | 462/681 [19:36<09:20, 2.56s/it] {'loss': 1.1576, 'grad_norm': 32.2818717956543, 'learning_rate': 1.4318373944740484e-07, 'fcm_dpo/beta': 0.0013595143100246787, 'fcm_dpo/q_t': 0.4269101023674011, 'fcm_dpo/delta': 0.0777268186211586, 'fcm_dpo/margin': 238.94668579101562, 'margin_dpo/margin_mean': 238.94668579101562, 'margin_dpo/margin_std': 403.227294921875, 'logps/chosen': -598.804443359375, 'logps/rejected': -846.6641845703125, 'logps/ref_chosen': -69.92803192138672, 'logps/ref_rejected': -78.84111022949219, 'KL/chosen_KL_mean': -528.8763427734375, 'KL/rejected_KL_mean': -767.8230590820312, 'KL/mean': -648.3497314453125, 'KL/std': 363.253662109375, 'logits/chosen': -0.83504319190979, 'logits/rejected': -0.8283437490463257, 'epoch': 0.68} + 68%|██████▊ | 462/681 [19:36<09:20, 2.56s/it] 68%|██████▊ | 463/681 [19:38<09:10, 2.53s/it] {'loss': 1.0875, 'grad_norm': 36.683773040771484, 'learning_rate': 1.4202485903778976e-07, 'fcm_dpo/beta': 0.0013674467336386442, 'fcm_dpo/q_t': 0.4052046537399292, 'fcm_dpo/delta': -0.018231874331831932, 'fcm_dpo/margin': 305.1488037109375, 'margin_dpo/margin_mean': 305.1487731933594, 'margin_dpo/margin_std': 413.82952880859375, 'logps/chosen': -577.6669921875, 'logps/rejected': -916.5663452148438, 'logps/ref_chosen': -55.27437210083008, 'logps/ref_rejected': -89.02497863769531, 'KL/chosen_KL_mean': -522.392578125, 'KL/rejected_KL_mean': -827.5413818359375, 'KL/mean': -674.9669799804688, 'KL/std': 369.949951171875, 'logits/chosen': -0.7768852710723877, 'logits/rejected': -0.7839001417160034, 'epoch': 0.68} + 68%|██████▊ | 463/681 [19:38<09:10, 2.53s/it] 68%|██████▊ | 464/681 [19:40<08:51, 2.45s/it] {'loss': 0.9451, 'grad_norm': 38.82695770263672, 'learning_rate': 1.4086882387355658e-07, 'fcm_dpo/beta': 0.0013100993819534779, 'fcm_dpo/q_t': 0.3615615665912628, 'fcm_dpo/delta': -0.2194000482559204, 'fcm_dpo/margin': 461.2894592285156, 'margin_dpo/margin_mean': 461.2894287109375, 'margin_dpo/margin_std': 441.2557678222656, 'logps/chosen': -574.5052490234375, 'logps/rejected': -1087.3717041015625, 'logps/ref_chosen': -50.91230010986328, 'logps/ref_rejected': -102.4893798828125, 'KL/chosen_KL_mean': -523.5928955078125, 'KL/rejected_KL_mean': -984.88232421875, 'KL/mean': -754.2376098632812, 'KL/std': 429.15692138671875, 'logits/chosen': -0.7948259115219116, 'logits/rejected': -0.8514028787612915, 'epoch': 0.68} + 68%|██████▊ | 464/681 [19:40<08:51, 2.45s/it] 68%|██████▊ | 465/681 [19:43<08:56, 2.48s/it] {'loss': 1.0303, 'grad_norm': 43.68606948852539, 'learning_rate': 1.3971566441730714e-07, 'fcm_dpo/beta': 0.0012883164454251528, 'fcm_dpo/q_t': 0.38422703742980957, 'fcm_dpo/delta': -0.10184454917907715, 'fcm_dpo/margin': 385.66748046875, 'margin_dpo/margin_mean': 385.66748046875, 'margin_dpo/margin_std': 453.04437255859375, 'logps/chosen': -568.1746826171875, 'logps/rejected': -1007.67138671875, 'logps/ref_chosen': -60.116851806640625, 'logps/ref_rejected': -113.94602966308594, 'KL/chosen_KL_mean': -508.05780029296875, 'KL/rejected_KL_mean': -893.725341796875, 'KL/mean': -700.8915405273438, 'KL/std': 442.56414794921875, 'logits/chosen': -0.7833234071731567, 'logits/rejected': -0.8054988980293274, 'epoch': 0.68} + 68%|██████▊ | 465/681 [19:43<08:56, 2.48s/it] 68%|██████▊ | 466/681 [19:46<09:02, 2.52s/it] {'loss': 1.0868, 'grad_norm': 32.980648040771484, 'learning_rate': 1.3856541105586545e-07, 'fcm_dpo/beta': 0.0012672768207266927, 'fcm_dpo/q_t': 0.4031521677970886, 'fcm_dpo/delta': -0.01892733946442604, 'fcm_dpo/margin': 329.58282470703125, 'margin_dpo/margin_mean': 329.58282470703125, 'margin_dpo/margin_std': 441.3002624511719, 'logps/chosen': -607.2813110351562, 'logps/rejected': -974.2586669921875, 'logps/ref_chosen': -52.920921325683594, 'logps/ref_rejected': -90.3154296875, 'KL/chosen_KL_mean': -554.3604125976562, 'KL/rejected_KL_mean': -883.9432373046875, 'KL/mean': -719.1517944335938, 'KL/std': 401.8727111816406, 'logits/chosen': -0.7898865938186646, 'logits/rejected': -0.7923921942710876, 'epoch': 0.68} + 68%|██████▊ | 466/681 [19:46<09:02, 2.52s/it] 69%|██████▊ | 467/681 [19:48<08:58, 2.51s/it] {'loss': 1.1327, 'grad_norm': 48.61074447631836, 'learning_rate': 1.3741809409947729e-07, 'fcm_dpo/beta': 0.0012504856567829847, 'fcm_dpo/q_t': 0.4021691381931305, 'fcm_dpo/delta': -0.051718711853027344, 'fcm_dpo/margin': 357.7253112792969, 'margin_dpo/margin_mean': 357.72528076171875, 'margin_dpo/margin_std': 623.5672607421875, 'logps/chosen': -797.6921997070312, 'logps/rejected': -1179.5618896484375, 'logps/ref_chosen': -78.7158203125, 'logps/ref_rejected': -102.86019897460938, 'KL/chosen_KL_mean': -718.9763793945312, 'KL/rejected_KL_mean': -1076.70166015625, 'KL/mean': -897.8390502929688, 'KL/std': 534.5426635742188, 'logits/chosen': -0.899175763130188, 'logits/rejected': -0.8960803747177124, 'epoch': 0.69} + 69%|██████▊ | 467/681 [19:48<08:58, 2.51s/it] 69%|██████▊ | 468/681 [19:51<09:11, 2.59s/it] {'loss': 1.0183, 'grad_norm': 49.870811462402344, 'learning_rate': 1.362737437810114e-07, 'fcm_dpo/beta': 0.001233407761901617, 'fcm_dpo/q_t': 0.38278520107269287, 'fcm_dpo/delta': -0.13217654824256897, 'fcm_dpo/margin': 425.77752685546875, 'margin_dpo/margin_mean': 425.77752685546875, 'margin_dpo/margin_std': 522.344970703125, 'logps/chosen': -644.6024169921875, 'logps/rejected': -1101.473388671875, 'logps/ref_chosen': -69.93536376953125, 'logps/ref_rejected': -101.02880859375, 'KL/chosen_KL_mean': -574.6669921875, 'KL/rejected_KL_mean': -1000.444580078125, 'KL/mean': -787.5557861328125, 'KL/std': 476.1236267089844, 'logits/chosen': -0.8734508752822876, 'logits/rejected': -0.8809393644332886, 'epoch': 0.69} + 69%|██████▊ | 468/681 [19:51<09:11, 2.59s/it] 69%|██████▉ | 469/681 [19:54<09:21, 2.65s/it] {'loss': 1.0167, 'grad_norm': 41.835994720458984, 'learning_rate': 1.351323902551631e-07, 'fcm_dpo/beta': 0.0011981693096458912, 'fcm_dpo/q_t': 0.38290101289749146, 'fcm_dpo/delta': -0.11811123043298721, 'fcm_dpo/margin': 426.45867919921875, 'margin_dpo/margin_mean': 426.4587097167969, 'margin_dpo/margin_std': 479.927978515625, 'logps/chosen': -684.2052001953125, 'logps/rejected': -1147.32568359375, 'logps/ref_chosen': -68.12469482421875, 'logps/ref_rejected': -104.78640747070312, 'KL/chosen_KL_mean': -616.0804443359375, 'KL/rejected_KL_mean': -1042.5391845703125, 'KL/mean': -829.309814453125, 'KL/std': 435.38775634765625, 'logits/chosen': -0.871213436126709, 'logits/rejected': -0.8826764822006226, 'epoch': 0.69} + 69%|██████▉ | 469/681 [19:54<09:21, 2.65s/it] 69%|██████▉ | 470/681 [19:56<09:11, 2.62s/it] {'loss': 1.07, 'grad_norm': 28.576284408569336, 'learning_rate': 1.339940635976592e-07, 'fcm_dpo/beta': 0.0011902997503057122, 'fcm_dpo/q_t': 0.39579594135284424, 'fcm_dpo/delta': -0.05773991718888283, 'fcm_dpo/margin': 382.3490905761719, 'margin_dpo/margin_mean': 382.34912109375, 'margin_dpo/margin_std': 517.0437622070312, 'logps/chosen': -570.29296875, 'logps/rejected': -991.552978515625, 'logps/ref_chosen': -43.791927337646484, 'logps/ref_rejected': -82.70285034179688, 'KL/chosen_KL_mean': -526.5010986328125, 'KL/rejected_KL_mean': -908.85009765625, 'KL/mean': -717.6756591796875, 'KL/std': 461.4809265136719, 'logits/chosen': -0.8578736782073975, 'logits/rejected': -0.8692770004272461, 'epoch': 0.69} + 69%|██████▉ | 470/681 [19:56<09:11, 2.62s/it] 69%|██████▉ | 471/681 [19:58<08:51, 2.53s/it] {'loss': 1.1157, 'grad_norm': 33.51979446411133, 'learning_rate': 1.3285879380446563e-07, 'fcm_dpo/beta': 0.0011771449353545904, 'fcm_dpo/q_t': 0.41261669993400574, 'fcm_dpo/delta': 0.0024843141436576843, 'fcm_dpo/margin': 337.25445556640625, 'margin_dpo/margin_mean': 337.25445556640625, 'margin_dpo/margin_std': 525.2001342773438, 'logps/chosen': -731.5079345703125, 'logps/rejected': -1089.033447265625, 'logps/ref_chosen': -63.33952331542969, 'logps/ref_rejected': -83.61048126220703, 'KL/chosen_KL_mean': -668.16845703125, 'KL/rejected_KL_mean': -1005.4229125976562, 'KL/mean': -836.795654296875, 'KL/std': 489.2864990234375, 'logits/chosen': -0.9775102734565735, 'logits/rejected': -0.9886398315429688, 'epoch': 0.69} + 69%|██████▉ | 471/681 [19:58<08:51, 2.53s/it] 69%|██████▉ | 472/681 [20:01<09:04, 2.61s/it] {'loss': 1.098, 'grad_norm': 32.862762451171875, 'learning_rate': 1.317266107909975e-07, 'fcm_dpo/beta': 0.001169139752164483, 'fcm_dpo/q_t': 0.4020659327507019, 'fcm_dpo/delta': -0.060674797743558884, 'fcm_dpo/margin': 391.2642822265625, 'margin_dpo/margin_mean': 391.2642822265625, 'margin_dpo/margin_std': 617.0120849609375, 'logps/chosen': -752.218505859375, 'logps/rejected': -1177.02587890625, 'logps/ref_chosen': -83.66610717773438, 'logps/ref_rejected': -117.20919799804688, 'KL/chosen_KL_mean': -668.5523681640625, 'KL/rejected_KL_mean': -1059.816650390625, 'KL/mean': -864.1845703125, 'KL/std': 573.5145263671875, 'logits/chosen': -0.9176386594772339, 'logits/rejected': -0.8923181295394897, 'epoch': 0.69} + 69%|██████▉ | 472/681 [20:01<09:04, 2.61s/it] 69%|██████▉ | 473/681 [20:04<09:08, 2.64s/it] {'loss': 1.3537, 'grad_norm': 116.70816802978516, 'learning_rate': 1.3059754439133002e-07, 'fcm_dpo/beta': 0.0011696891160681844, 'fcm_dpo/q_t': 0.45167526602745056, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 190.77340698242188, 'margin_dpo/margin_mean': 190.77340698242188, 'margin_dpo/margin_std': 781.681640625, 'logps/chosen': -864.5390625, 'logps/rejected': -1072.9620361328125, 'logps/ref_chosen': -63.49696731567383, 'logps/ref_rejected': -81.14657592773438, 'KL/chosen_KL_mean': -801.0420532226562, 'KL/rejected_KL_mean': -991.8154296875, 'KL/mean': -896.4287109375, 'KL/std': 595.682373046875, 'logits/chosen': -0.896651029586792, 'logits/rejected': -0.8670951128005981, 'epoch': 0.69} + 69%|██████▉ | 473/681 [20:04<09:08, 2.64s/it] 70%|██████▉ | 474/681 [20:07<09:09, 2.65s/it] {'loss': 1.149, 'grad_norm': 38.174800872802734, 'learning_rate': 1.2947162435741277e-07, 'fcm_dpo/beta': 0.001158315921202302, 'fcm_dpo/q_t': 0.4119930565357208, 'fcm_dpo/delta': -0.09819056838750839, 'fcm_dpo/margin': 333.6046447753906, 'margin_dpo/margin_mean': 333.6046142578125, 'margin_dpo/margin_std': 572.8232421875, 'logps/chosen': -700.9415283203125, 'logps/rejected': -1072.0146484375, 'logps/ref_chosen': -52.6119384765625, 'logps/ref_rejected': -90.08041381835938, 'KL/chosen_KL_mean': -648.32958984375, 'KL/rejected_KL_mean': -981.9342041015625, 'KL/mean': -815.1319580078125, 'KL/std': 509.63934326171875, 'logits/chosen': -0.8967859745025635, 'logits/rejected': -0.9001563191413879, 'epoch': 0.7} + 70%|██████▉ | 474/681 [20:07<09:09, 2.65s/it] 70%|██████▉ | 475/681 [20:09<08:55, 2.60s/it] {'loss': 1.0222, 'grad_norm': 39.29723358154297, 'learning_rate': 1.2834888035828596e-07, 'fcm_dpo/beta': 0.0011308316607028246, 'fcm_dpo/q_t': 0.3888343572616577, 'fcm_dpo/delta': -0.08551047742366791, 'fcm_dpo/margin': 425.5621337890625, 'margin_dpo/margin_mean': 425.56219482421875, 'margin_dpo/margin_std': 453.77056884765625, 'logps/chosen': -520.4694213867188, 'logps/rejected': -993.599365234375, 'logps/ref_chosen': -42.49519348144531, 'logps/ref_rejected': -90.06294250488281, 'KL/chosen_KL_mean': -477.9742126464844, 'KL/rejected_KL_mean': -903.536376953125, 'KL/mean': -690.75537109375, 'KL/std': 416.74029541015625, 'logits/chosen': -0.9726539850234985, 'logits/rejected': -0.9993470907211304, 'epoch': 0.7} + 70%|██████▉ | 475/681 [20:09<08:55, 2.60s/it] 70%|██████▉ | 476/681 [20:12<08:49, 2.58s/it] {'loss': 1.1039, 'grad_norm': 51.48725509643555, 'learning_rate': 1.2722934197929802e-07, 'fcm_dpo/beta': 0.0011303846258670092, 'fcm_dpo/q_t': 0.41371750831604004, 'fcm_dpo/delta': 0.02247927524149418, 'fcm_dpo/margin': 334.7321472167969, 'margin_dpo/margin_mean': 334.732177734375, 'margin_dpo/margin_std': 454.2925720214844, 'logps/chosen': -616.783447265625, 'logps/rejected': -982.2764892578125, 'logps/ref_chosen': -42.94938278198242, 'logps/ref_rejected': -73.71023559570312, 'KL/chosen_KL_mean': -573.8340454101562, 'KL/rejected_KL_mean': -908.5662841796875, 'KL/mean': -741.2000732421875, 'KL/std': 460.19805908203125, 'logits/chosen': -0.9355987310409546, 'logits/rejected': -0.9493337869644165, 'epoch': 0.7} + 70%|██████▉ | 476/681 [20:12<08:49, 2.58s/it] 70%|███████ | 477/681 [20:14<08:46, 2.58s/it] {'loss': 1.1367, 'grad_norm': 32.003143310546875, 'learning_rate': 1.2611303872132631e-07, 'fcm_dpo/beta': 0.001139188650995493, 'fcm_dpo/q_t': 0.4125925898551941, 'fcm_dpo/delta': 0.0156848281621933, 'fcm_dpo/margin': 337.6333923339844, 'margin_dpo/margin_mean': 337.6333923339844, 'margin_dpo/margin_std': 572.432373046875, 'logps/chosen': -691.1122436523438, 'logps/rejected': -1034.1103515625, 'logps/ref_chosen': -70.77261352539062, 'logps/ref_rejected': -76.13737487792969, 'KL/chosen_KL_mean': -620.339599609375, 'KL/rejected_KL_mean': -957.9730224609375, 'KL/mean': -789.1563110351562, 'KL/std': 484.395751953125, 'logits/chosen': -0.9794288873672485, 'logits/rejected': -0.9472505450248718, 'epoch': 0.7} + 70%|███████ | 477/681 [20:14<08:46, 2.58s/it] 70%|███████ | 478/681 [20:17<08:49, 2.61s/it] {'loss': 1.071, 'grad_norm': 36.747989654541016, 'learning_rate': 1.2500000000000005e-07, 'fcm_dpo/beta': 0.0011328569380566478, 'fcm_dpo/q_t': 0.3992430567741394, 'fcm_dpo/delta': -0.04220225661993027, 'fcm_dpo/margin': 388.6256103515625, 'margin_dpo/margin_mean': 388.6256103515625, 'margin_dpo/margin_std': 506.4171142578125, 'logps/chosen': -550.90380859375, 'logps/rejected': -983.450927734375, 'logps/ref_chosen': -41.440513610839844, 'logps/ref_rejected': -85.36196899414062, 'KL/chosen_KL_mean': -509.4632568359375, 'KL/rejected_KL_mean': -898.0889282226562, 'KL/mean': -703.776123046875, 'KL/std': 428.4776611328125, 'logits/chosen': -0.8643758296966553, 'logits/rejected': -0.8856371641159058, 'epoch': 0.7} + 70%|███████ | 478/681 [20:17<08:49, 2.61s/it] 70%|███████ | 479/681 [20:19<08:35, 2.55s/it] {'loss': 1.1188, 'grad_norm': 28.37042236328125, 'learning_rate': 1.2389025514492456e-07, 'fcm_dpo/beta': 0.0011302338680252433, 'fcm_dpo/q_t': 0.40734556317329407, 'fcm_dpo/delta': -0.025240201503038406, 'fcm_dpo/margin': 374.60540771484375, 'margin_dpo/margin_mean': 374.60540771484375, 'margin_dpo/margin_std': 616.9238891601562, 'logps/chosen': -728.072265625, 'logps/rejected': -1143.8861083984375, 'logps/ref_chosen': -53.907920837402344, 'logps/ref_rejected': -95.1163330078125, 'KL/chosen_KL_mean': -674.1643676757812, 'KL/rejected_KL_mean': -1048.769775390625, 'KL/mean': -861.4671020507812, 'KL/std': 529.594482421875, 'logits/chosen': -0.9004903435707092, 'logits/rejected': -0.9291303753852844, 'epoch': 0.7} + 70%|███████ | 479/681 [20:19<08:35, 2.55s/it] 70%|███████ | 480/681 [20:22<08:24, 2.51s/it] {'loss': 1.1837, 'grad_norm': 56.938568115234375, 'learning_rate': 1.227838333989088e-07, 'fcm_dpo/beta': 0.0011154343374073505, 'fcm_dpo/q_t': 0.42702075839042664, 'fcm_dpo/delta': -0.04910217225551605, 'fcm_dpo/margin': 292.4875183105469, 'margin_dpo/margin_mean': 292.48748779296875, 'margin_dpo/margin_std': 548.897216796875, 'logps/chosen': -853.5903930664062, 'logps/rejected': -1170.32763671875, 'logps/ref_chosen': -58.682701110839844, 'logps/ref_rejected': -82.93248748779297, 'KL/chosen_KL_mean': -794.90771484375, 'KL/rejected_KL_mean': -1087.395263671875, 'KL/mean': -941.1514892578125, 'KL/std': 502.28509521484375, 'logits/chosen': -0.9496725797653198, 'logits/rejected': -0.9423930644989014, 'epoch': 0.7} + 70%|███████ | 480/681 [20:22<08:24, 2.51s/it] 71%|███████ | 481/681 [20:24<08:22, 2.51s/it] {'loss': 1.0369, 'grad_norm': 31.939149856567383, 'learning_rate': 1.2168076391719489e-07, 'fcm_dpo/beta': 0.0010935836471617222, 'fcm_dpo/q_t': 0.3881867527961731, 'fcm_dpo/delta': -0.10490460693836212, 'fcm_dpo/margin': 456.9600524902344, 'margin_dpo/margin_mean': 456.96002197265625, 'margin_dpo/margin_std': 578.0529174804688, 'logps/chosen': -721.306640625, 'logps/rejected': -1215.722900390625, 'logps/ref_chosen': -54.964271545410156, 'logps/ref_rejected': -92.42044067382812, 'KL/chosen_KL_mean': -666.3424072265625, 'KL/rejected_KL_mean': -1123.3023681640625, 'KL/mean': -894.8223876953125, 'KL/std': 532.44287109375, 'logits/chosen': -0.9752233028411865, 'logits/rejected': -1.0002660751342773, 'epoch': 0.71} + 71%|███████ | 481/681 [20:24<08:22, 2.51s/it] 71%|███████ | 482/681 [20:27<08:37, 2.60s/it] {'loss': 1.2776, 'grad_norm': 58.26255416870117, 'learning_rate': 1.2058107576668938e-07, 'fcm_dpo/beta': 0.0010957256890833378, 'fcm_dpo/q_t': 0.4466710090637207, 'fcm_dpo/delta': 0.08043741434812546, 'fcm_dpo/margin': 204.8200225830078, 'margin_dpo/margin_mean': 204.82000732421875, 'margin_dpo/margin_std': 610.1978759765625, 'logps/chosen': -772.852294921875, 'logps/rejected': -997.7083740234375, 'logps/ref_chosen': -67.553466796875, 'logps/ref_rejected': -87.58953857421875, 'KL/chosen_KL_mean': -705.298828125, 'KL/rejected_KL_mean': -910.1187744140625, 'KL/mean': -807.7088623046875, 'KL/std': 523.2813720703125, 'logits/chosen': -0.8530906438827515, 'logits/rejected': -0.8416086435317993, 'epoch': 0.71} + 71%|███████ | 482/681 [20:27<08:37, 2.60s/it] 71%|███████ | 483/681 [20:30<08:35, 2.60s/it] {'loss': 1.026, 'grad_norm': 35.892913818359375, 'learning_rate': 1.194847979251979e-07, 'fcm_dpo/beta': 0.0010834920685738325, 'fcm_dpo/q_t': 0.3861696720123291, 'fcm_dpo/delta': -0.11722610890865326, 'fcm_dpo/margin': 471.59527587890625, 'margin_dpo/margin_mean': 471.59527587890625, 'margin_dpo/margin_std': 582.1723022460938, 'logps/chosen': -690.580078125, 'logps/rejected': -1194.632568359375, 'logps/ref_chosen': -63.32981872558594, 'logps/ref_rejected': -95.78697204589844, 'KL/chosen_KL_mean': -627.250244140625, 'KL/rejected_KL_mean': -1098.845458984375, 'KL/mean': -863.0478515625, 'KL/std': 514.4844970703125, 'logits/chosen': -0.9211816787719727, 'logits/rejected': -0.9332787394523621, 'epoch': 0.71} + 71%|███████ | 483/681 [20:30<08:35, 2.60s/it] 71%|███████ | 484/681 [20:32<08:16, 2.52s/it] {'loss': 1.0526, 'grad_norm': 45.20163345336914, 'learning_rate': 1.1839195928066101e-07, 'fcm_dpo/beta': 0.0010744791943579912, 'fcm_dpo/q_t': 0.39632922410964966, 'fcm_dpo/delta': -0.059597231447696686, 'fcm_dpo/margin': 425.05419921875, 'margin_dpo/margin_mean': 425.0542297363281, 'margin_dpo/margin_std': 516.1748657226562, 'logps/chosen': -596.7596435546875, 'logps/rejected': -1047.047119140625, 'logps/ref_chosen': -59.13812255859375, 'logps/ref_rejected': -84.37144470214844, 'KL/chosen_KL_mean': -537.6214599609375, 'KL/rejected_KL_mean': -962.6756591796875, 'KL/mean': -750.1485595703125, 'KL/std': 499.0599670410156, 'logits/chosen': -0.9501423835754395, 'logits/rejected': -0.974023699760437, 'epoch': 0.71} + 71%|███████ | 484/681 [20:32<08:16, 2.52s/it] 71%|███████ | 485/681 [20:34<08:11, 2.51s/it] {'loss': 1.0827, 'grad_norm': 41.25438690185547, 'learning_rate': 1.1730258863039347e-07, 'fcm_dpo/beta': 0.0010628815507516265, 'fcm_dpo/q_t': 0.40300631523132324, 'fcm_dpo/delta': -0.034554317593574524, 'fcm_dpo/margin': 407.4237060546875, 'margin_dpo/margin_mean': 407.4237060546875, 'margin_dpo/margin_std': 568.4012451171875, 'logps/chosen': -610.2982177734375, 'logps/rejected': -1062.2364501953125, 'logps/ref_chosen': -58.849571228027344, 'logps/ref_rejected': -103.36408233642578, 'KL/chosen_KL_mean': -551.4486083984375, 'KL/rejected_KL_mean': -958.8723754882812, 'KL/mean': -755.1604614257812, 'KL/std': 496.97821044921875, 'logits/chosen': -0.8435344696044922, 'logits/rejected': -0.8657543659210205, 'epoch': 0.71} + 71%|███████ | 485/681 [20:34<08:11, 2.51s/it] 71%|███████▏ | 486/681 [20:37<07:42, 2.37s/it] {'loss': 1.0755, 'grad_norm': 33.520912170410156, 'learning_rate': 1.1621671468032493e-07, 'fcm_dpo/beta': 0.0010442393831908703, 'fcm_dpo/q_t': 0.3934386372566223, 'fcm_dpo/delta': -0.08795761317014694, 'fcm_dpo/margin': 463.13702392578125, 'margin_dpo/margin_mean': 463.13702392578125, 'margin_dpo/margin_std': 690.687255859375, 'logps/chosen': -691.0333251953125, 'logps/rejected': -1191.050048828125, 'logps/ref_chosen': -55.25966262817383, 'logps/ref_rejected': -92.13936614990234, 'KL/chosen_KL_mean': -635.773681640625, 'KL/rejected_KL_mean': -1098.91064453125, 'KL/mean': -867.3421630859375, 'KL/std': 573.5325927734375, 'logits/chosen': -0.9062224626541138, 'logits/rejected': -0.9161352515220642, 'epoch': 0.71} + 71%|███████▏ | 486/681 [20:37<07:42, 2.37s/it] 72%|███████▏ | 487/681 [20:39<07:58, 2.46s/it] {'loss': 1.1348, 'grad_norm': 35.01145935058594, 'learning_rate': 1.1513436604424378e-07, 'fcm_dpo/beta': 0.0010508847190067172, 'fcm_dpo/q_t': 0.41665130853652954, 'fcm_dpo/delta': 0.04303121566772461, 'fcm_dpo/margin': 340.75640869140625, 'margin_dpo/margin_mean': 340.75640869140625, 'margin_dpo/margin_std': 538.1895141601562, 'logps/chosen': -735.1783447265625, 'logps/rejected': -1115.290283203125, 'logps/ref_chosen': -53.06330871582031, 'logps/ref_rejected': -92.41883087158203, 'KL/chosen_KL_mean': -682.1150512695312, 'KL/rejected_KL_mean': -1022.8714599609375, 'KL/mean': -852.4932861328125, 'KL/std': 518.8946533203125, 'logits/chosen': -0.9379677772521973, 'logits/rejected': -0.9421348571777344, 'epoch': 0.72} + 72%|███████▏ | 487/681 [20:39<07:58, 2.46s/it] 72%|███████▏ | 488/681 [20:42<08:05, 2.52s/it] {'loss': 1.098, 'grad_norm': 31.856176376342773, 'learning_rate': 1.1405557124304335e-07, 'fcm_dpo/beta': 0.0010568746365606785, 'fcm_dpo/q_t': 0.4128245711326599, 'fcm_dpo/delta': 0.024668315425515175, 'fcm_dpo/margin': 355.7095947265625, 'margin_dpo/margin_mean': 355.7095642089844, 'margin_dpo/margin_std': 445.57000732421875, 'logps/chosen': -601.6868896484375, 'logps/rejected': -989.1748046875, 'logps/ref_chosen': -52.22815704345703, 'logps/ref_rejected': -84.00656127929688, 'KL/chosen_KL_mean': -549.458740234375, 'KL/rejected_KL_mean': -905.1682739257812, 'KL/mean': -727.3135375976562, 'KL/std': 453.9654235839844, 'logits/chosen': -0.8694427609443665, 'logits/rejected': -0.8729550838470459, 'epoch': 0.72} + 72%|███████▏ | 488/681 [20:42<08:05, 2.52s/it] 72%|███████▏ | 489/681 [20:44<07:59, 2.50s/it] {'loss': 1.1035, 'grad_norm': 29.248310089111328, 'learning_rate': 1.1298035870396985e-07, 'fcm_dpo/beta': 0.001060036476701498, 'fcm_dpo/q_t': 0.41301560401916504, 'fcm_dpo/delta': 0.013825876638293266, 'fcm_dpo/margin': 364.5850830078125, 'margin_dpo/margin_mean': 364.5850830078125, 'margin_dpo/margin_std': 510.5589599609375, 'logps/chosen': -563.8619384765625, 'logps/rejected': -951.85546875, 'logps/ref_chosen': -55.989627838134766, 'logps/ref_rejected': -79.39812469482422, 'KL/chosen_KL_mean': -507.87225341796875, 'KL/rejected_KL_mean': -872.4573974609375, 'KL/mean': -690.164794921875, 'KL/std': 465.132568359375, 'logits/chosen': -0.9246722459793091, 'logits/rejected': -0.9232733249664307, 'epoch': 0.72} + 72%|███████▏ | 489/681 [20:44<07:59, 2.50s/it] 72%|███████▏ | 490/681 [20:47<08:10, 2.57s/it] {'loss': 1.1355, 'grad_norm': 38.29257583618164, 'learning_rate': 1.1190875675987355e-07, 'fcm_dpo/beta': 0.0010612778132781386, 'fcm_dpo/q_t': 0.4127916693687439, 'fcm_dpo/delta': 0.007974715903401375, 'fcm_dpo/margin': 369.5592041015625, 'margin_dpo/margin_mean': 369.5591735839844, 'margin_dpo/margin_std': 631.1531982421875, 'logps/chosen': -659.435546875, 'logps/rejected': -1087.037353515625, 'logps/ref_chosen': -52.36639404296875, 'logps/ref_rejected': -110.4090576171875, 'KL/chosen_KL_mean': -607.0692138671875, 'KL/rejected_KL_mean': -976.6283569335938, 'KL/mean': -791.8487548828125, 'KL/std': 546.3399047851562, 'logits/chosen': -0.9299312829971313, 'logits/rejected': -0.9667763710021973, 'epoch': 0.72} + 72%|███████▏ | 490/681 [20:47<08:10, 2.57s/it] 72%|███████▏ | 491/681 [20:49<08:02, 2.54s/it] {'loss': 1.1979, 'grad_norm': 29.697641372680664, 'learning_rate': 1.1084079364846241e-07, 'fcm_dpo/beta': 0.0010824804194271564, 'fcm_dpo/q_t': 0.43931227922439575, 'fcm_dpo/delta': 0.13659176230430603, 'fcm_dpo/margin': 246.49998474121094, 'margin_dpo/margin_mean': 246.5, 'margin_dpo/margin_std': 469.1783752441406, 'logps/chosen': -644.9371948242188, 'logps/rejected': -904.59375, 'logps/ref_chosen': -60.11626434326172, 'logps/ref_rejected': -73.27278900146484, 'KL/chosen_KL_mean': -584.8209228515625, 'KL/rejected_KL_mean': -831.3209228515625, 'KL/mean': -708.0709228515625, 'KL/std': 479.92486572265625, 'logits/chosen': -0.9027219414710999, 'logits/rejected': -0.8923330307006836, 'epoch': 0.72} + 72%|███████▏ | 491/681 [20:50<08:02, 2.54s/it] 72%|███████▏ | 492/681 [20:52<08:10, 2.59s/it] {'loss': 1.2164, 'grad_norm': 31.3509521484375, 'learning_rate': 1.097764975115576e-07, 'fcm_dpo/beta': 0.001107184449210763, 'fcm_dpo/q_t': 0.4381140470504761, 'fcm_dpo/delta': 0.12549251317977905, 'fcm_dpo/margin': 251.2227325439453, 'margin_dpo/margin_mean': 251.22274780273438, 'margin_dpo/margin_std': 553.6586303710938, 'logps/chosen': -657.315673828125, 'logps/rejected': -927.203857421875, 'logps/ref_chosen': -53.994178771972656, 'logps/ref_rejected': -72.65962219238281, 'KL/chosen_KL_mean': -603.3214721679688, 'KL/rejected_KL_mean': -854.5443115234375, 'KL/mean': -728.932861328125, 'KL/std': 482.99114990234375, 'logits/chosen': -0.9563778638839722, 'logits/rejected': -0.9354947805404663, 'epoch': 0.72} + 72%|███████▏ | 492/681 [20:52<08:10, 2.59s/it] 72%|███████▏ | 493/681 [20:55<08:13, 2.63s/it] {'loss': 1.1783, 'grad_norm': 33.60331344604492, 'learning_rate': 1.0871589639435203e-07, 'fcm_dpo/beta': 0.001113426173105836, 'fcm_dpo/q_t': 0.42827779054641724, 'fcm_dpo/delta': -0.012180797755718231, 'fcm_dpo/margin': 275.09185791015625, 'margin_dpo/margin_mean': 275.09185791015625, 'margin_dpo/margin_std': 499.40582275390625, 'logps/chosen': -692.886474609375, 'logps/rejected': -979.8040771484375, 'logps/ref_chosen': -75.49723815917969, 'logps/ref_rejected': -87.32301330566406, 'KL/chosen_KL_mean': -617.3892211914062, 'KL/rejected_KL_mean': -892.4810791015625, 'KL/mean': -754.9351806640625, 'KL/std': 521.77880859375, 'logits/chosen': -0.975821852684021, 'logits/rejected': -0.9446998834609985, 'epoch': 0.72} + 72%|███████▏ | 493/681 [20:55<08:13, 2.63s/it] 73%|███████▎ | 494/681 [20:58<08:10, 2.62s/it] {'loss': 1.0186, 'grad_norm': 42.78213882446289, 'learning_rate': 1.0765901824467166e-07, 'fcm_dpo/beta': 0.0010977558558806777, 'fcm_dpo/q_t': 0.38731634616851807, 'fcm_dpo/delta': -0.09117947518825531, 'fcm_dpo/margin': 443.3415832519531, 'margin_dpo/margin_mean': 443.34161376953125, 'margin_dpo/margin_std': 465.9763488769531, 'logps/chosen': -528.9733276367188, 'logps/rejected': -1017.0469970703125, 'logps/ref_chosen': -41.35926818847656, 'logps/ref_rejected': -86.09136962890625, 'KL/chosen_KL_mean': -487.6140441894531, 'KL/rejected_KL_mean': -930.9556884765625, 'KL/mean': -709.284912109375, 'KL/std': 471.108154296875, 'logits/chosen': -0.8541857600212097, 'logits/rejected': -0.8858389854431152, 'epoch': 0.73} + 73%|███████▎ | 494/681 [20:58<08:10, 2.62s/it] 73%|███████▎ | 495/681 [21:00<08:17, 2.68s/it] {'loss': 1.0979, 'grad_norm': 31.058788299560547, 'learning_rate': 1.0660589091223854e-07, 'fcm_dpo/beta': 0.0010909372940659523, 'fcm_dpo/q_t': 0.4072011411190033, 'fcm_dpo/delta': -0.019423317164182663, 'fcm_dpo/margin': 383.71905517578125, 'margin_dpo/margin_mean': 383.71905517578125, 'margin_dpo/margin_std': 579.0, 'logps/chosen': -613.523681640625, 'logps/rejected': -1025.132080078125, 'logps/ref_chosen': -63.53507995605469, 'logps/ref_rejected': -91.42443084716797, 'KL/chosen_KL_mean': -549.9886474609375, 'KL/rejected_KL_mean': -933.7076416015625, 'KL/mean': -741.8480834960938, 'KL/std': 484.11676025390625, 'logits/chosen': -0.9760909080505371, 'logits/rejected': -0.9824463725090027, 'epoch': 0.73} + 73%|███████▎ | 495/681 [21:00<08:17, 2.68s/it] 73%|███████▎ | 496/681 [21:03<08:15, 2.68s/it] {'loss': 1.2216, 'grad_norm': 56.444026947021484, 'learning_rate': 1.0555654214793722e-07, 'fcm_dpo/beta': 0.0011184395989403129, 'fcm_dpo/q_t': 0.4458683431148529, 'fcm_dpo/delta': 0.17267850041389465, 'fcm_dpo/margin': 206.68927001953125, 'margin_dpo/margin_mean': 206.6892852783203, 'margin_dpo/margin_std': 417.4690856933594, 'logps/chosen': -750.7238159179688, 'logps/rejected': -969.1505126953125, 'logps/ref_chosen': -72.5919189453125, 'logps/ref_rejected': -84.32933807373047, 'KL/chosen_KL_mean': -678.1318359375, 'KL/rejected_KL_mean': -884.8211669921875, 'KL/mean': -781.4765625, 'KL/std': 376.869384765625, 'logits/chosen': -0.9465994238853455, 'logits/rejected': -0.9162840843200684, 'epoch': 0.73} + 73%|███████▎ | 496/681 [21:03<08:15, 2.68s/it] 73%|███████▎ | 497/681 [21:06<08:08, 2.65s/it] {'loss': 1.2261, 'grad_norm': 34.23469924926758, 'learning_rate': 1.0451099960308374e-07, 'fcm_dpo/beta': 0.0011322898790240288, 'fcm_dpo/q_t': 0.4449055790901184, 'fcm_dpo/delta': 0.021924598142504692, 'fcm_dpo/margin': 210.45884704589844, 'margin_dpo/margin_mean': 210.45883178710938, 'margin_dpo/margin_std': 444.05548095703125, 'logps/chosen': -695.841064453125, 'logps/rejected': -923.9942626953125, 'logps/ref_chosen': -58.59397506713867, 'logps/ref_rejected': -76.28836822509766, 'KL/chosen_KL_mean': -637.2471313476562, 'KL/rejected_KL_mean': -847.7059326171875, 'KL/mean': -742.4765625, 'KL/std': 471.04632568359375, 'logits/chosen': -0.89613938331604, 'logits/rejected': -0.8781349658966064, 'epoch': 0.73} + 73%|███████▎ | 497/681 [21:06<08:08, 2.65s/it] 73%|███████▎ | 498/681 [21:08<08:11, 2.68s/it] {'loss': 1.1264, 'grad_norm': 39.67582321166992, 'learning_rate': 1.0346929082869641e-07, 'fcm_dpo/beta': 0.001133624231442809, 'fcm_dpo/q_t': 0.4114514887332916, 'fcm_dpo/delta': 0.01301711704581976, 'fcm_dpo/margin': 341.8111267089844, 'margin_dpo/margin_mean': 341.8111267089844, 'margin_dpo/margin_std': 551.8848266601562, 'logps/chosen': -684.4744873046875, 'logps/rejected': -1039.0379638671875, 'logps/ref_chosen': -71.20565795898438, 'logps/ref_rejected': -83.95803833007812, 'KL/chosen_KL_mean': -613.268798828125, 'KL/rejected_KL_mean': -955.0799560546875, 'KL/mean': -784.17431640625, 'KL/std': 500.90350341796875, 'logits/chosen': -0.9115738868713379, 'logits/rejected': -0.8932760953903198, 'epoch': 0.73} + 73%|███████▎ | 498/681 [21:08<08:11, 2.68s/it] 73%|███████▎ | 499/681 [21:11<07:59, 2.63s/it] {'loss': 1.0567, 'grad_norm': 47.4464111328125, 'learning_rate': 1.0243144327477013e-07, 'fcm_dpo/beta': 0.0011214257683604956, 'fcm_dpo/q_t': 0.39359456300735474, 'fcm_dpo/delta': -0.07398218661546707, 'fcm_dpo/margin': 419.45245361328125, 'margin_dpo/margin_mean': 419.45245361328125, 'margin_dpo/margin_std': 553.3563232421875, 'logps/chosen': -583.9425048828125, 'logps/rejected': -1053.218505859375, 'logps/ref_chosen': -51.25519561767578, 'logps/ref_rejected': -101.07870483398438, 'KL/chosen_KL_mean': -532.6873168945312, 'KL/rejected_KL_mean': -952.1397705078125, 'KL/mean': -742.4135131835938, 'KL/std': 498.259033203125, 'logits/chosen': -0.9583698511123657, 'logits/rejected': -0.991510272026062, 'epoch': 0.73} + 73%|███████▎ | 499/681 [21:11<07:59, 2.63s/it] 73%|███████▎ | 500/681 [21:13<07:46, 2.58s/it] {'loss': 1.1254, 'grad_norm': 42.036109924316406, 'learning_rate': 1.0139748428955333e-07, 'fcm_dpo/beta': 0.00111986487172544, 'fcm_dpo/q_t': 0.41234683990478516, 'fcm_dpo/delta': 0.015957213938236237, 'fcm_dpo/margin': 343.4754333496094, 'margin_dpo/margin_mean': 343.4754333496094, 'margin_dpo/margin_std': 541.9052734375, 'logps/chosen': -700.7294921875, 'logps/rejected': -1081.1116943359375, 'logps/ref_chosen': -57.027442932128906, 'logps/ref_rejected': -93.93421173095703, 'KL/chosen_KL_mean': -643.7020874023438, 'KL/rejected_KL_mean': -987.177490234375, 'KL/mean': -815.4398193359375, 'KL/std': 434.6232604980469, 'logits/chosen': -0.9217053055763245, 'logits/rejected': -0.9468744993209839, 'epoch': 0.73} + 73%|███████▎ | 500/681 [21:13<07:46, 2.58s/it] 74%|███████▎ | 501/681 [21:16<07:41, 2.57s/it] {'loss': 1.113, 'grad_norm': 31.888412475585938, 'learning_rate': 1.0036744111882672e-07, 'fcm_dpo/beta': 0.0011241002939641476, 'fcm_dpo/q_t': 0.4085647165775299, 'fcm_dpo/delta': -0.008717566728591919, 'fcm_dpo/margin': 363.14068603515625, 'margin_dpo/margin_mean': 363.14068603515625, 'margin_dpo/margin_std': 566.4456787109375, 'logps/chosen': -611.2513427734375, 'logps/rejected': -1000.189208984375, 'logps/ref_chosen': -54.359527587890625, 'logps/ref_rejected': -80.15670013427734, 'KL/chosen_KL_mean': -556.8917846679688, 'KL/rejected_KL_mean': -920.032470703125, 'KL/mean': -738.462158203125, 'KL/std': 467.4353942871094, 'logits/chosen': -0.8804645538330078, 'logits/rejected': -0.8640455007553101, 'epoch': 0.74} + 74%|███████▎ | 501/681 [21:16<07:41, 2.57s/it] 74%|███████▎ | 502/681 [21:18<07:40, 2.57s/it] {'loss': 1.067, 'grad_norm': 25.909517288208008, 'learning_rate': 9.934134090518592e-08, 'fcm_dpo/beta': 0.001116321887820959, 'fcm_dpo/q_t': 0.40353554487228394, 'fcm_dpo/delta': -0.027478674426674843, 'fcm_dpo/margin': 381.8934326171875, 'margin_dpo/margin_mean': 381.8934326171875, 'margin_dpo/margin_std': 470.9022216796875, 'logps/chosen': -574.70068359375, 'logps/rejected': -971.9423828125, 'logps/ref_chosen': -67.60050964355469, 'logps/ref_rejected': -82.94876098632812, 'KL/chosen_KL_mean': -507.1001892089844, 'KL/rejected_KL_mean': -888.99365234375, 'KL/mean': -698.0469360351562, 'KL/std': 443.9556579589844, 'logits/chosen': -0.8332573175430298, 'logits/rejected': -0.8167060017585754, 'epoch': 0.74} + 74%|███████▎ | 502/681 [21:18<07:40, 2.57s/it] 74%|███████▍ | 503/681 [21:21<07:50, 2.65s/it] {'loss': 1.0968, 'grad_norm': 23.40604591369629, 'learning_rate': 9.831921068732571e-08, 'fcm_dpo/beta': 0.0011143197771161795, 'fcm_dpo/q_t': 0.41295260190963745, 'fcm_dpo/delta': 0.018423786386847496, 'fcm_dpo/margin': 342.99078369140625, 'margin_dpo/margin_mean': 342.99078369140625, 'margin_dpo/margin_std': 444.3095397949219, 'logps/chosen': -546.9080810546875, 'logps/rejected': -917.3258666992188, 'logps/ref_chosen': -55.078407287597656, 'logps/ref_rejected': -82.50544738769531, 'KL/chosen_KL_mean': -491.82965087890625, 'KL/rejected_KL_mean': -834.8204345703125, 'KL/mean': -663.3250732421875, 'KL/std': 403.2861022949219, 'logits/chosen': -0.868687629699707, 'logits/rejected': -0.8567318320274353, 'epoch': 0.74} + 74%|███████▍ | 503/681 [21:21<07:50, 2.65s/it] 74%|███████▍ | 504/681 [21:24<07:43, 2.62s/it] {'loss': 1.0645, 'grad_norm': 27.982126235961914, 'learning_rate': 9.730107739932805e-08, 'fcm_dpo/beta': 0.001110826968215406, 'fcm_dpo/q_t': 0.39741408824920654, 'fcm_dpo/delta': -0.0547223836183548, 'fcm_dpo/margin': 407.1573486328125, 'margin_dpo/margin_mean': 407.1573486328125, 'margin_dpo/margin_std': 519.5411987304688, 'logps/chosen': -612.1016845703125, 'logps/rejected': -1063.055419921875, 'logps/ref_chosen': -59.96575164794922, 'logps/ref_rejected': -103.76212310791016, 'KL/chosen_KL_mean': -552.1359252929688, 'KL/rejected_KL_mean': -959.2933349609375, 'KL/mean': -755.714599609375, 'KL/std': 474.62152099609375, 'logits/chosen': -0.8701947927474976, 'logits/rejected': -0.8900790214538574, 'epoch': 0.74} + 74%|███████▍ | 504/681 [21:24<07:43, 2.62s/it] 74%|███████▍ | 505/681 [21:26<07:39, 2.61s/it] {'loss': 1.2236, 'grad_norm': 34.76630783081055, 'learning_rate': 9.628696786995188e-08, 'fcm_dpo/beta': 0.0011349001433700323, 'fcm_dpo/q_t': 0.4452478885650635, 'fcm_dpo/delta': 0.1667182594537735, 'fcm_dpo/margin': 208.89784240722656, 'margin_dpo/margin_mean': 208.8978271484375, 'margin_dpo/margin_std': 435.1164245605469, 'logps/chosen': -701.6708374023438, 'logps/rejected': -922.9991455078125, 'logps/ref_chosen': -76.1549072265625, 'logps/ref_rejected': -88.58537292480469, 'KL/chosen_KL_mean': -625.5159301757812, 'KL/rejected_KL_mean': -834.413818359375, 'KL/mean': -729.96484375, 'KL/std': 455.81085205078125, 'logits/chosen': -0.8760533332824707, 'logits/rejected': -0.8471982479095459, 'epoch': 0.74} + 74%|███████▍ | 505/681 [21:26<07:39, 2.61s/it] 74%|███████▍ | 506/681 [21:29<07:25, 2.55s/it] {'loss': 1.0915, 'grad_norm': 38.504554748535156, 'learning_rate': 9.527690882192635e-08, 'fcm_dpo/beta': 0.0011371751315891743, 'fcm_dpo/q_t': 0.40623512864112854, 'fcm_dpo/delta': -0.01079019159078598, 'fcm_dpo/margin': 360.545654296875, 'margin_dpo/margin_mean': 360.545654296875, 'margin_dpo/margin_std': 491.61199951171875, 'logps/chosen': -551.7952880859375, 'logps/rejected': -941.7955322265625, 'logps/ref_chosen': -48.96050262451172, 'logps/ref_rejected': -78.41505432128906, 'KL/chosen_KL_mean': -502.8348083496094, 'KL/rejected_KL_mean': -863.3804931640625, 'KL/mean': -683.107666015625, 'KL/std': 453.17449951171875, 'logits/chosen': -0.9111833572387695, 'logits/rejected': -0.9226495027542114, 'epoch': 0.74} + 74%|███████▍ | 506/681 [21:29<07:25, 2.55s/it] 74%|███████▍ | 507/681 [21:31<07:26, 2.57s/it] {'loss': 1.1484, 'grad_norm': 36.931053161621094, 'learning_rate': 9.427092687124691e-08, 'fcm_dpo/beta': 0.0011462382972240448, 'fcm_dpo/q_t': 0.4203076958656311, 'fcm_dpo/delta': 0.03460888937115669, 'fcm_dpo/margin': 319.8925476074219, 'margin_dpo/margin_mean': 319.8925476074219, 'margin_dpo/margin_std': 573.2492065429688, 'logps/chosen': -658.3242797851562, 'logps/rejected': -1006.7882690429688, 'logps/ref_chosen': -66.80149841308594, 'logps/ref_rejected': -95.37289428710938, 'KL/chosen_KL_mean': -591.5228271484375, 'KL/rejected_KL_mean': -911.4154052734375, 'KL/mean': -751.4690551757812, 'KL/std': 523.9932250976562, 'logits/chosen': -0.9362499713897705, 'logits/rejected': -0.9389501214027405, 'epoch': 0.74} + 74%|███████▍ | 507/681 [21:31<07:26, 2.57s/it] 75%|███████▍ | 508/681 [21:34<07:27, 2.59s/it] {'loss': 1.2033, 'grad_norm': 38.228172302246094, 'learning_rate': 9.326904852647344e-08, 'fcm_dpo/beta': 0.001165491994470358, 'fcm_dpo/q_t': 0.4301344156265259, 'fcm_dpo/delta': 0.08244814723730087, 'fcm_dpo/margin': 274.52154541015625, 'margin_dpo/margin_mean': 274.5215759277344, 'margin_dpo/margin_std': 596.1304931640625, 'logps/chosen': -698.4112548828125, 'logps/rejected': -997.2568359375, 'logps/ref_chosen': -71.303466796875, 'logps/ref_rejected': -95.6275405883789, 'KL/chosen_KL_mean': -627.1077880859375, 'KL/rejected_KL_mean': -901.6292724609375, 'KL/mean': -764.3685302734375, 'KL/std': 510.8812255859375, 'logits/chosen': -0.8927318453788757, 'logits/rejected': -0.890540599822998, 'epoch': 0.75} + 75%|███████▍ | 508/681 [21:34<07:27, 2.59s/it] 75%|███████▍ | 509/681 [21:37<07:27, 2.60s/it] {'loss': 1.1333, 'grad_norm': 29.72893524169922, 'learning_rate': 9.227130018803195e-08, 'fcm_dpo/beta': 0.0011804470559582114, 'fcm_dpo/q_t': 0.41989073157310486, 'fcm_dpo/delta': 0.04675152152776718, 'fcm_dpo/margin': 300.30303955078125, 'margin_dpo/margin_mean': 300.30303955078125, 'margin_dpo/margin_std': 463.26605224609375, 'logps/chosen': -546.5865478515625, 'logps/rejected': -866.3270874023438, 'logps/ref_chosen': -63.81895065307617, 'logps/ref_rejected': -83.25643920898438, 'KL/chosen_KL_mean': -482.7675476074219, 'KL/rejected_KL_mean': -783.0706176757812, 'KL/mean': -632.9190673828125, 'KL/std': 380.39312744140625, 'logits/chosen': -0.8038022518157959, 'logits/rejected': -0.79693204164505, 'epoch': 0.75} + 75%|███████▍ | 509/681 [21:37<07:27, 2.60s/it] 75%|███████▍ | 510/681 [21:39<07:30, 2.63s/it] {'loss': 1.0473, 'grad_norm': 29.25191307067871, 'learning_rate': 9.127770814751932e-08, 'fcm_dpo/beta': 0.0011725020594894886, 'fcm_dpo/q_t': 0.3961718678474426, 'fcm_dpo/delta': -0.046999622136354446, 'fcm_dpo/margin': 379.4727478027344, 'margin_dpo/margin_mean': 379.47271728515625, 'margin_dpo/margin_std': 420.7159423828125, 'logps/chosen': -640.6284790039062, 'logps/rejected': -1070.98779296875, 'logps/ref_chosen': -51.878448486328125, 'logps/ref_rejected': -102.7651596069336, 'KL/chosen_KL_mean': -588.75, 'KL/rejected_KL_mean': -968.22265625, 'KL/mean': -778.4863891601562, 'KL/std': 429.193115234375, 'logits/chosen': -0.8102399110794067, 'logits/rejected': -0.8293131589889526, 'epoch': 0.75} + 75%|███████▍ | 510/681 [21:39<07:30, 2.63s/it] 75%|███████▌ | 511/681 [21:42<07:24, 2.62s/it] {'loss': 1.1575, 'grad_norm': 39.78738021850586, 'learning_rate': 9.028829858700973e-08, 'fcm_dpo/beta': 0.001175806624814868, 'fcm_dpo/q_t': 0.4179048538208008, 'fcm_dpo/delta': 0.039014674723148346, 'fcm_dpo/margin': 308.1739196777344, 'margin_dpo/margin_mean': 308.17388916015625, 'margin_dpo/margin_std': 568.642333984375, 'logps/chosen': -619.7030029296875, 'logps/rejected': -960.49560546875, 'logps/ref_chosen': -60.23811721801758, 'logps/ref_rejected': -92.85676574707031, 'KL/chosen_KL_mean': -559.4649658203125, 'KL/rejected_KL_mean': -867.6387939453125, 'KL/mean': -713.5518798828125, 'KL/std': 470.72216796875, 'logits/chosen': -0.8937386274337769, 'logits/rejected': -0.8976330161094666, 'epoch': 0.75} + 75%|███████▌ | 511/681 [21:42<07:24, 2.62s/it] 75%|███████▌ | 512/681 [21:44<07:04, 2.51s/it] {'loss': 1.0153, 'grad_norm': 51.16664505004883, 'learning_rate': 8.930309757836516e-08, 'fcm_dpo/beta': 0.0011590380454435945, 'fcm_dpo/q_t': 0.38680607080459595, 'fcm_dpo/delta': -0.09211389720439911, 'fcm_dpo/margin': 420.5235595703125, 'margin_dpo/margin_mean': 420.5235290527344, 'margin_dpo/margin_std': 434.5292053222656, 'logps/chosen': -508.2310791015625, 'logps/rejected': -955.7249755859375, 'logps/ref_chosen': -54.905494689941406, 'logps/ref_rejected': -81.87586975097656, 'KL/chosen_KL_mean': -453.3255615234375, 'KL/rejected_KL_mean': -873.84912109375, 'KL/mean': -663.5873413085938, 'KL/std': 430.00079345703125, 'logits/chosen': -0.8603556752204895, 'logits/rejected': -0.8814679384231567, 'epoch': 0.75} + 75%|███████▌ | 512/681 [21:44<07:04, 2.51s/it] 75%|███████▌ | 513/681 [21:47<07:12, 2.58s/it] {'loss': 1.1394, 'grad_norm': 40.44023513793945, 'learning_rate': 8.832213108254863e-08, 'fcm_dpo/beta': 0.0011576918186619878, 'fcm_dpo/q_t': 0.42015981674194336, 'fcm_dpo/delta': 0.04973098263144493, 'fcm_dpo/margin': 303.75506591796875, 'margin_dpo/margin_mean': 303.75506591796875, 'margin_dpo/margin_std': 474.69561767578125, 'logps/chosen': -622.2838134765625, 'logps/rejected': -937.1848754882812, 'logps/ref_chosen': -64.91644287109375, 'logps/ref_rejected': -76.06245422363281, 'KL/chosen_KL_mean': -557.367431640625, 'KL/rejected_KL_mean': -861.1224365234375, 'KL/mean': -709.244873046875, 'KL/std': 405.1826171875, 'logits/chosen': -0.914627194404602, 'logits/rejected': -0.8992458581924438, 'epoch': 0.75} + 75%|███████▌ | 513/681 [21:47<07:12, 2.58s/it] 75%|███████▌ | 514/681 [21:50<07:16, 2.61s/it] {'loss': 1.1468, 'grad_norm': 35.93750762939453, 'learning_rate': 8.734542494893954e-08, 'fcm_dpo/beta': 0.0011787754483520985, 'fcm_dpo/q_t': 0.42276865243911743, 'fcm_dpo/delta': 0.05473232641816139, 'fcm_dpo/margin': 294.3906555175781, 'margin_dpo/margin_mean': 294.39068603515625, 'margin_dpo/margin_std': 494.26495361328125, 'logps/chosen': -654.4765625, 'logps/rejected': -953.5831298828125, 'logps/ref_chosen': -74.22957611083984, 'logps/ref_rejected': -78.945556640625, 'KL/chosen_KL_mean': -580.2469482421875, 'KL/rejected_KL_mean': -874.6375732421875, 'KL/mean': -727.4422607421875, 'KL/std': 435.10223388671875, 'logits/chosen': -0.8526400327682495, 'logits/rejected': -0.8441455364227295, 'epoch': 0.75} + 75%|███████▌ | 514/681 [21:50<07:16, 2.61s/it] 76%|███████▌ | 515/681 [21:52<07:18, 2.64s/it] {'loss': 1.1849, 'grad_norm': 42.832855224609375, 'learning_rate': 8.637300491465272e-08, 'fcm_dpo/beta': 0.0012007859768345952, 'fcm_dpo/q_t': 0.43298569321632385, 'fcm_dpo/delta': 0.11678852140903473, 'fcm_dpo/margin': 238.64556884765625, 'margin_dpo/margin_mean': 238.6455841064453, 'margin_dpo/margin_std': 437.5038146972656, 'logps/chosen': -545.8444213867188, 'logps/rejected': -821.1862182617188, 'logps/ref_chosen': -50.40156555175781, 'logps/ref_rejected': -87.09774780273438, 'KL/chosen_KL_mean': -495.4428405761719, 'KL/rejected_KL_mean': -734.0885009765625, 'KL/mean': -614.765625, 'KL/std': 379.7894287109375, 'logits/chosen': -0.8383795022964478, 'logits/rejected': -0.8531197905540466, 'epoch': 0.76} + 76%|███████▌ | 515/681 [21:52<07:18, 2.64s/it] 76%|███████▌ | 516/681 [21:55<06:53, 2.51s/it] {'loss': 1.0818, 'grad_norm': 54.36648941040039, 'learning_rate': 8.540489660386064e-08, 'fcm_dpo/beta': 0.0012106327340006828, 'fcm_dpo/q_t': 0.4044458270072937, 'fcm_dpo/delta': -0.01020483672618866, 'fcm_dpo/margin': 338.2943420410156, 'margin_dpo/margin_mean': 338.2943420410156, 'margin_dpo/margin_std': 424.7276611328125, 'logps/chosen': -594.807373046875, 'logps/rejected': -980.1744995117188, 'logps/ref_chosen': -64.64956665039062, 'logps/ref_rejected': -111.72237396240234, 'KL/chosen_KL_mean': -530.1577758789062, 'KL/rejected_KL_mean': -868.4521484375, 'KL/mean': -699.3049926757812, 'KL/std': 425.07623291015625, 'logits/chosen': -0.8974713087081909, 'logits/rejected': -0.9246504902839661, 'epoch': 0.76} + 76%|███████▌ | 516/681 [21:55<06:53, 2.51s/it] 76%|███████▌ | 517/681 [21:57<06:51, 2.51s/it] {'loss': 1.0518, 'grad_norm': 29.054262161254883, 'learning_rate': 8.444112552711752e-08, 'fcm_dpo/beta': 0.0011898339726030827, 'fcm_dpo/q_t': 0.3944876194000244, 'fcm_dpo/delta': -0.07756029814481735, 'fcm_dpo/margin': 398.1460266113281, 'margin_dpo/margin_mean': 398.14605712890625, 'margin_dpo/margin_std': 519.8525390625, 'logps/chosen': -620.2119750976562, 'logps/rejected': -1046.527587890625, 'logps/ref_chosen': -60.913551330566406, 'logps/ref_rejected': -89.08308410644531, 'KL/chosen_KL_mean': -559.2984008789062, 'KL/rejected_KL_mean': -957.4444580078125, 'KL/mean': -758.3714599609375, 'KL/std': 472.23651123046875, 'logits/chosen': -0.8497953414916992, 'logits/rejected': -0.8503054976463318, 'epoch': 0.76} + 76%|███████▌ | 517/681 [21:57<06:51, 2.51s/it] 76%|███████▌ | 518/681 [22:00<06:59, 2.57s/it] {'loss': 1.0875, 'grad_norm': 54.556785583496094, 'learning_rate': 8.348171708068747e-08, 'fcm_dpo/beta': 0.0011824161047115922, 'fcm_dpo/q_t': 0.40656790137290955, 'fcm_dpo/delta': -0.002931937575340271, 'fcm_dpo/margin': 340.473388671875, 'margin_dpo/margin_mean': 340.473388671875, 'margin_dpo/margin_std': 436.8955078125, 'logps/chosen': -585.6452026367188, 'logps/rejected': -953.9754638671875, 'logps/ref_chosen': -57.45589065551758, 'logps/ref_rejected': -85.31269836425781, 'KL/chosen_KL_mean': -528.1893310546875, 'KL/rejected_KL_mean': -868.6627197265625, 'KL/mean': -698.426025390625, 'KL/std': 393.81671142578125, 'logits/chosen': -0.8558133840560913, 'logits/rejected': -0.874567985534668, 'epoch': 0.76} + 76%|███████▌ | 518/681 [22:00<06:59, 2.57s/it] 76%|███████▌ | 519/681 [22:03<07:11, 2.66s/it] {'loss': 1.183, 'grad_norm': 33.39023971557617, 'learning_rate': 8.25266965458755e-08, 'fcm_dpo/beta': 0.001208610599860549, 'fcm_dpo/q_t': 0.4309791624546051, 'fcm_dpo/delta': 0.10347578674554825, 'fcm_dpo/margin': 247.46214294433594, 'margin_dpo/margin_mean': 247.46214294433594, 'margin_dpo/margin_std': 456.01971435546875, 'logps/chosen': -599.3948974609375, 'logps/rejected': -877.2378540039062, 'logps/ref_chosen': -74.06331634521484, 'logps/ref_rejected': -104.44416809082031, 'KL/chosen_KL_mean': -525.33154296875, 'KL/rejected_KL_mean': -772.793701171875, 'KL/mean': -649.0626220703125, 'KL/std': 368.4232177734375, 'logits/chosen': -0.837517261505127, 'logits/rejected': -0.8200976848602295, 'epoch': 0.76} + 76%|███████▌ | 519/681 [22:03<07:11, 2.66s/it] 76%|███████▋ | 520/681 [22:05<07:07, 2.66s/it] {'loss': 1.1189, 'grad_norm': 34.308570861816406, 'learning_rate': 8.15760890883607e-08, 'fcm_dpo/beta': 0.0012114193523302674, 'fcm_dpo/q_t': 0.4147945046424866, 'fcm_dpo/delta': 0.018862294033169746, 'fcm_dpo/margin': 315.1514892578125, 'margin_dpo/margin_mean': 315.1515197753906, 'margin_dpo/margin_std': 470.97100830078125, 'logps/chosen': -639.947509765625, 'logps/rejected': -984.780517578125, 'logps/ref_chosen': -70.2998275756836, 'logps/ref_rejected': -99.98133850097656, 'KL/chosen_KL_mean': -569.647705078125, 'KL/rejected_KL_mean': -884.7991943359375, 'KL/mean': -727.2234497070312, 'KL/std': 423.0360412597656, 'logits/chosen': -0.81200110912323, 'logits/rejected': -0.8175575733184814, 'epoch': 0.76} + 76%|███████▋ | 520/681 [22:05<07:07, 2.66s/it] 77%|███████▋ | 521/681 [22:08<07:05, 2.66s/it] {'loss': 1.0694, 'grad_norm': 40.11237335205078, 'learning_rate': 8.062991975753378e-08, 'fcm_dpo/beta': 0.0012165037915110588, 'fcm_dpo/q_t': 0.40068429708480835, 'fcm_dpo/delta': -0.03935041278600693, 'fcm_dpo/margin': 359.2563781738281, 'margin_dpo/margin_mean': 359.2563781738281, 'margin_dpo/margin_std': 451.70306396484375, 'logps/chosen': -568.2760620117188, 'logps/rejected': -952.670166015625, 'logps/ref_chosen': -58.14292526245117, 'logps/ref_rejected': -83.28060913085938, 'KL/chosen_KL_mean': -510.13311767578125, 'KL/rejected_KL_mean': -869.3895263671875, 'KL/mean': -689.7613525390625, 'KL/std': 446.6356201171875, 'logits/chosen': -0.8692072629928589, 'logits/rejected': -0.8731534481048584, 'epoch': 0.77} + 77%|███████▋ | 521/681 [22:08<07:05, 2.66s/it] 77%|███████▋ | 522/681 [22:11<07:01, 2.65s/it] {'loss': 1.1248, 'grad_norm': 32.06018829345703, 'learning_rate': 7.968821348583643e-08, 'fcm_dpo/beta': 0.001208572182804346, 'fcm_dpo/q_t': 0.4123893082141876, 'fcm_dpo/delta': 0.011602986603975296, 'fcm_dpo/margin': 321.7369079589844, 'margin_dpo/margin_mean': 321.7369079589844, 'margin_dpo/margin_std': 514.0554809570312, 'logps/chosen': -637.2626342773438, 'logps/rejected': -978.4656982421875, 'logps/ref_chosen': -46.54766845703125, 'logps/ref_rejected': -66.01388549804688, 'KL/chosen_KL_mean': -590.7149658203125, 'KL/rejected_KL_mean': -912.4517822265625, 'KL/mean': -751.5833740234375, 'KL/std': 471.05303955078125, 'logits/chosen': -0.8791370987892151, 'logits/rejected': -0.8823133707046509, 'epoch': 0.77} + 77%|███████▋ | 522/681 [22:11<07:01, 2.65s/it] 77%|███████▋ | 523/681 [22:13<07:02, 2.68s/it] {'loss': 1.1257, 'grad_norm': 39.04078674316406, 'learning_rate': 7.875099508810484e-08, 'fcm_dpo/beta': 0.0012069594813510776, 'fcm_dpo/q_t': 0.4096330404281616, 'fcm_dpo/delta': -0.0098798843100667, 'fcm_dpo/margin': 339.26068115234375, 'margin_dpo/margin_mean': 339.26068115234375, 'margin_dpo/margin_std': 569.8573608398438, 'logps/chosen': -696.2294921875, 'logps/rejected': -1057.48193359375, 'logps/ref_chosen': -61.76960372924805, 'logps/ref_rejected': -83.76141357421875, 'KL/chosen_KL_mean': -634.4599609375, 'KL/rejected_KL_mean': -973.7205810546875, 'KL/mean': -804.0902099609375, 'KL/std': 544.9263916015625, 'logits/chosen': -0.9305659532546997, 'logits/rejected': -0.9318529367446899, 'epoch': 0.77} + 77%|███████▋ | 523/681 [22:13<07:02, 2.68s/it] 77%|███████▋ | 524/681 [22:16<06:58, 2.67s/it] {'loss': 1.0926, 'grad_norm': 36.887882232666016, 'learning_rate': 7.781828926091535e-08, 'fcm_dpo/beta': 0.001192695926874876, 'fcm_dpo/q_t': 0.3989192843437195, 'fcm_dpo/delta': -0.03869359940290451, 'fcm_dpo/margin': 365.6807556152344, 'margin_dpo/margin_mean': 365.68072509765625, 'margin_dpo/margin_std': 523.1288452148438, 'logps/chosen': -709.494384765625, 'logps/rejected': -1078.405029296875, 'logps/ref_chosen': -78.0720443725586, 'logps/ref_rejected': -81.30198669433594, 'KL/chosen_KL_mean': -631.42236328125, 'KL/rejected_KL_mean': -997.10302734375, 'KL/mean': -814.2626953125, 'KL/std': 514.68408203125, 'logits/chosen': -0.9622774124145508, 'logits/rejected': -0.9509581327438354, 'epoch': 0.77} + 77%|███████▋ | 524/681 [22:16<06:58, 2.67s/it] 77%|███████▋ | 525/681 [22:19<06:57, 2.67s/it] {'loss': 1.0231, 'grad_norm': 25.891273498535156, 'learning_rate': 7.689012058193384e-08, 'fcm_dpo/beta': 0.0011688778176903725, 'fcm_dpo/q_t': 0.3837316036224365, 'fcm_dpo/delta': -0.13245530426502228, 'fcm_dpo/margin': 448.8243408203125, 'margin_dpo/margin_mean': 448.82427978515625, 'margin_dpo/margin_std': 557.4779663085938, 'logps/chosen': -672.9616088867188, 'logps/rejected': -1171.010986328125, 'logps/ref_chosen': -50.827857971191406, 'logps/ref_rejected': -100.05294036865234, 'KL/chosen_KL_mean': -622.1337890625, 'KL/rejected_KL_mean': -1070.9580078125, 'KL/mean': -846.5458984375, 'KL/std': 526.7284545898438, 'logits/chosen': -0.918233335018158, 'logits/rejected': -0.9528594017028809, 'epoch': 0.77} + 77%|███████▋ | 525/681 [22:19<06:57, 2.67s/it] 77%|███████▋ | 526/681 [22:21<06:47, 2.63s/it] {'loss': 1.0506, 'grad_norm': 23.709228515625, 'learning_rate': 7.596651350926836e-08, 'fcm_dpo/beta': 0.0011508764000609517, 'fcm_dpo/q_t': 0.38582324981689453, 'fcm_dpo/delta': -0.10911859571933746, 'fcm_dpo/margin': 437.71282958984375, 'margin_dpo/margin_mean': 437.7127990722656, 'margin_dpo/margin_std': 586.166259765625, 'logps/chosen': -726.302001953125, 'logps/rejected': -1187.156982421875, 'logps/ref_chosen': -63.167236328125, 'logps/ref_rejected': -86.30934143066406, 'KL/chosen_KL_mean': -663.134765625, 'KL/rejected_KL_mean': -1100.84765625, 'KL/mean': -881.9912109375, 'KL/std': 521.672119140625, 'logits/chosen': -0.8999603986740112, 'logits/rejected': -0.8906654119491577, 'epoch': 0.77} + 77%|███████▋ | 526/681 [22:21<06:47, 2.63s/it] 77%|███████▋ | 527/681 [22:24<06:41, 2.61s/it] {'loss': 1.1423, 'grad_norm': 43.69949722290039, 'learning_rate': 7.504749238082414e-08, 'fcm_dpo/beta': 0.001149723306298256, 'fcm_dpo/q_t': 0.42192360758781433, 'fcm_dpo/delta': 0.06489390134811401, 'fcm_dpo/margin': 293.4132385253906, 'margin_dpo/margin_mean': 293.4132385253906, 'margin_dpo/margin_std': 454.53448486328125, 'logps/chosen': -758.66552734375, 'logps/rejected': -1059.292724609375, 'logps/ref_chosen': -71.12867736816406, 'logps/ref_rejected': -78.3425521850586, 'KL/chosen_KL_mean': -687.536865234375, 'KL/rejected_KL_mean': -980.9501342773438, 'KL/mean': -834.2434692382812, 'KL/std': 535.2979736328125, 'logits/chosen': -1.1003575325012207, 'logits/rejected': -1.0651922225952148, 'epoch': 0.77} + 77%|███████▋ | 527/681 [22:24<06:41, 2.61s/it] 78%|███████▊ | 528/681 [22:27<06:48, 2.67s/it] {'loss': 1.1277, 'grad_norm': 49.07489776611328, 'learning_rate': 7.413308141366254e-08, 'fcm_dpo/beta': 0.0011513070203363895, 'fcm_dpo/q_t': 0.4090343713760376, 'fcm_dpo/delta': -0.018490692600607872, 'fcm_dpo/margin': 362.7888488769531, 'margin_dpo/margin_mean': 362.7888488769531, 'margin_dpo/margin_std': 622.3973388671875, 'logps/chosen': -757.7330322265625, 'logps/rejected': -1146.342529296875, 'logps/ref_chosen': -68.0894546508789, 'logps/ref_rejected': -93.91006469726562, 'KL/chosen_KL_mean': -689.6435546875, 'KL/rejected_KL_mean': -1052.4324951171875, 'KL/mean': -871.0379638671875, 'KL/std': 510.6888427734375, 'logits/chosen': -1.0010507106781006, 'logits/rejected': -0.9815536141395569, 'epoch': 0.78} + 78%|███████▊ | 528/681 [22:27<06:48, 2.67s/it] 78%|███████▊ | 529/681 [22:29<06:46, 2.67s/it] {'loss': 1.2213, 'grad_norm': 45.102630615234375, 'learning_rate': 7.322330470336313e-08, 'fcm_dpo/beta': 0.0011541005223989487, 'fcm_dpo/q_t': 0.43285101652145386, 'fcm_dpo/delta': 0.01186126284301281, 'fcm_dpo/margin': 253.6956787109375, 'margin_dpo/margin_mean': 253.6956787109375, 'margin_dpo/margin_std': 574.0363159179688, 'logps/chosen': -855.5684814453125, 'logps/rejected': -1142.8983154296875, 'logps/ref_chosen': -55.57495880126953, 'logps/ref_rejected': -89.20909118652344, 'KL/chosen_KL_mean': -799.9935302734375, 'KL/rejected_KL_mean': -1053.689208984375, 'KL/mean': -926.8414306640625, 'KL/std': 460.82391357421875, 'logits/chosen': -1.0135385990142822, 'logits/rejected': -1.021782398223877, 'epoch': 0.78} + 78%|███████▊ | 529/681 [22:29<06:46, 2.67s/it] 78%|███████▊ | 530/681 [22:32<06:48, 2.70s/it] {'loss': 1.1222, 'grad_norm': 44.16566467285156, 'learning_rate': 7.231818622338822e-08, 'fcm_dpo/beta': 0.0011464983690530062, 'fcm_dpo/q_t': 0.4019849896430969, 'fcm_dpo/delta': -0.0540442019701004, 'fcm_dpo/margin': 393.927490234375, 'margin_dpo/margin_mean': 393.927490234375, 'margin_dpo/margin_std': 693.3948974609375, 'logps/chosen': -723.939208984375, 'logps/rejected': -1157.5498046875, 'logps/ref_chosen': -47.601417541503906, 'logps/ref_rejected': -87.2845230102539, 'KL/chosen_KL_mean': -676.3377685546875, 'KL/rejected_KL_mean': -1070.2652587890625, 'KL/mean': -873.301513671875, 'KL/std': 554.6763916015625, 'logits/chosen': -0.9260751008987427, 'logits/rejected': -0.919657289981842, 'epoch': 0.78} + 78%|███████▊ | 530/681 [22:32<06:48, 2.70s/it] 78%|███████▊ | 531/681 [22:34<06:25, 2.57s/it] {'loss': 1.1423, 'grad_norm': 41.70063781738281, 'learning_rate': 7.141774982445147e-08, 'fcm_dpo/beta': 0.0011490847682580352, 'fcm_dpo/q_t': 0.417187362909317, 'fcm_dpo/delta': 0.0292234905064106, 'fcm_dpo/margin': 323.489990234375, 'margin_dpo/margin_mean': 323.4900207519531, 'margin_dpo/margin_std': 554.1434326171875, 'logps/chosen': -810.950927734375, 'logps/rejected': -1149.80078125, 'logps/ref_chosen': -55.246063232421875, 'logps/ref_rejected': -70.60598754882812, 'KL/chosen_KL_mean': -755.704833984375, 'KL/rejected_KL_mean': -1079.19482421875, 'KL/mean': -917.4498901367188, 'KL/std': 589.1134033203125, 'logits/chosen': -1.028259038925171, 'logits/rejected': -1.0060193538665771, 'epoch': 0.78} + 78%|███████▊ | 531/681 [22:34<06:25, 2.57s/it] 78%|███████▊ | 532/681 [22:37<06:29, 2.61s/it] {'loss': 1.1483, 'grad_norm': 68.5473861694336, 'learning_rate': 7.052201923388953e-08, 'fcm_dpo/beta': 0.0011367748957127333, 'fcm_dpo/q_t': 0.40855199098587036, 'fcm_dpo/delta': -0.01567455381155014, 'fcm_dpo/margin': 363.9248046875, 'margin_dpo/margin_mean': 363.9248046875, 'margin_dpo/margin_std': 657.9794311523438, 'logps/chosen': -817.2122802734375, 'logps/rejected': -1197.4423828125, 'logps/ref_chosen': -70.28601837158203, 'logps/ref_rejected': -86.5913314819336, 'KL/chosen_KL_mean': -746.92626953125, 'KL/rejected_KL_mean': -1110.85107421875, 'KL/mean': -928.888671875, 'KL/std': 552.4840087890625, 'logits/chosen': -0.9877306818962097, 'logits/rejected': -0.9620273113250732, 'epoch': 0.78} + 78%|███████▊ | 532/681 [22:37<06:29, 2.61s/it] 78%|███████▊ | 533/681 [22:40<06:24, 2.60s/it] {'loss': 1.2151, 'grad_norm': 41.75889205932617, 'learning_rate': 6.963101805503646e-08, 'fcm_dpo/beta': 0.0011436111526563764, 'fcm_dpo/q_t': 0.43308863043785095, 'fcm_dpo/delta': -0.010318025015294552, 'fcm_dpo/margin': 260.69586181640625, 'margin_dpo/margin_mean': 260.69586181640625, 'margin_dpo/margin_std': 586.0986328125, 'logps/chosen': -743.72509765625, 'logps/rejected': -1016.1539306640625, 'logps/ref_chosen': -64.8551025390625, 'logps/ref_rejected': -76.58805847167969, 'KL/chosen_KL_mean': -678.8699951171875, 'KL/rejected_KL_mean': -939.5658569335938, 'KL/mean': -809.2179565429688, 'KL/std': 484.06103515625, 'logits/chosen': -1.0021346807479858, 'logits/rejected': -0.9763340950012207, 'epoch': 0.78} + 78%|███████▊ | 533/681 [22:40<06:24, 2.60s/it] 78%|███████▊ | 534/681 [22:42<06:21, 2.60s/it] {'loss': 1.1125, 'grad_norm': 35.797950744628906, 'learning_rate': 6.874476976660184e-08, 'fcm_dpo/beta': 0.0011327785905450583, 'fcm_dpo/q_t': 0.40905874967575073, 'fcm_dpo/delta': -0.005801960825920105, 'fcm_dpo/margin': 356.9794616699219, 'margin_dpo/margin_mean': 356.9794921875, 'margin_dpo/margin_std': 537.181396484375, 'logps/chosen': -747.182861328125, 'logps/rejected': -1122.58642578125, 'logps/ref_chosen': -60.119388580322266, 'logps/ref_rejected': -78.54347229003906, 'KL/chosen_KL_mean': -687.0634765625, 'KL/rejected_KL_mean': -1044.04296875, 'KL/mean': -865.5532836914062, 'KL/std': 514.9797973632812, 'logits/chosen': -0.9988099336624146, 'logits/rejected': -0.9967177510261536, 'epoch': 0.78} + 78%|███████▊ | 534/681 [22:42<06:21, 2.60s/it] 79%|███████▊ | 535/681 [22:45<06:19, 2.60s/it] {'loss': 1.0557, 'grad_norm': 30.948278427124023, 'learning_rate': 6.786329772205246e-08, 'fcm_dpo/beta': 0.001136034494265914, 'fcm_dpo/q_t': 0.39456337690353394, 'fcm_dpo/delta': -0.07386443018913269, 'fcm_dpo/margin': 413.7446594238281, 'margin_dpo/margin_mean': 413.7446594238281, 'margin_dpo/margin_std': 526.1167602539062, 'logps/chosen': -631.0035400390625, 'logps/rejected': -1086.7255859375, 'logps/ref_chosen': -54.330238342285156, 'logps/ref_rejected': -96.30763244628906, 'KL/chosen_KL_mean': -576.67333984375, 'KL/rejected_KL_mean': -990.41796875, 'KL/mean': -783.545654296875, 'KL/std': 499.79852294921875, 'logits/chosen': -0.9196850061416626, 'logits/rejected': -0.921947717666626, 'epoch': 0.79} + 79%|███████▊ | 535/681 [22:45<06:19, 2.60s/it] 79%|███████▊ | 536/681 [22:47<06:19, 2.62s/it] {'loss': 1.0215, 'grad_norm': 29.700851440429688, 'learning_rate': 6.698662514899638e-08, 'fcm_dpo/beta': 0.0011004150146618485, 'fcm_dpo/q_t': 0.383211225271225, 'fcm_dpo/delta': -0.14926910400390625, 'fcm_dpo/margin': 491.8769226074219, 'margin_dpo/margin_mean': 491.8769226074219, 'margin_dpo/margin_std': 654.18408203125, 'logps/chosen': -567.2508544921875, 'logps/rejected': -1101.14501953125, 'logps/ref_chosen': -47.08053207397461, 'logps/ref_rejected': -89.09783935546875, 'KL/chosen_KL_mean': -520.1702880859375, 'KL/rejected_KL_mean': -1012.0472412109375, 'KL/mean': -766.1087646484375, 'KL/std': 554.3941650390625, 'logits/chosen': -0.8961449265480042, 'logits/rejected': -0.9250037670135498, 'epoch': 0.79} + 79%|███████▊ | 536/681 [22:47<06:19, 2.62s/it] 79%|███████▉ | 537/681 [22:50<06:13, 2.60s/it] {'loss': 1.1431, 'grad_norm': 44.981773376464844, 'learning_rate': 6.611477514857114e-08, 'fcm_dpo/beta': 0.0011028747539967299, 'fcm_dpo/q_t': 0.41649651527404785, 'fcm_dpo/delta': 0.037213459610939026, 'fcm_dpo/margin': 329.5657958984375, 'margin_dpo/margin_mean': 329.5657958984375, 'margin_dpo/margin_std': 545.5430297851562, 'logps/chosen': -594.7760009765625, 'logps/rejected': -937.0326538085938, 'logps/ref_chosen': -57.747467041015625, 'logps/ref_rejected': -70.43838500976562, 'KL/chosen_KL_mean': -537.0285034179688, 'KL/rejected_KL_mean': -866.59423828125, 'KL/mean': -701.8114013671875, 'KL/std': 445.88031005859375, 'logits/chosen': -0.9382889270782471, 'logits/rejected': -0.9202646017074585, 'epoch': 0.79} + 79%|███████▉ | 537/681 [22:50<06:13, 2.60s/it] 79%|███████▉ | 538/681 [22:53<06:13, 2.61s/it] {'loss': 1.0874, 'grad_norm': 28.537601470947266, 'learning_rate': 6.524777069483525e-08, 'fcm_dpo/beta': 0.00109610625077039, 'fcm_dpo/q_t': 0.40666401386260986, 'fcm_dpo/delta': -0.008285703137516975, 'fcm_dpo/margin': 372.10687255859375, 'margin_dpo/margin_mean': 372.1068420410156, 'margin_dpo/margin_std': 494.2760314941406, 'logps/chosen': -722.337646484375, 'logps/rejected': -1112.256591796875, 'logps/ref_chosen': -66.41594696044922, 'logps/ref_rejected': -84.22808837890625, 'KL/chosen_KL_mean': -655.9216918945312, 'KL/rejected_KL_mean': -1028.028564453125, 'KL/mean': -841.97509765625, 'KL/std': 484.8907165527344, 'logits/chosen': -0.9427316188812256, 'logits/rejected': -0.9291995763778687, 'epoch': 0.79} + 79%|███████▉ | 538/681 [22:53<06:13, 2.61s/it] 79%|███████▉ | 539/681 [22:55<06:14, 2.64s/it] {'loss': 1.0878, 'grad_norm': 34.4008903503418, 'learning_rate': 6.438563463416221e-08, 'fcm_dpo/beta': 0.0011011988390237093, 'fcm_dpo/q_t': 0.40968143939971924, 'fcm_dpo/delta': 0.009760351851582527, 'fcm_dpo/margin': 354.651123046875, 'margin_dpo/margin_mean': 354.651123046875, 'margin_dpo/margin_std': 434.3199462890625, 'logps/chosen': -610.7379150390625, 'logps/rejected': -998.7501220703125, 'logps/ref_chosen': -58.492855072021484, 'logps/ref_rejected': -91.85395050048828, 'KL/chosen_KL_mean': -552.2450561523438, 'KL/rejected_KL_mean': -906.8961791992188, 'KL/mean': -729.5706176757812, 'KL/std': 395.36083984375, 'logits/chosen': -0.9456039667129517, 'logits/rejected': -0.9376469254493713, 'epoch': 0.79} + 79%|███████▉ | 539/681 [22:55<06:14, 2.64s/it] 79%|███████▉ | 540/681 [22:58<06:15, 2.66s/it] {'loss': 1.0481, 'grad_norm': 39.40283966064453, 'learning_rate': 6.352838968463919e-08, 'fcm_dpo/beta': 0.0010876839514821768, 'fcm_dpo/q_t': 0.3912101984024048, 'fcm_dpo/delta': -0.09584314376115799, 'fcm_dpo/margin': 451.6276550292969, 'margin_dpo/margin_mean': 451.6276550292969, 'margin_dpo/margin_std': 583.0654907226562, 'logps/chosen': -607.045654296875, 'logps/rejected': -1111.620849609375, 'logps/ref_chosen': -63.482513427734375, 'logps/ref_rejected': -116.42999267578125, 'KL/chosen_KL_mean': -543.5631103515625, 'KL/rejected_KL_mean': -995.1907958984375, 'KL/mean': -769.376953125, 'KL/std': 495.0863342285156, 'logits/chosen': -0.8707677721977234, 'logits/rejected': -0.8923947811126709, 'epoch': 0.79} + 79%|███████▉ | 540/681 [22:58<06:15, 2.66s/it] 79%|███████▉ | 541/681 [23:00<06:01, 2.58s/it] {'loss': 1.2177, 'grad_norm': 53.83041763305664, 'learning_rate': 6.267605843546767e-08, 'fcm_dpo/beta': 0.001078011584468186, 'fcm_dpo/q_t': 0.43801432847976685, 'fcm_dpo/delta': -0.004111842717975378, 'fcm_dpo/margin': 249.92453002929688, 'margin_dpo/margin_mean': 249.92453002929688, 'margin_dpo/margin_std': 536.0916748046875, 'logps/chosen': -736.8777465820312, 'logps/rejected': -1011.7955932617188, 'logps/ref_chosen': -78.28036499023438, 'logps/ref_rejected': -103.273681640625, 'KL/chosen_KL_mean': -658.597412109375, 'KL/rejected_KL_mean': -908.5219116210938, 'KL/mean': -783.5596923828125, 'KL/std': 451.7571716308594, 'logits/chosen': -0.9981366395950317, 'logits/rejected': -0.9937785863876343, 'epoch': 0.79} + 79%|███████▉ | 541/681 [23:00<06:01, 2.58s/it] 80%|███████▉ | 542/681 [23:03<06:09, 2.66s/it] {'loss': 1.0592, 'grad_norm': 52.30241012573242, 'learning_rate': 6.182866334636888e-08, 'fcm_dpo/beta': 0.0010561456438153982, 'fcm_dpo/q_t': 0.3928494155406952, 'fcm_dpo/delta': -0.08209630846977234, 'fcm_dpo/margin': 450.8837585449219, 'margin_dpo/margin_mean': 450.8837585449219, 'margin_dpo/margin_std': 593.1970825195312, 'logps/chosen': -620.0906982421875, 'logps/rejected': -1109.9644775390625, 'logps/ref_chosen': -57.48497009277344, 'logps/ref_rejected': -96.47506713867188, 'KL/chosen_KL_mean': -562.605712890625, 'KL/rejected_KL_mean': -1013.4893798828125, 'KL/mean': -788.047607421875, 'KL/std': 504.0601806640625, 'logits/chosen': -0.9771475791931152, 'logits/rejected': -1.0094921588897705, 'epoch': 0.8} + 80%|███████▉ | 542/681 [23:03<06:09, 2.66s/it] 80%|███████▉ | 543/681 [23:06<06:03, 2.64s/it] {'loss': 1.1962, 'grad_norm': 30.09369659423828, 'learning_rate': 6.098622674699147e-08, 'fcm_dpo/beta': 0.0010682092979550362, 'fcm_dpo/q_t': 0.4326469302177429, 'fcm_dpo/delta': 0.06477095186710358, 'fcm_dpo/margin': 315.904296875, 'margin_dpo/margin_mean': 315.9043273925781, 'margin_dpo/margin_std': 699.5291748046875, 'logps/chosen': -689.615478515625, 'logps/rejected': -1050.501220703125, 'logps/ref_chosen': -60.61750793457031, 'logps/ref_rejected': -105.59896850585938, 'KL/chosen_KL_mean': -628.9979858398438, 'KL/rejected_KL_mean': -944.90234375, 'KL/mean': -786.9501953125, 'KL/std': 587.5977783203125, 'logits/chosen': -0.9427838325500488, 'logits/rejected': -0.9720630645751953, 'epoch': 0.8} + 80%|███████▉ | 543/681 [23:06<06:03, 2.64s/it] 80%|███████▉ | 544/681 [23:08<05:56, 2.61s/it] {'loss': 1.094, 'grad_norm': 32.88768005371094, 'learning_rate': 6.01487708363232e-08, 'fcm_dpo/beta': 0.0010710186325013638, 'fcm_dpo/q_t': 0.40565305948257446, 'fcm_dpo/delta': -0.014568203128874302, 'fcm_dpo/margin': 386.49005126953125, 'margin_dpo/margin_mean': 386.49005126953125, 'margin_dpo/margin_std': 549.4237060546875, 'logps/chosen': -699.2671508789062, 'logps/rejected': -1127.069580078125, 'logps/ref_chosen': -59.642303466796875, 'logps/ref_rejected': -100.95469665527344, 'KL/chosen_KL_mean': -639.6248779296875, 'KL/rejected_KL_mean': -1026.1148681640625, 'KL/mean': -832.869873046875, 'KL/std': 483.07550048828125, 'logits/chosen': -0.9224880933761597, 'logits/rejected': -0.9422965049743652, 'epoch': 0.8} + 80%|███████▉ | 544/681 [23:08<05:56, 2.61s/it] 80%|████████ | 545/681 [23:11<05:50, 2.58s/it] {'loss': 1.0488, 'grad_norm': 32.99470520019531, 'learning_rate': 5.9316317682106294e-08, 'fcm_dpo/beta': 0.0010588113218545914, 'fcm_dpo/q_t': 0.39326316118240356, 'fcm_dpo/delta': -0.07936666160821915, 'fcm_dpo/margin': 449.18927001953125, 'margin_dpo/margin_mean': 449.18927001953125, 'margin_dpo/margin_std': 571.0772705078125, 'logps/chosen': -660.5941162109375, 'logps/rejected': -1138.042724609375, 'logps/ref_chosen': -67.64859771728516, 'logps/ref_rejected': -95.90800476074219, 'KL/chosen_KL_mean': -592.945556640625, 'KL/rejected_KL_mean': -1042.134765625, 'KL/mean': -817.5401611328125, 'KL/std': 495.79449462890625, 'logits/chosen': -0.8537076711654663, 'logits/rejected': -0.8849306106567383, 'epoch': 0.8} + 80%|████████ | 545/681 [23:11<05:50, 2.58s/it] 80%|████████ | 546/681 [23:13<05:47, 2.57s/it] {'loss': 1.1519, 'grad_norm': 32.57497024536133, 'learning_rate': 5.848888922025552e-08, 'fcm_dpo/beta': 0.0010674262885004282, 'fcm_dpo/q_t': 0.42482131719589233, 'fcm_dpo/delta': 0.08395257592201233, 'fcm_dpo/margin': 298.6014709472656, 'margin_dpo/margin_mean': 298.6014709472656, 'margin_dpo/margin_std': 461.811767578125, 'logps/chosen': -617.6420288085938, 'logps/rejected': -947.365478515625, 'logps/ref_chosen': -50.744232177734375, 'logps/ref_rejected': -81.86622619628906, 'KL/chosen_KL_mean': -566.8978271484375, 'KL/rejected_KL_mean': -865.499267578125, 'KL/mean': -716.198486328125, 'KL/std': 434.204833984375, 'logits/chosen': -0.9205929040908813, 'logits/rejected': -0.910815954208374, 'epoch': 0.8} + 80%|████████ | 546/681 [23:14<05:47, 2.57s/it] 80%|████████ | 547/681 [23:16<05:51, 2.62s/it] {'loss': 1.0961, 'grad_norm': 50.77888870239258, 'learning_rate': 5.7666507254280265e-08, 'fcm_dpo/beta': 0.0010726114269345999, 'fcm_dpo/q_t': 0.40798118710517883, 'fcm_dpo/delta': -0.006979792378842831, 'fcm_dpo/margin': 379.1336669921875, 'margin_dpo/margin_mean': 379.1336669921875, 'margin_dpo/margin_std': 533.245361328125, 'logps/chosen': -646.9344482421875, 'logps/rejected': -1043.1417236328125, 'logps/ref_chosen': -73.6877212524414, 'logps/ref_rejected': -90.76136779785156, 'KL/chosen_KL_mean': -573.2467041015625, 'KL/rejected_KL_mean': -952.38037109375, 'KL/mean': -762.8134765625, 'KL/std': 485.2721862792969, 'logits/chosen': -0.8564267158508301, 'logits/rejected': -0.8684166669845581, 'epoch': 0.8} + 80%|████████ | 547/681 [23:16<05:51, 2.62s/it] 80%|████████ | 548/681 [23:19<05:43, 2.58s/it] {'loss': 1.1184, 'grad_norm': 31.580347061157227, 'learning_rate': 5.684919345471029e-08, 'fcm_dpo/beta': 0.0010729740606620908, 'fcm_dpo/q_t': 0.4163675606250763, 'fcm_dpo/delta': 0.019438141956925392, 'fcm_dpo/margin': 355.3732604980469, 'margin_dpo/margin_mean': 355.373291015625, 'margin_dpo/margin_std': 558.9320068359375, 'logps/chosen': -665.49169921875, 'logps/rejected': -1049.7366943359375, 'logps/ref_chosen': -65.24634552001953, 'logps/ref_rejected': -94.11807250976562, 'KL/chosen_KL_mean': -600.245361328125, 'KL/rejected_KL_mean': -955.61865234375, 'KL/mean': -777.9320068359375, 'KL/std': 507.2066650390625, 'logits/chosen': -0.9392424821853638, 'logits/rejected': -0.9410542845726013, 'epoch': 0.8} + 80%|████████ | 548/681 [23:19<05:43, 2.58s/it] 81%|████████ | 549/681 [23:21<05:37, 2.55s/it] {'loss': 1.1787, 'grad_norm': 48.66642379760742, 'learning_rate': 5.603696935852426e-08, 'fcm_dpo/beta': 0.0010954017052426934, 'fcm_dpo/q_t': 0.43178755044937134, 'fcm_dpo/delta': 0.10109251737594604, 'fcm_dpo/margin': 275.03582763671875, 'margin_dpo/margin_mean': 275.03582763671875, 'margin_dpo/margin_std': 496.511474609375, 'logps/chosen': -667.7421875, 'logps/rejected': -967.4760131835938, 'logps/ref_chosen': -49.21235656738281, 'logps/ref_rejected': -73.91031646728516, 'KL/chosen_KL_mean': -618.5299072265625, 'KL/rejected_KL_mean': -893.565673828125, 'KL/mean': -756.0477294921875, 'KL/std': 416.1364440917969, 'logits/chosen': -0.9122521877288818, 'logits/rejected': -0.9025084376335144, 'epoch': 0.81} + 81%|████████ | 549/681 [23:21<05:37, 2.55s/it] 81%|████████ | 550/681 [23:24<05:36, 2.57s/it] {'loss': 1.132, 'grad_norm': 37.804141998291016, 'learning_rate': 5.5229856368582376e-08, 'fcm_dpo/beta': 0.0011030520545318723, 'fcm_dpo/q_t': 0.4183220863342285, 'fcm_dpo/delta': 0.042437393218278885, 'fcm_dpo/margin': 325.55517578125, 'margin_dpo/margin_mean': 325.55517578125, 'margin_dpo/margin_std': 513.7775268554688, 'logps/chosen': -694.664794921875, 'logps/rejected': -1058.538818359375, 'logps/ref_chosen': -56.80695343017578, 'logps/ref_rejected': -95.12580871582031, 'KL/chosen_KL_mean': -637.8577880859375, 'KL/rejected_KL_mean': -963.4129638671875, 'KL/mean': -800.6353759765625, 'KL/std': 484.2886962890625, 'logits/chosen': -0.8820310831069946, 'logits/rejected': -0.9049103260040283, 'epoch': 0.81} + 81%|████████ | 550/681 [23:24<05:36, 2.57s/it] 81%|████████ | 551/681 [23:26<05:29, 2.54s/it] {'loss': 0.9641, 'grad_norm': 56.2789306640625, 'learning_rate': 5.4427875753062734e-08, 'fcm_dpo/beta': 0.0010755530092865229, 'fcm_dpo/q_t': 0.36954620480537415, 'fcm_dpo/delta': -0.17676769196987152, 'fcm_dpo/margin': 526.6087036132812, 'margin_dpo/margin_mean': 526.6087036132812, 'margin_dpo/margin_std': 504.5882568359375, 'logps/chosen': -606.9834594726562, 'logps/rejected': -1186.15869140625, 'logps/ref_chosen': -59.10633087158203, 'logps/ref_rejected': -111.67280578613281, 'KL/chosen_KL_mean': -547.8771362304688, 'KL/rejected_KL_mean': -1074.48583984375, 'KL/mean': -811.1815185546875, 'KL/std': 508.75482177734375, 'logits/chosen': -0.8909007906913757, 'logits/rejected': -0.9451035857200623, 'epoch': 0.81} + 81%|████████ | 551/681 [23:26<05:29, 2.54s/it] 81%|████████ | 552/681 [23:29<05:16, 2.46s/it] {'loss': 0.9746, 'grad_norm': 48.699928283691406, 'learning_rate': 5.363104864490034e-08, 'fcm_dpo/beta': 0.0010228096507489681, 'fcm_dpo/q_t': 0.3673725724220276, 'fcm_dpo/delta': -0.229129359126091, 'fcm_dpo/margin': 598.0020751953125, 'margin_dpo/margin_mean': 598.0020751953125, 'margin_dpo/margin_std': 696.8597412109375, 'logps/chosen': -597.419677734375, 'logps/rejected': -1237.62939453125, 'logps/ref_chosen': -62.35459899902344, 'logps/ref_rejected': -104.56210327148438, 'KL/chosen_KL_mean': -535.0650634765625, 'KL/rejected_KL_mean': -1133.067138671875, 'KL/mean': -834.066162109375, 'KL/std': 607.322265625, 'logits/chosen': -0.9140257835388184, 'logits/rejected': -0.9522314071655273, 'epoch': 0.81} + 81%|████████ | 552/681 [23:29<05:16, 2.46s/it] 81%|████████ | 553/681 [23:31<05:23, 2.53s/it] {'loss': 1.1505, 'grad_norm': 26.667526245117188, 'learning_rate': 5.2839396041230415e-08, 'fcm_dpo/beta': 0.0010280333226546645, 'fcm_dpo/q_t': 0.42561495304107666, 'fcm_dpo/delta': 0.06775818020105362, 'fcm_dpo/margin': 325.43560791015625, 'margin_dpo/margin_mean': 325.43560791015625, 'margin_dpo/margin_std': 544.8580322265625, 'logps/chosen': -695.8313598632812, 'logps/rejected': -1051.105224609375, 'logps/ref_chosen': -68.25881958007812, 'logps/ref_rejected': -98.0971450805664, 'KL/chosen_KL_mean': -627.572509765625, 'KL/rejected_KL_mean': -953.0081176757812, 'KL/mean': -790.2903442382812, 'KL/std': 503.21502685546875, 'logits/chosen': -0.8898186683654785, 'logits/rejected': -0.8853092789649963, 'epoch': 0.81} + 81%|████████ | 553/681 [23:31<05:23, 2.53s/it] 81%|████████▏ | 554/681 [23:34<05:29, 2.60s/it] {'loss': 1.1182, 'grad_norm': 76.91921997070312, 'learning_rate': 5.205293880283551e-08, 'fcm_dpo/beta': 0.0010344828478991985, 'fcm_dpo/q_t': 0.40528228878974915, 'fcm_dpo/delta': -0.034839678555727005, 'fcm_dpo/margin': 418.43157958984375, 'margin_dpo/margin_mean': 418.43157958984375, 'margin_dpo/margin_std': 690.6478881835938, 'logps/chosen': -716.26953125, 'logps/rejected': -1156.5361328125, 'logps/ref_chosen': -67.94767761230469, 'logps/ref_rejected': -89.78272247314453, 'KL/chosen_KL_mean': -648.32177734375, 'KL/rejected_KL_mean': -1066.75341796875, 'KL/mean': -857.53759765625, 'KL/std': 539.8403930664062, 'logits/chosen': -0.8666242957115173, 'logits/rejected': -0.8389246463775635, 'epoch': 0.81} + 81%|████████▏ | 554/681 [23:34<05:29, 2.60s/it] 81%|████████▏ | 555/681 [23:36<05:20, 2.54s/it] {'loss': 1.0655, 'grad_norm': 40.76702880859375, 'learning_rate': 5.127169765359515e-08, 'fcm_dpo/beta': 0.0010111583396792412, 'fcm_dpo/q_t': 0.3918069899082184, 'fcm_dpo/delta': -0.10909023135900497, 'fcm_dpo/margin': 498.12396240234375, 'margin_dpo/margin_mean': 498.12396240234375, 'margin_dpo/margin_std': 737.4571533203125, 'logps/chosen': -703.8853149414062, 'logps/rejected': -1257.158203125, 'logps/ref_chosen': -53.33049011230469, 'logps/ref_rejected': -108.47937774658203, 'KL/chosen_KL_mean': -650.5548095703125, 'KL/rejected_KL_mean': -1148.6787109375, 'KL/mean': -899.6168212890625, 'KL/std': 573.9271850585938, 'logits/chosen': -0.9580224752426147, 'logits/rejected': -1.0123507976531982, 'epoch': 0.81} + 81%|████████▏ | 555/681 [23:36<05:20, 2.54s/it] 82%|████████▏ | 556/681 [23:39<05:23, 2.59s/it] {'loss': 1.1524, 'grad_norm': 35.782039642333984, 'learning_rate': 5.049569317994012e-08, 'fcm_dpo/beta': 0.0010182505939155817, 'fcm_dpo/q_t': 0.4286388158798218, 'fcm_dpo/delta': 0.09340062737464905, 'fcm_dpo/margin': 303.96832275390625, 'margin_dpo/margin_mean': 303.96832275390625, 'margin_dpo/margin_std': 460.0691223144531, 'logps/chosen': -704.904296875, 'logps/rejected': -1051.568603515625, 'logps/ref_chosen': -58.64447021484375, 'logps/ref_rejected': -101.34040832519531, 'KL/chosen_KL_mean': -646.2598266601562, 'KL/rejected_KL_mean': -950.2281494140625, 'KL/mean': -798.2440185546875, 'KL/std': 454.10919189453125, 'logits/chosen': -0.9508916735649109, 'logits/rejected': -0.9452144503593445, 'epoch': 0.82} + 82%|████████▏ | 556/681 [23:39<05:23, 2.59s/it] 82%|████████▏ | 557/681 [23:42<05:22, 2.60s/it] {'loss': 1.1038, 'grad_norm': 52.255699157714844, 'learning_rate': 4.9724945830310144e-08, 'fcm_dpo/beta': 0.0010126400738954544, 'fcm_dpo/q_t': 0.4009940028190613, 'fcm_dpo/delta': -0.05826106667518616, 'fcm_dpo/margin': 449.748046875, 'margin_dpo/margin_mean': 449.748046875, 'margin_dpo/margin_std': 723.2161865234375, 'logps/chosen': -785.1865234375, 'logps/rejected': -1277.033447265625, 'logps/ref_chosen': -67.84066009521484, 'logps/ref_rejected': -109.93965911865234, 'KL/chosen_KL_mean': -717.3458251953125, 'KL/rejected_KL_mean': -1167.0938720703125, 'KL/mean': -942.2198486328125, 'KL/std': 636.468505859375, 'logits/chosen': -1.0088746547698975, 'logits/rejected': -1.0433576107025146, 'epoch': 0.82} + 82%|████████▏ | 557/681 [23:42<05:22, 2.60s/it] 82%|████████▏ | 558/681 [23:44<05:17, 2.58s/it] {'loss': 0.9661, 'grad_norm': 30.629545211791992, 'learning_rate': 4.8959475914614554e-08, 'fcm_dpo/beta': 0.0009801845299080014, 'fcm_dpo/q_t': 0.36214083433151245, 'fcm_dpo/delta': -0.21119916439056396, 'fcm_dpo/margin': 610.62109375, 'margin_dpo/margin_mean': 610.62109375, 'margin_dpo/margin_std': 642.599365234375, 'logps/chosen': -706.3460083007812, 'logps/rejected': -1356.759765625, 'logps/ref_chosen': -62.36824035644531, 'logps/ref_rejected': -102.16102600097656, 'KL/chosen_KL_mean': -643.977783203125, 'KL/rejected_KL_mean': -1254.5987548828125, 'KL/mean': -949.288330078125, 'KL/std': 584.4927978515625, 'logits/chosen': -1.027779221534729, 'logits/rejected': -1.046311855316162, 'epoch': 0.82} + 82%|████████▏ | 558/681 [23:44<05:17, 2.58s/it] 82%|████████▏ | 559/681 [23:47<05:13, 2.57s/it] {'loss': 1.0467, 'grad_norm': 32.09720993041992, 'learning_rate': 4.8199303603697614e-08, 'fcm_dpo/beta': 0.0009573526913300157, 'fcm_dpo/q_t': 0.3901920020580292, 'fcm_dpo/delta': -0.09849410504102707, 'fcm_dpo/margin': 515.7313842773438, 'margin_dpo/margin_mean': 515.7313232421875, 'margin_dpo/margin_std': 678.709228515625, 'logps/chosen': -804.49267578125, 'logps/rejected': -1352.9139404296875, 'logps/ref_chosen': -60.752323150634766, 'logps/ref_rejected': -93.44229125976562, 'KL/chosen_KL_mean': -743.7402954101562, 'KL/rejected_KL_mean': -1259.4716796875, 'KL/mean': -1001.60595703125, 'KL/std': 617.6702270507812, 'logits/chosen': -1.132476568222046, 'logits/rejected': -1.138415813446045, 'epoch': 0.82} + 82%|████████▏ | 559/681 [23:47<05:13, 2.57s/it] 82%|████████▏ | 560/681 [23:50<05:17, 2.63s/it] {'loss': 1.1513, 'grad_norm': 37.20246505737305, 'learning_rate': 4.7444448928806615e-08, 'fcm_dpo/beta': 0.0009546001674607396, 'fcm_dpo/q_t': 0.4228675365447998, 'fcm_dpo/delta': 0.05963495746254921, 'fcm_dpo/margin': 358.48895263671875, 'margin_dpo/margin_mean': 358.48895263671875, 'margin_dpo/margin_std': 598.878173828125, 'logps/chosen': -737.8831787109375, 'logps/rejected': -1118.259521484375, 'logps/ref_chosen': -58.10382080078125, 'logps/ref_rejected': -79.99122619628906, 'KL/chosen_KL_mean': -679.7794189453125, 'KL/rejected_KL_mean': -1038.268310546875, 'KL/mean': -859.0238647460938, 'KL/std': 535.8975830078125, 'logits/chosen': -0.8968836069107056, 'logits/rejected': -0.8791143894195557, 'epoch': 0.82} + 82%|████████▏ | 560/681 [23:50<05:17, 2.63s/it] 82%|████████▏ | 561/681 [23:52<05:08, 2.57s/it] {'loss': 1.1995, 'grad_norm': 47.342132568359375, 'learning_rate': 4.669493178106432e-08, 'fcm_dpo/beta': 0.0009781813714653254, 'fcm_dpo/q_t': 0.4291490614414215, 'fcm_dpo/delta': 0.09319829940795898, 'fcm_dpo/margin': 316.22100830078125, 'margin_dpo/margin_mean': 316.22100830078125, 'margin_dpo/margin_std': 669.9414672851562, 'logps/chosen': -831.0726318359375, 'logps/rejected': -1195.44921875, 'logps/ref_chosen': -50.912879943847656, 'logps/ref_rejected': -99.06856536865234, 'KL/chosen_KL_mean': -780.15966796875, 'KL/rejected_KL_mean': -1096.380615234375, 'KL/mean': -938.270263671875, 'KL/std': 530.6477661132812, 'logits/chosen': -1.0569636821746826, 'logits/rejected': -1.0779967308044434, 'epoch': 0.82} + 82%|████████▏ | 561/681 [23:52<05:08, 2.57s/it] 83%|████████▎ | 562/681 [23:55<05:09, 2.60s/it] {'loss': 1.1048, 'grad_norm': 35.87330627441406, 'learning_rate': 4.5950771910944596e-08, 'fcm_dpo/beta': 0.0009731657337397337, 'fcm_dpo/q_t': 0.4083036184310913, 'fcm_dpo/delta': -0.013568423688411713, 'fcm_dpo/margin': 423.7640380859375, 'margin_dpo/margin_mean': 423.7640380859375, 'margin_dpo/margin_std': 645.1516723632812, 'logps/chosen': -804.82275390625, 'logps/rejected': -1265.6650390625, 'logps/ref_chosen': -59.46440124511719, 'logps/ref_rejected': -96.54266357421875, 'KL/chosen_KL_mean': -745.3583984375, 'KL/rejected_KL_mean': -1169.122314453125, 'KL/mean': -957.2404174804688, 'KL/std': 593.1848754882812, 'logits/chosen': -0.9769254326820374, 'logits/rejected': -0.9813790321350098, 'epoch': 0.83} + 83%|████████▎ | 562/681 [23:55<05:09, 2.60s/it] 83%|████████▎ | 563/681 [23:57<04:58, 2.53s/it] {'loss': 1.2296, 'grad_norm': 42.06772232055664, 'learning_rate': 4.521198892775202e-08, 'fcm_dpo/beta': 0.000972322653979063, 'fcm_dpo/q_t': 0.42368167638778687, 'fcm_dpo/delta': -0.05465248227119446, 'fcm_dpo/margin': 322.13623046875, 'margin_dpo/margin_mean': 322.13623046875, 'margin_dpo/margin_std': 744.3589477539062, 'logps/chosen': -890.311279296875, 'logps/rejected': -1246.406982421875, 'logps/ref_chosen': -60.60819625854492, 'logps/ref_rejected': -94.56770324707031, 'KL/chosen_KL_mean': -829.703125, 'KL/rejected_KL_mean': -1151.83935546875, 'KL/mean': -990.771240234375, 'KL/std': 633.045166015625, 'logits/chosen': -1.0264474153518677, 'logits/rejected': -1.0321646928787231, 'epoch': 0.83} + 83%|████████▎ | 563/681 [23:57<04:58, 2.53s/it] 83%|████████▎ | 564/681 [24:00<04:53, 2.51s/it] {'loss': 1.099, 'grad_norm': 35.352901458740234, 'learning_rate': 4.447860229910544e-08, 'fcm_dpo/beta': 0.0009697899222373962, 'fcm_dpo/q_t': 0.4103137254714966, 'fcm_dpo/delta': 0.0011525209993124008, 'fcm_dpo/margin': 411.1693115234375, 'margin_dpo/margin_mean': 411.1693115234375, 'margin_dpo/margin_std': 568.3698120117188, 'logps/chosen': -821.2705078125, 'logps/rejected': -1251.40966796875, 'logps/ref_chosen': -74.26837921142578, 'logps/ref_rejected': -93.23818969726562, 'KL/chosen_KL_mean': -747.0021362304688, 'KL/rejected_KL_mean': -1158.1715087890625, 'KL/mean': -952.5867919921875, 'KL/std': 572.9649658203125, 'logits/chosen': -1.1010963916778564, 'logits/rejected': -1.0915511846542358, 'epoch': 0.83} + 83%|████████▎ | 564/681 [24:00<04:53, 2.51s/it] 83%|████████▎ | 565/681 [24:02<04:55, 2.54s/it] {'loss': 1.1322, 'grad_norm': 44.35929870605469, 'learning_rate': 4.375063135042445e-08, 'fcm_dpo/beta': 0.0009645746322348714, 'fcm_dpo/q_t': 0.4098883271217346, 'fcm_dpo/delta': -0.01686248928308487, 'fcm_dpo/margin': 431.43963623046875, 'margin_dpo/margin_mean': 431.43963623046875, 'margin_dpo/margin_std': 756.8804931640625, 'logps/chosen': -845.9700927734375, 'logps/rejected': -1294.1688232421875, 'logps/ref_chosen': -69.0199203491211, 'logps/ref_rejected': -85.7789306640625, 'KL/chosen_KL_mean': -776.9501953125, 'KL/rejected_KL_mean': -1208.389892578125, 'KL/mean': -992.6700439453125, 'KL/std': 637.2417602539062, 'logits/chosen': -1.0143120288848877, 'logits/rejected': -1.0142502784729004, 'epoch': 0.83} + 83%|████████▎ | 565/681 [24:02<04:55, 2.54s/it] 83%|████████▎ | 566/681 [24:05<04:58, 2.59s/it] {'loss': 1.1008, 'grad_norm': 35.317893981933594, 'learning_rate': 4.3028095264420525e-08, 'fcm_dpo/beta': 0.0009599901968613267, 'fcm_dpo/q_t': 0.39742326736450195, 'fcm_dpo/delta': -0.06941938400268555, 'fcm_dpo/margin': 485.2709655761719, 'margin_dpo/margin_mean': 485.27099609375, 'margin_dpo/margin_std': 765.5435180664062, 'logps/chosen': -797.424072265625, 'logps/rejected': -1320.0191650390625, 'logps/ref_chosen': -66.5453109741211, 'logps/ref_rejected': -103.86932373046875, 'KL/chosen_KL_mean': -730.8787841796875, 'KL/rejected_KL_mean': -1216.14990234375, 'KL/mean': -973.5142822265625, 'KL/std': 658.5827026367188, 'logits/chosen': -1.0451146364212036, 'logits/rejected': -1.0700435638427734, 'epoch': 0.83} + 83%|████████▎ | 566/681 [24:05<04:58, 2.59s/it] 83%|████████▎ | 567/681 [24:07<04:52, 2.56s/it] {'loss': 1.1121, 'grad_norm': 29.64704132080078, 'learning_rate': 4.231101308059165e-08, 'fcm_dpo/beta': 0.0009558956371620297, 'fcm_dpo/q_t': 0.41558361053466797, 'fcm_dpo/delta': 0.0415302999317646, 'fcm_dpo/margin': 376.59283447265625, 'margin_dpo/margin_mean': 376.59283447265625, 'margin_dpo/margin_std': 499.68634033203125, 'logps/chosen': -741.9003295898438, 'logps/rejected': -1151.005859375, 'logps/ref_chosen': -52.85829544067383, 'logps/ref_rejected': -85.37095642089844, 'KL/chosen_KL_mean': -689.0420532226562, 'KL/rejected_KL_mean': -1065.6348876953125, 'KL/mean': -877.3385009765625, 'KL/std': 457.2042236328125, 'logits/chosen': -1.1439913511276245, 'logits/rejected': -1.1560258865356445, 'epoch': 0.83} + 83%|████████▎ | 567/681 [24:07<04:52, 2.56s/it] 83%|████████▎ | 568/681 [24:10<04:43, 2.51s/it] {'loss': 1.0304, 'grad_norm': 32.13995361328125, 'learning_rate': 4.1599403694720145e-08, 'fcm_dpo/beta': 0.0009455858962610364, 'fcm_dpo/q_t': 0.3895892798900604, 'fcm_dpo/delta': -0.0800839364528656, 'fcm_dpo/margin': 503.4710693359375, 'margin_dpo/margin_mean': 503.47100830078125, 'margin_dpo/margin_std': 561.0274658203125, 'logps/chosen': -727.551025390625, 'logps/rejected': -1274.9219970703125, 'logps/ref_chosen': -45.1923828125, 'logps/ref_rejected': -89.09236907958984, 'KL/chosen_KL_mean': -682.358642578125, 'KL/rejected_KL_mean': -1185.82958984375, 'KL/mean': -934.0941162109375, 'KL/std': 537.24072265625, 'logits/chosen': -0.9833190441131592, 'logits/rejected': -1.0224902629852295, 'epoch': 0.83} + 83%|████████▎ | 568/681 [24:10<04:43, 2.51s/it] 84%|████████▎ | 569/681 [24:12<04:44, 2.54s/it] {'loss': 1.1468, 'grad_norm': 56.671836853027344, 'learning_rate': 4.089328585837512e-08, 'fcm_dpo/beta': 0.0009511442622169852, 'fcm_dpo/q_t': 0.4123598337173462, 'fcm_dpo/delta': 0.009604483842849731, 'fcm_dpo/margin': 409.83868408203125, 'margin_dpo/margin_mean': 409.83868408203125, 'margin_dpo/margin_std': 721.668701171875, 'logps/chosen': -847.465576171875, 'logps/rejected': -1272.68701171875, 'logps/ref_chosen': -63.72056198120117, 'logps/ref_rejected': -79.10325622558594, 'KL/chosen_KL_mean': -783.7449951171875, 'KL/rejected_KL_mean': -1193.583740234375, 'KL/mean': -988.6644287109375, 'KL/std': 691.3892211914062, 'logits/chosen': -1.0582460165023804, 'logits/rejected': -1.064152479171753, 'epoch': 0.84} + 84%|████████▎ | 569/681 [24:12<04:44, 2.54s/it] 84%|████████▎ | 570/681 [24:15<04:44, 2.56s/it] {'loss': 1.1368, 'grad_norm': 27.773193359375, 'learning_rate': 4.019267817841834e-08, 'fcm_dpo/beta': 0.0009502613684162498, 'fcm_dpo/q_t': 0.41972124576568604, 'fcm_dpo/delta': 0.0452612042427063, 'fcm_dpo/margin': 375.0276184082031, 'margin_dpo/margin_mean': 375.02764892578125, 'margin_dpo/margin_std': 592.921875, 'logps/chosen': -784.64794921875, 'logps/rejected': -1180.202880859375, 'logps/ref_chosen': -61.61454391479492, 'logps/ref_rejected': -82.14186096191406, 'KL/chosen_KL_mean': -723.0333862304688, 'KL/rejected_KL_mean': -1098.06103515625, 'KL/mean': -910.5472412109375, 'KL/std': 545.8590087890625, 'logits/chosen': -1.1307826042175293, 'logits/rejected': -1.123297095298767, 'epoch': 0.84} + 84%|████████▎ | 570/681 [24:15<04:44, 2.56s/it] 84%|████████▍ | 571/681 [24:18<04:42, 2.57s/it] {'loss': 1.0993, 'grad_norm': 41.67679977416992, 'learning_rate': 3.9497599116513705e-08, 'fcm_dpo/beta': 0.0009471910889260471, 'fcm_dpo/q_t': 0.4038824439048767, 'fcm_dpo/delta': -0.030962642282247543, 'fcm_dpo/margin': 453.40087890625, 'margin_dpo/margin_mean': 453.40087890625, 'margin_dpo/margin_std': 687.6110229492188, 'logps/chosen': -770.1240234375, 'logps/rejected': -1261.8076171875, 'logps/ref_chosen': -53.05406188964844, 'logps/ref_rejected': -91.33682250976562, 'KL/chosen_KL_mean': -717.0699462890625, 'KL/rejected_KL_mean': -1170.4708251953125, 'KL/mean': -943.7703857421875, 'KL/std': 552.2366333007812, 'logits/chosen': -1.0074293613433838, 'logits/rejected': -1.0243608951568604, 'epoch': 0.84} + 84%|████████▍ | 571/681 [24:18<04:42, 2.57s/it] 84%|████████▍ | 572/681 [24:20<04:33, 2.51s/it] {'loss': 1.1112, 'grad_norm': 35.46324157714844, 'learning_rate': 3.880806698864086e-08, 'fcm_dpo/beta': 0.000938563549425453, 'fcm_dpo/q_t': 0.4047321081161499, 'fcm_dpo/delta': -0.04543805494904518, 'fcm_dpo/margin': 472.1002197265625, 'margin_dpo/margin_mean': 472.1002197265625, 'margin_dpo/margin_std': 790.4967651367188, 'logps/chosen': -811.444091796875, 'logps/rejected': -1318.64208984375, 'logps/ref_chosen': -48.45928955078125, 'logps/ref_rejected': -83.55703735351562, 'KL/chosen_KL_mean': -762.98486328125, 'KL/rejected_KL_mean': -1235.0849609375, 'KL/mean': -999.034912109375, 'KL/std': 659.6268310546875, 'logits/chosen': -1.0459448099136353, 'logits/rejected': -1.078913688659668, 'epoch': 0.84} + 84%|████████▍ | 572/681 [24:20<04:33, 2.51s/it] 84%|████████▍ | 573/681 [24:22<04:22, 2.43s/it] {'loss': 1.1033, 'grad_norm': 28.161340713500977, 'learning_rate': 3.812409996461275e-08, 'fcm_dpo/beta': 0.0009429033380001783, 'fcm_dpo/q_t': 0.4132142663002014, 'fcm_dpo/delta': 0.016076089814305305, 'fcm_dpo/margin': 407.7635498046875, 'margin_dpo/margin_mean': 407.7635498046875, 'margin_dpo/margin_std': 570.0986328125, 'logps/chosen': -808.8343505859375, 'logps/rejected': -1250.30029296875, 'logps/ref_chosen': -51.62262725830078, 'logps/ref_rejected': -85.32499694824219, 'KL/chosen_KL_mean': -757.2117919921875, 'KL/rejected_KL_mean': -1164.975341796875, 'KL/mean': -961.093505859375, 'KL/std': 571.9259643554688, 'logits/chosen': -1.086681604385376, 'logits/rejected': -1.0990102291107178, 'epoch': 0.84} + 84%|████████▍ | 573/681 [24:22<04:22, 2.43s/it] 84%|████████▍ | 574/681 [24:25<04:29, 2.52s/it] {'loss': 1.0903, 'grad_norm': 33.936519622802734, 'learning_rate': 3.74457160675965e-08, 'fcm_dpo/beta': 0.0009430091013200581, 'fcm_dpo/q_t': 0.40598154067993164, 'fcm_dpo/delta': -0.011370273306965828, 'fcm_dpo/margin': 435.6454772949219, 'margin_dpo/margin_mean': 435.6454772949219, 'margin_dpo/margin_std': 592.3309936523438, 'logps/chosen': -719.5946044921875, 'logps/rejected': -1197.001953125, 'logps/ref_chosen': -51.04446029663086, 'logps/ref_rejected': -92.80640411376953, 'KL/chosen_KL_mean': -668.5501708984375, 'KL/rejected_KL_mean': -1104.195556640625, 'KL/mean': -886.3728637695312, 'KL/std': 513.22802734375, 'logits/chosen': -1.0922186374664307, 'logits/rejected': -1.1221637725830078, 'epoch': 0.84} + 84%|████████▍ | 574/681 [24:25<04:29, 2.52s/it] 84%|████████▍ | 575/681 [24:27<04:30, 2.55s/it] {'loss': 1.1378, 'grad_norm': 35.51095199584961, 'learning_rate': 3.677293317363864e-08, 'fcm_dpo/beta': 0.0009349288884550333, 'fcm_dpo/q_t': 0.4125257134437561, 'fcm_dpo/delta': 0.014928296208381653, 'fcm_dpo/margin': 411.2996520996094, 'margin_dpo/margin_mean': 411.2996520996094, 'margin_dpo/margin_std': 677.067138671875, 'logps/chosen': -799.5704345703125, 'logps/rejected': -1234.466064453125, 'logps/ref_chosen': -71.7901382446289, 'logps/ref_rejected': -95.38619995117188, 'KL/chosen_KL_mean': -727.7802734375, 'KL/rejected_KL_mean': -1139.079833984375, 'KL/mean': -933.4301147460938, 'KL/std': 528.300537109375, 'logits/chosen': -0.9489999413490295, 'logits/rejected': -0.9602969288825989, 'epoch': 0.84} + 84%|████████▍ | 575/681 [24:28<04:30, 2.55s/it] 85%|████████▍ | 576/681 [24:30<04:22, 2.50s/it] {'loss': 1.1852, 'grad_norm': 36.57832717895508, 'learning_rate': 3.6105769011194224e-08, 'fcm_dpo/beta': 0.0009562689810991287, 'fcm_dpo/q_t': 0.43350422382354736, 'fcm_dpo/delta': 0.11256685107946396, 'fcm_dpo/margin': 304.1596374511719, 'margin_dpo/margin_mean': 304.15960693359375, 'margin_dpo/margin_std': 556.28271484375, 'logps/chosen': -744.6478881835938, 'logps/rejected': -1095.298828125, 'logps/ref_chosen': -54.262962341308594, 'logps/ref_rejected': -100.75428009033203, 'KL/chosen_KL_mean': -690.3848876953125, 'KL/rejected_KL_mean': -994.5445556640625, 'KL/mean': -842.4647216796875, 'KL/std': 470.174560546875, 'logits/chosen': -1.0214297771453857, 'logits/rejected': -1.048740029335022, 'epoch': 0.85} + 85%|████████▍ | 576/681 [24:30<04:22, 2.50s/it] 85%|████████▍ | 577/681 [24:32<04:20, 2.50s/it] {'loss': 1.1187, 'grad_norm': 29.529443740844727, 'learning_rate': 3.5444241160659304e-08, 'fcm_dpo/beta': 0.000964190810918808, 'fcm_dpo/q_t': 0.4120888113975525, 'fcm_dpo/delta': 0.00915931724011898, 'fcm_dpo/margin': 405.6855773925781, 'margin_dpo/margin_mean': 405.68560791015625, 'margin_dpo/margin_std': 598.992919921875, 'logps/chosen': -689.5594482421875, 'logps/rejected': -1117.406005859375, 'logps/ref_chosen': -61.909706115722656, 'logps/ref_rejected': -84.07069396972656, 'KL/chosen_KL_mean': -627.6497802734375, 'KL/rejected_KL_mean': -1033.33544921875, 'KL/mean': -830.4925537109375, 'KL/std': 542.0623779296875, 'logits/chosen': -1.0218915939331055, 'logits/rejected': -1.0101161003112793, 'epoch': 0.85} + 85%|████████▍ | 577/681 [24:32<04:20, 2.50s/it] 85%|████████▍ | 578/681 [24:35<04:21, 2.54s/it] {'loss': 1.0771, 'grad_norm': 50.43282699584961, 'learning_rate': 3.478836705390808e-08, 'fcm_dpo/beta': 0.0009588984539732337, 'fcm_dpo/q_t': 0.40431180596351624, 'fcm_dpo/delta': -0.01690073311328888, 'fcm_dpo/margin': 433.4109802246094, 'margin_dpo/margin_mean': 433.41094970703125, 'margin_dpo/margin_std': 535.86767578125, 'logps/chosen': -645.139892578125, 'logps/rejected': -1112.723388671875, 'logps/ref_chosen': -49.26368713378906, 'logps/ref_rejected': -83.4362564086914, 'KL/chosen_KL_mean': -595.876220703125, 'KL/rejected_KL_mean': -1029.2872314453125, 'KL/mean': -812.5816650390625, 'KL/std': 519.2777099609375, 'logits/chosen': -0.9214882850646973, 'logits/rejected': -0.9523541331291199, 'epoch': 0.85} + 85%|████████▍ | 578/681 [24:35<04:21, 2.54s/it] 85%|████████▌ | 579/681 [24:38<04:27, 2.62s/it] {'loss': 1.2174, 'grad_norm': 68.8424301147461, 'learning_rate': 3.41381639738331e-08, 'fcm_dpo/beta': 0.0009806466987356544, 'fcm_dpo/q_t': 0.43853724002838135, 'fcm_dpo/delta': 0.13623100519180298, 'fcm_dpo/margin': 272.8934326171875, 'margin_dpo/margin_mean': 272.8934326171875, 'margin_dpo/margin_std': 593.593017578125, 'logps/chosen': -782.0794677734375, 'logps/rejected': -1090.8746337890625, 'logps/ref_chosen': -58.88581848144531, 'logps/ref_rejected': -94.78762817382812, 'KL/chosen_KL_mean': -723.193603515625, 'KL/rejected_KL_mean': -996.0870361328125, 'KL/mean': -859.6403198242188, 'KL/std': 547.017578125, 'logits/chosen': -0.9866000413894653, 'logits/rejected': -0.9893920421600342, 'epoch': 0.85} + 85%|████████▌ | 579/681 [24:38<04:27, 2.62s/it] 85%|████████▌ | 580/681 [24:40<04:25, 2.63s/it] {'loss': 1.0507, 'grad_norm': 39.8839111328125, 'learning_rate': 3.349364905389032e-08, 'fcm_dpo/beta': 0.0009732701582834125, 'fcm_dpo/q_t': 0.3939579725265503, 'fcm_dpo/delta': -0.0888274759054184, 'fcm_dpo/margin': 497.30218505859375, 'margin_dpo/margin_mean': 497.3021545410156, 'margin_dpo/margin_std': 672.8572998046875, 'logps/chosen': -556.1804809570312, 'logps/rejected': -1086.5341796875, 'logps/ref_chosen': -48.70683670043945, 'logps/ref_rejected': -81.7583999633789, 'KL/chosen_KL_mean': -507.4736633300781, 'KL/rejected_KL_mean': -1004.7757568359375, 'KL/mean': -756.1246948242188, 'KL/std': 589.247802734375, 'logits/chosen': -0.8748548030853271, 'logits/rejected': -0.9090137481689453, 'epoch': 0.85} + 85%|████████▌ | 580/681 [24:40<04:25, 2.63s/it] 85%|████████▌ | 581/681 [24:43<04:17, 2.58s/it] {'loss': 1.1566, 'grad_norm': 37.68699264526367, 'learning_rate': 3.285483927764726e-08, 'fcm_dpo/beta': 0.0009817921090871096, 'fcm_dpo/q_t': 0.42394953966140747, 'fcm_dpo/delta': 0.05957435816526413, 'fcm_dpo/margin': 348.74224853515625, 'margin_dpo/margin_mean': 348.74224853515625, 'margin_dpo/margin_std': 622.7120361328125, 'logps/chosen': -772.9916381835938, 'logps/rejected': -1151.247314453125, 'logps/ref_chosen': -62.22235107421875, 'logps/ref_rejected': -91.73568725585938, 'KL/chosen_KL_mean': -710.769287109375, 'KL/rejected_KL_mean': -1059.5115966796875, 'KL/mean': -885.140380859375, 'KL/std': 566.3184814453125, 'logits/chosen': -1.0647389888763428, 'logits/rejected': -1.0730311870574951, 'epoch': 0.85} + 85%|████████▌ | 581/681 [24:43<04:17, 2.58s/it] 85%|████████▌ | 582/681 [24:46<04:16, 2.59s/it] {'loss': 1.1147, 'grad_norm': 29.930849075317383, 'learning_rate': 3.222175147833556e-08, 'fcm_dpo/beta': 0.0009761706460267305, 'fcm_dpo/q_t': 0.41167423129081726, 'fcm_dpo/delta': -0.07844623178243637, 'fcm_dpo/margin': 387.3603515625, 'margin_dpo/margin_mean': 387.3603820800781, 'margin_dpo/margin_std': 530.1618041992188, 'logps/chosen': -676.7191772460938, 'logps/rejected': -1115.9205322265625, 'logps/ref_chosen': -58.228660583496094, 'logps/ref_rejected': -110.06959533691406, 'KL/chosen_KL_mean': -618.490478515625, 'KL/rejected_KL_mean': -1005.8509521484375, 'KL/mean': -812.1707153320312, 'KL/std': 488.8114929199219, 'logits/chosen': -1.0147862434387207, 'logits/rejected': -1.0369963645935059, 'epoch': 0.85} + 85%|████████▌ | 582/681 [24:46<04:16, 2.59s/it] 86%|████████▌ | 583/681 [24:48<04:15, 2.61s/it] {'loss': 1.2428, 'grad_norm': 65.24808502197266, 'learning_rate': 3.159440233840763e-08, 'fcm_dpo/beta': 0.0009659786010161042, 'fcm_dpo/q_t': 0.4426102340221405, 'fcm_dpo/delta': -0.013101693242788315, 'fcm_dpo/margin': 264.35589599609375, 'margin_dpo/margin_mean': 264.3559265136719, 'margin_dpo/margin_std': 639.0501098632812, 'logps/chosen': -748.341552734375, 'logps/rejected': -1044.238525390625, 'logps/ref_chosen': -56.86286163330078, 'logps/ref_rejected': -88.4039306640625, 'KL/chosen_KL_mean': -691.4786376953125, 'KL/rejected_KL_mean': -955.8345947265625, 'KL/mean': -823.6566162109375, 'KL/std': 531.1171264648438, 'logits/chosen': -0.9646916389465332, 'logits/rejected': -0.9632136821746826, 'epoch': 0.86} + 86%|████████▌ | 583/681 [24:48<04:15, 2.61s/it] 86%|████████▌ | 584/681 [24:51<04:07, 2.55s/it] {'loss': 1.0358, 'grad_norm': 36.447509765625, 'learning_rate': 3.0972808389096635e-08, 'fcm_dpo/beta': 0.0009539815364405513, 'fcm_dpo/q_t': 0.39147210121154785, 'fcm_dpo/delta': -0.08356797695159912, 'fcm_dpo/margin': 502.6873779296875, 'margin_dpo/margin_mean': 502.6874084472656, 'margin_dpo/margin_std': 587.2181396484375, 'logps/chosen': -665.2413330078125, 'logps/rejected': -1208.6640625, 'logps/ref_chosen': -56.90068054199219, 'logps/ref_rejected': -97.63606262207031, 'KL/chosen_KL_mean': -608.3406982421875, 'KL/rejected_KL_mean': -1111.0279541015625, 'KL/mean': -859.684326171875, 'KL/std': 554.8764038085938, 'logits/chosen': -0.9898185133934021, 'logits/rejected': -1.0079997777938843, 'epoch': 0.86} + 86%|████████▌ | 584/681 [24:51<04:07, 2.55s/it] 86%|████████▌ | 585/681 [24:53<04:02, 2.53s/it] {'loss': 1.1155, 'grad_norm': 32.03108215332031, 'learning_rate': 3.035698600998121e-08, 'fcm_dpo/beta': 0.0009440815774723887, 'fcm_dpo/q_t': 0.4053837060928345, 'fcm_dpo/delta': -0.022542130202054977, 'fcm_dpo/margin': 446.328369140625, 'margin_dpo/margin_mean': 446.328369140625, 'margin_dpo/margin_std': 727.369384765625, 'logps/chosen': -757.5743408203125, 'logps/rejected': -1227.0982666015625, 'logps/ref_chosen': -60.973968505859375, 'logps/ref_rejected': -84.16952514648438, 'KL/chosen_KL_mean': -696.600341796875, 'KL/rejected_KL_mean': -1142.9287109375, 'KL/mean': -919.7645263671875, 'KL/std': 609.264892578125, 'logits/chosen': -0.9939338564872742, 'logits/rejected': -1.0176794528961182, 'epoch': 0.86} + 86%|████████▌ | 585/681 [24:53<04:02, 2.53s/it] 86%|████████▌ | 586/681 [24:56<03:59, 2.52s/it] {'loss': 1.2015, 'grad_norm': 36.572792053222656, 'learning_rate': 2.974695142855388e-08, 'fcm_dpo/beta': 0.0009613102884031832, 'fcm_dpo/q_t': 0.43567806482315063, 'fcm_dpo/delta': 0.12152184545993805, 'fcm_dpo/margin': 293.4059143066406, 'margin_dpo/margin_mean': 293.4058837890625, 'margin_dpo/margin_std': 593.2252197265625, 'logps/chosen': -800.1837158203125, 'logps/rejected': -1128.53662109375, 'logps/ref_chosen': -56.85559844970703, 'logps/ref_rejected': -91.80261993408203, 'KL/chosen_KL_mean': -743.328125, 'KL/rejected_KL_mean': -1036.73388671875, 'KL/mean': -890.031005859375, 'KL/std': 537.0108642578125, 'logits/chosen': -1.0138568878173828, 'logits/rejected': -1.0334415435791016, 'epoch': 0.86} + 86%|████████▌ | 586/681 [24:56<03:59, 2.52s/it] 86%|████████▌ | 587/681 [24:58<04:01, 2.57s/it] {'loss': 1.0931, 'grad_norm': 42.70491409301758, 'learning_rate': 2.9142720719793122e-08, 'fcm_dpo/beta': 0.0009705749107524753, 'fcm_dpo/q_t': 0.40726912021636963, 'fcm_dpo/delta': -0.0043886564671993256, 'fcm_dpo/margin': 416.36767578125, 'margin_dpo/margin_mean': 416.36767578125, 'margin_dpo/margin_std': 568.321044921875, 'logps/chosen': -552.2236328125, 'logps/rejected': -1006.5235595703125, 'logps/ref_chosen': -44.69159698486328, 'logps/ref_rejected': -82.62385559082031, 'KL/chosen_KL_mean': -507.5320129394531, 'KL/rejected_KL_mean': -923.899658203125, 'KL/mean': -715.7158813476562, 'KL/std': 561.7125244140625, 'logits/chosen': -1.0300676822662354, 'logits/rejected': -1.0568914413452148, 'epoch': 0.86} + 86%|████████▌ | 587/681 [24:58<04:01, 2.57s/it] 86%|████████▋ | 588/681 [25:01<03:52, 2.50s/it] {'loss': 1.1426, 'grad_norm': 31.037988662719727, 'learning_rate': 2.8544309805740018e-08, 'fcm_dpo/beta': 0.0009743442060425878, 'fcm_dpo/q_t': 0.4227873980998993, 'fcm_dpo/delta': 0.06948099285364151, 'fcm_dpo/margin': 341.55035400390625, 'margin_dpo/margin_mean': 341.55035400390625, 'margin_dpo/margin_std': 514.8743896484375, 'logps/chosen': -736.037841796875, 'logps/rejected': -1134.6630859375, 'logps/ref_chosen': -50.29494857788086, 'logps/ref_rejected': -107.36988067626953, 'KL/chosen_KL_mean': -685.742919921875, 'KL/rejected_KL_mean': -1027.293212890625, 'KL/mean': -856.51806640625, 'KL/std': 494.27239990234375, 'logits/chosen': -0.9946512579917908, 'logits/rejected': -1.018219232559204, 'epoch': 0.86} + 86%|████████▋ | 588/681 [25:01<03:52, 2.50s/it] 86%|████████▋ | 589/681 [25:03<03:49, 2.49s/it] {'loss': 1.0564, 'grad_norm': 30.49479103088379, 'learning_rate': 2.7951734455078786e-08, 'fcm_dpo/beta': 0.0009745459537953138, 'fcm_dpo/q_t': 0.3962155878543854, 'fcm_dpo/delta': -0.0563356988132, 'fcm_dpo/margin': 465.66778564453125, 'margin_dpo/margin_mean': 465.66778564453125, 'margin_dpo/margin_std': 579.8321533203125, 'logps/chosen': -735.9003295898438, 'logps/rejected': -1253.2935791015625, 'logps/ref_chosen': -59.929908752441406, 'logps/ref_rejected': -111.65534973144531, 'KL/chosen_KL_mean': -675.970458984375, 'KL/rejected_KL_mean': -1141.63818359375, 'KL/mean': -908.8043212890625, 'KL/std': 545.934326171875, 'logits/chosen': -0.9664604663848877, 'logits/rejected': -0.9765450954437256, 'epoch': 0.86} + 86%|████████▋ | 589/681 [25:03<03:49, 2.49s/it] 87%|████████▋ | 590/681 [25:05<03:43, 2.46s/it] {'loss': 1.0625, 'grad_norm': 30.36831283569336, 'learning_rate': 2.736501028272095e-08, 'fcm_dpo/beta': 0.0009633679874241352, 'fcm_dpo/q_t': 0.3995480537414551, 'fcm_dpo/delta': -0.04375208914279938, 'fcm_dpo/margin': 458.553466796875, 'margin_dpo/margin_mean': 458.553466796875, 'margin_dpo/margin_std': 575.5927734375, 'logps/chosen': -641.8304443359375, 'logps/rejected': -1150.636962890625, 'logps/ref_chosen': -55.80979537963867, 'logps/ref_rejected': -106.06282043457031, 'KL/chosen_KL_mean': -586.0206298828125, 'KL/rejected_KL_mean': -1044.573974609375, 'KL/mean': -815.29736328125, 'KL/std': 534.37109375, 'logits/chosen': -0.9607778191566467, 'logits/rejected': -0.9915695190429688, 'epoch': 0.87} + 87%|████████▋ | 590/681 [25:05<03:43, 2.46s/it] 87%|████████▋ | 591/681 [25:08<03:33, 2.37s/it] {'loss': 1.0985, 'grad_norm': 31.83711051940918, 'learning_rate': 2.678415274939408e-08, 'fcm_dpo/beta': 0.0009612845606170595, 'fcm_dpo/q_t': 0.40647366642951965, 'fcm_dpo/delta': -0.005106211174279451, 'fcm_dpo/margin': 421.20489501953125, 'margin_dpo/margin_mean': 421.20489501953125, 'margin_dpo/margin_std': 595.3729248046875, 'logps/chosen': -724.1729736328125, 'logps/rejected': -1172.9234619140625, 'logps/ref_chosen': -56.24061965942383, 'logps/ref_rejected': -83.78629302978516, 'KL/chosen_KL_mean': -667.932373046875, 'KL/rejected_KL_mean': -1089.13720703125, 'KL/mean': -878.5347900390625, 'KL/std': 525.6906127929688, 'logits/chosen': -1.0476133823394775, 'logits/rejected': -1.0394680500030518, 'epoch': 0.87} + 87%|████████▋ | 591/681 [25:08<03:33, 2.37s/it] 87%|████████▋ | 592/681 [25:10<03:37, 2.44s/it] {'loss': 1.1713, 'grad_norm': 38.8540153503418, 'learning_rate': 2.6209177161234442e-08, 'fcm_dpo/beta': 0.0009650047868490219, 'fcm_dpo/q_t': 0.41676104068756104, 'fcm_dpo/delta': 0.031190991401672363, 'fcm_dpo/margin': 383.3913269042969, 'margin_dpo/margin_mean': 383.391357421875, 'margin_dpo/margin_std': 732.060546875, 'logps/chosen': -754.1591186523438, 'logps/rejected': -1165.343017578125, 'logps/ref_chosen': -47.94025421142578, 'logps/ref_rejected': -75.73287963867188, 'KL/chosen_KL_mean': -706.2188720703125, 'KL/rejected_KL_mean': -1089.610107421875, 'KL/mean': -897.91455078125, 'KL/std': 542.8892211914062, 'logits/chosen': -1.016085147857666, 'logits/rejected': -1.019473910331726, 'epoch': 0.87} + 87%|████████▋ | 592/681 [25:10<03:37, 2.44s/it] 87%|████████▋ | 593/681 [25:13<03:38, 2.48s/it] {'loss': 1.1964, 'grad_norm': 47.76630783081055, 'learning_rate': 2.564009866938349e-08, 'fcm_dpo/beta': 0.0009783029090613127, 'fcm_dpo/q_t': 0.4322025775909424, 'fcm_dpo/delta': 0.09824425727128983, 'fcm_dpo/margin': 311.6109313964844, 'margin_dpo/margin_mean': 311.61090087890625, 'margin_dpo/margin_std': 633.5354614257812, 'logps/chosen': -707.9406127929688, 'logps/rejected': -1031.768798828125, 'logps/ref_chosen': -48.690757751464844, 'logps/ref_rejected': -60.90800094604492, 'KL/chosen_KL_mean': -659.2498779296875, 'KL/rejected_KL_mean': -970.86083984375, 'KL/mean': -815.0553588867188, 'KL/std': 574.805908203125, 'logits/chosen': -0.8949644565582275, 'logits/rejected': -0.8865162134170532, 'epoch': 0.87} + 87%|████████▋ | 593/681 [25:13<03:38, 2.48s/it] 87%|████████▋ | 594/681 [25:15<03:36, 2.48s/it] {'loss': 1.1352, 'grad_norm': 40.98539733886719, 'learning_rate': 2.5076932269588708e-08, 'fcm_dpo/beta': 0.0009973826818168163, 'fcm_dpo/q_t': 0.4159358739852905, 'fcm_dpo/delta': 0.02790883556008339, 'fcm_dpo/margin': 372.99493408203125, 'margin_dpo/margin_mean': 372.99493408203125, 'margin_dpo/margin_std': 592.751708984375, 'logps/chosen': -688.919921875, 'logps/rejected': -1093.07958984375, 'logps/ref_chosen': -54.93488693237305, 'logps/ref_rejected': -86.09967803955078, 'KL/chosen_KL_mean': -633.9850463867188, 'KL/rejected_KL_mean': -1006.97998046875, 'KL/mean': -820.4825439453125, 'KL/std': 562.014404296875, 'logits/chosen': -0.9719296097755432, 'logits/rejected': -0.9616006016731262, 'epoch': 0.87} + 87%|████████▋ | 594/681 [25:15<03:36, 2.48s/it] 87%|████████▋ | 595/681 [25:18<03:36, 2.52s/it] {'loss': 1.0866, 'grad_norm': 43.79144287109375, 'learning_rate': 2.451969280180849e-08, 'fcm_dpo/beta': 0.0009868217166513205, 'fcm_dpo/q_t': 0.4070265293121338, 'fcm_dpo/delta': -0.022147677838802338, 'fcm_dpo/margin': 426.7290344238281, 'margin_dpo/margin_mean': 426.7290344238281, 'margin_dpo/margin_std': 598.78759765625, 'logps/chosen': -657.676025390625, 'logps/rejected': -1115.6119384765625, 'logps/ref_chosen': -49.4204216003418, 'logps/ref_rejected': -80.62731170654297, 'KL/chosen_KL_mean': -608.255615234375, 'KL/rejected_KL_mean': -1034.984619140625, 'KL/mean': -821.6201171875, 'KL/std': 540.2607421875, 'logits/chosen': -0.944753885269165, 'logits/rejected': -0.9602541923522949, 'epoch': 0.87} + 87%|████████▋ | 595/681 [25:18<03:36, 2.52s/it] 88%|████████▊ | 596/681 [25:21<03:36, 2.55s/it] {'loss': 1.2115, 'grad_norm': 68.51116180419922, 'learning_rate': 2.396839494982103e-08, 'fcm_dpo/beta': 0.0010060444474220276, 'fcm_dpo/q_t': 0.4370243549346924, 'fcm_dpo/delta': 0.11245694756507874, 'fcm_dpo/margin': 288.8142395019531, 'margin_dpo/margin_mean': 288.8142395019531, 'margin_dpo/margin_std': 640.1383666992188, 'logps/chosen': -752.0576171875, 'logps/rejected': -1061.17138671875, 'logps/ref_chosen': -59.791683197021484, 'logps/ref_rejected': -80.09111785888672, 'KL/chosen_KL_mean': -692.2659301757812, 'KL/rejected_KL_mean': -981.0802001953125, 'KL/mean': -836.673095703125, 'KL/std': 531.4737548828125, 'logits/chosen': -0.9666332006454468, 'logits/rejected': -0.9320765733718872, 'epoch': 0.88} + 88%|████████▊ | 596/681 [25:21<03:36, 2.55s/it] 88%|████████▊ | 597/681 [25:23<03:35, 2.56s/it] {'loss': 1.0463, 'grad_norm': 28.101728439331055, 'learning_rate': 2.3423053240837514e-08, 'fcm_dpo/beta': 0.0009824027074500918, 'fcm_dpo/q_t': 0.3876197040081024, 'fcm_dpo/delta': -0.11863398551940918, 'fcm_dpo/margin': 518.5908203125, 'margin_dpo/margin_mean': 518.5908203125, 'margin_dpo/margin_std': 676.21826171875, 'logps/chosen': -710.17626953125, 'logps/rejected': -1272.2000732421875, 'logps/ref_chosen': -57.26078796386719, 'logps/ref_rejected': -100.6937255859375, 'KL/chosen_KL_mean': -652.91552734375, 'KL/rejected_KL_mean': -1171.50634765625, 'KL/mean': -912.2109375, 'KL/std': 616.0120849609375, 'logits/chosen': -0.9247469305992126, 'logits/rejected': -0.9725657105445862, 'epoch': 0.88} + 88%|████████▊ | 597/681 [25:23<03:35, 2.56s/it] 88%|████████▊ | 598/681 [25:25<03:27, 2.50s/it] {'loss': 1.1089, 'grad_norm': 34.318355560302734, 'learning_rate': 2.2883682045119062e-08, 'fcm_dpo/beta': 0.0009790980257093906, 'fcm_dpo/q_t': 0.4087187945842743, 'fcm_dpo/delta': 0.002352789044380188, 'fcm_dpo/margin': 405.0446472167969, 'margin_dpo/margin_mean': 405.0446472167969, 'margin_dpo/margin_std': 579.4035034179688, 'logps/chosen': -707.9893798828125, 'logps/rejected': -1149.95947265625, 'logps/ref_chosen': -52.51850509643555, 'logps/ref_rejected': -89.44385528564453, 'KL/chosen_KL_mean': -655.470947265625, 'KL/rejected_KL_mean': -1060.515625, 'KL/mean': -857.9932250976562, 'KL/std': 524.6902465820312, 'logits/chosen': -1.0320333242416382, 'logits/rejected': -1.0434290170669556, 'epoch': 0.88} + 88%|████████▊ | 598/681 [25:26<03:27, 2.50s/it] 88%|████████▊ | 599/681 [25:28<03:33, 2.60s/it] {'loss': 1.1342, 'grad_norm': 32.6776123046875, 'learning_rate': 2.2350295575598367e-08, 'fcm_dpo/beta': 0.0009746775031089783, 'fcm_dpo/q_t': 0.41758590936660767, 'fcm_dpo/delta': -0.061856959015131, 'fcm_dpo/margin': 349.8406982421875, 'margin_dpo/margin_mean': 349.8407287597656, 'margin_dpo/margin_std': 487.59490966796875, 'logps/chosen': -723.2208251953125, 'logps/rejected': -1106.2373046875, 'logps/ref_chosen': -49.802677154541016, 'logps/ref_rejected': -82.978515625, 'KL/chosen_KL_mean': -673.4180908203125, 'KL/rejected_KL_mean': -1023.2587890625, 'KL/mean': -848.3385009765625, 'KL/std': 517.2516479492188, 'logits/chosen': -0.9535913467407227, 'logits/rejected': -0.9626870155334473, 'epoch': 0.88} + 88%|████████▊ | 599/681 [25:28<03:33, 2.60s/it] 88%|████████▊ | 600/681 [25:31<03:33, 2.63s/it] {'loss': 1.1868, 'grad_norm': 33.96622848510742, 'learning_rate': 2.1822907887504932e-08, 'fcm_dpo/beta': 0.0009859842248260975, 'fcm_dpo/q_t': 0.430108904838562, 'fcm_dpo/delta': 0.0903782919049263, 'fcm_dpo/margin': 316.98529052734375, 'margin_dpo/margin_mean': 316.98529052734375, 'margin_dpo/margin_std': 626.9907836914062, 'logps/chosen': -793.16845703125, 'logps/rejected': -1129.17529296875, 'logps/ref_chosen': -66.43487548828125, 'logps/ref_rejected': -85.45649719238281, 'KL/chosen_KL_mean': -726.7335205078125, 'KL/rejected_KL_mean': -1043.7188720703125, 'KL/mean': -885.2261962890625, 'KL/std': 520.7708740234375, 'logits/chosen': -1.0657624006271362, 'logits/rejected': -1.0632259845733643, 'epoch': 0.88} + 88%|████████▊ | 600/681 [25:31<03:33, 2.63s/it] 88%|████████▊ | 601/681 [25:33<03:23, 2.55s/it] {'loss': 1.0847, 'grad_norm': 31.343103408813477, 'learning_rate': 2.1301532877994742e-08, 'fcm_dpo/beta': 0.0009882240556180477, 'fcm_dpo/q_t': 0.4040505588054657, 'fcm_dpo/delta': -0.01307043619453907, 'fcm_dpo/margin': 417.3103942871094, 'margin_dpo/margin_mean': 417.3103942871094, 'margin_dpo/margin_std': 547.4290771484375, 'logps/chosen': -796.3963623046875, 'logps/rejected': -1249.26416015625, 'logps/ref_chosen': -59.13361358642578, 'logps/ref_rejected': -94.69093322753906, 'KL/chosen_KL_mean': -737.2628173828125, 'KL/rejected_KL_mean': -1154.5731201171875, 'KL/mean': -945.91796875, 'KL/std': 556.4426879882812, 'logits/chosen': -0.97291100025177, 'logits/rejected': -0.9917502403259277, 'epoch': 0.88} + 88%|████████▊ | 601/681 [25:33<03:23, 2.55s/it] 88%|████████▊ | 602/681 [25:36<03:20, 2.54s/it] {'loss': 1.025, 'grad_norm': 67.23153686523438, 'learning_rate': 2.0786184285784298e-08, 'fcm_dpo/beta': 0.0009830892086029053, 'fcm_dpo/q_t': 0.3889528512954712, 'fcm_dpo/delta': -0.08871287107467651, 'fcm_dpo/margin': 492.5085754394531, 'margin_dpo/margin_mean': 492.5085754394531, 'margin_dpo/margin_std': 532.3154296875, 'logps/chosen': -554.3673095703125, 'logps/rejected': -1085.950927734375, 'logps/ref_chosen': -48.59352111816406, 'logps/ref_rejected': -87.6685562133789, 'KL/chosen_KL_mean': -505.7738037109375, 'KL/rejected_KL_mean': -998.2823486328125, 'KL/mean': -752.028076171875, 'KL/std': 502.33154296875, 'logits/chosen': -1.0278465747833252, 'logits/rejected': -1.060103416442871, 'epoch': 0.88} + 88%|████████▊ | 602/681 [25:36<03:20, 2.54s/it] 89%|████████▊ | 603/681 [25:38<03:15, 2.51s/it] {'loss': 1.0822, 'grad_norm': 38.15021896362305, 'learning_rate': 2.0276875690788204e-08, 'fcm_dpo/beta': 0.0009643337689340115, 'fcm_dpo/q_t': 0.40109044313430786, 'fcm_dpo/delta': -0.05405785143375397, 'fcm_dpo/margin': 468.2989501953125, 'margin_dpo/margin_mean': 468.2989501953125, 'margin_dpo/margin_std': 690.9556274414062, 'logps/chosen': -708.1666259765625, 'logps/rejected': -1206.3765869140625, 'logps/ref_chosen': -70.41461944580078, 'logps/ref_rejected': -100.32559967041016, 'KL/chosen_KL_mean': -637.751953125, 'KL/rejected_KL_mean': -1106.051025390625, 'KL/mean': -871.9014892578125, 'KL/std': 581.3104248046875, 'logits/chosen': -0.9976146817207336, 'logits/rejected': -0.9900000095367432, 'epoch': 0.89} + 89%|████████▊ | 603/681 [25:38<03:15, 2.51s/it] 89%|████████▊ | 604/681 [25:41<03:19, 2.60s/it] {'loss': 1.0656, 'grad_norm': 38.61325454711914, 'learning_rate': 1.977362051376158e-08, 'fcm_dpo/beta': 0.0009511223761364818, 'fcm_dpo/q_t': 0.3955162465572357, 'fcm_dpo/delta': -0.07592622190713882, 'fcm_dpo/margin': 496.60693359375, 'margin_dpo/margin_mean': 496.60693359375, 'margin_dpo/margin_std': 690.3172607421875, 'logps/chosen': -675.800537109375, 'logps/rejected': -1217.8038330078125, 'logps/ref_chosen': -46.45808029174805, 'logps/ref_rejected': -91.8544921875, 'KL/chosen_KL_mean': -629.3424072265625, 'KL/rejected_KL_mean': -1125.9493408203125, 'KL/mean': -877.6458740234375, 'KL/std': 553.5061645507812, 'logits/chosen': -0.9782446622848511, 'logits/rejected': -1.012909173965454, 'epoch': 0.89} + 89%|████████▊ | 604/681 [25:41<03:19, 2.60s/it] 89%|████████▉ | 605/681 [25:44<03:18, 2.61s/it] {'loss': 1.1403, 'grad_norm': 34.01826095581055, 'learning_rate': 1.9276432015946446e-08, 'fcm_dpo/beta': 0.0009511103853583336, 'fcm_dpo/q_t': 0.4207463264465332, 'fcm_dpo/delta': 0.04394224286079407, 'fcm_dpo/margin': 375.9776611328125, 'margin_dpo/margin_mean': 375.9776306152344, 'margin_dpo/margin_std': 629.7609252929688, 'logps/chosen': -737.1328125, 'logps/rejected': -1149.166015625, 'logps/ref_chosen': -66.24933624267578, 'logps/ref_rejected': -102.30496978759766, 'KL/chosen_KL_mean': -670.883544921875, 'KL/rejected_KL_mean': -1046.861083984375, 'KL/mean': -858.872314453125, 'KL/std': 531.7296752929688, 'logits/chosen': -0.9739004969596863, 'logits/rejected': -0.9862950444221497, 'epoch': 0.89} + 89%|████████▉ | 605/681 [25:44<03:18, 2.61s/it] 89%|████████▉ | 606/681 [25:46<03:11, 2.55s/it] {'loss': 1.0911, 'grad_norm': 25.309036254882812, 'learning_rate': 1.8785323298722093e-08, 'fcm_dpo/beta': 0.0009582208003848791, 'fcm_dpo/q_t': 0.40722784399986267, 'fcm_dpo/delta': -0.011937655508518219, 'fcm_dpo/margin': 429.0714416503906, 'margin_dpo/margin_mean': 429.0714111328125, 'margin_dpo/margin_std': 591.7274169921875, 'logps/chosen': -721.4645385742188, 'logps/rejected': -1194.08837890625, 'logps/ref_chosen': -54.819122314453125, 'logps/ref_rejected': -98.37146759033203, 'KL/chosen_KL_mean': -666.6454467773438, 'KL/rejected_KL_mean': -1095.7169189453125, 'KL/mean': -881.18115234375, 'KL/std': 542.5753173828125, 'logits/chosen': -0.9902355670928955, 'logits/rejected': -1.004211664199829, 'epoch': 0.89} + 89%|████████▉ | 606/681 [25:46<03:11, 2.55s/it] 89%|████████▉ | 607/681 [25:49<03:10, 2.58s/it] {'loss': 1.1544, 'grad_norm': 29.7037353515625, 'learning_rate': 1.8300307303259904e-08, 'fcm_dpo/beta': 0.0009662234224379063, 'fcm_dpo/q_t': 0.42443907260894775, 'fcm_dpo/delta': 0.06767666339874268, 'fcm_dpo/margin': 345.88134765625, 'margin_dpo/margin_mean': 345.88134765625, 'margin_dpo/margin_std': 584.5785522460938, 'logps/chosen': -743.7513427734375, 'logps/rejected': -1111.32568359375, 'logps/ref_chosen': -58.08403778076172, 'logps/ref_rejected': -79.777099609375, 'KL/chosen_KL_mean': -685.667236328125, 'KL/rejected_KL_mean': -1031.548583984375, 'KL/mean': -858.60791015625, 'KL/std': 543.4450073242188, 'logits/chosen': -0.9784862399101257, 'logits/rejected': -0.9676879048347473, 'epoch': 0.89} + 89%|████████▉ | 607/681 [25:49<03:10, 2.58s/it] 89%|████████▉ | 608/681 [25:51<03:04, 2.52s/it] {'loss': 1.091, 'grad_norm': 36.573951721191406, 'learning_rate': 1.7821396810182437e-08, 'fcm_dpo/beta': 0.0009703817777335644, 'fcm_dpo/q_t': 0.409574419260025, 'fcm_dpo/delta': 0.009458957239985466, 'fcm_dpo/margin': 402.6909484863281, 'margin_dpo/margin_mean': 402.69097900390625, 'margin_dpo/margin_std': 511.0771179199219, 'logps/chosen': -676.75, 'logps/rejected': -1116.7635498046875, 'logps/ref_chosen': -57.450836181640625, 'logps/ref_rejected': -94.77339172363281, 'KL/chosen_KL_mean': -619.2991943359375, 'KL/rejected_KL_mean': -1021.9901733398438, 'KL/mean': -820.6446533203125, 'KL/std': 493.3348388671875, 'logits/chosen': -1.0082218647003174, 'logits/rejected': -1.019978642463684, 'epoch': 0.89} + 89%|████████▉ | 608/681 [25:51<03:04, 2.52s/it] 89%|████████▉ | 609/681 [25:54<02:56, 2.46s/it] {'loss': 1.0668, 'grad_norm': 28.152240753173828, 'learning_rate': 1.7348604439226617e-08, 'fcm_dpo/beta': 0.0009552284609526396, 'fcm_dpo/q_t': 0.395630419254303, 'fcm_dpo/delta': -0.09746446460485458, 'fcm_dpo/margin': 515.8161010742188, 'margin_dpo/margin_mean': 515.8161010742188, 'margin_dpo/margin_std': 792.5299682617188, 'logps/chosen': -697.7459716796875, 'logps/rejected': -1243.57275390625, 'logps/ref_chosen': -58.805355072021484, 'logps/ref_rejected': -88.81600952148438, 'KL/chosen_KL_mean': -638.9405517578125, 'KL/rejected_KL_mean': -1154.756591796875, 'KL/mean': -896.8486328125, 'KL/std': 665.662841796875, 'logits/chosen': -1.0558668375015259, 'logits/rejected': -1.0789850950241089, 'epoch': 0.89} + 89%|████████▉ | 609/681 [25:54<02:56, 2.46s/it] 90%|████████▉ | 610/681 [25:56<02:52, 2.43s/it] {'loss': 1.175, 'grad_norm': 38.88047409057617, 'learning_rate': 1.6881942648911074e-08, 'fcm_dpo/beta': 0.0009649534476920962, 'fcm_dpo/q_t': 0.42811504006385803, 'fcm_dpo/delta': 0.09404957294464111, 'fcm_dpo/margin': 319.82086181640625, 'margin_dpo/margin_mean': 319.8208923339844, 'margin_dpo/margin_std': 581.705078125, 'logps/chosen': -696.9310913085938, 'logps/rejected': -1034.46240234375, 'logps/ref_chosen': -65.69503784179688, 'logps/ref_rejected': -83.40538787841797, 'KL/chosen_KL_mean': -631.236083984375, 'KL/rejected_KL_mean': -951.0569458007812, 'KL/mean': -791.146484375, 'KL/std': 500.4407653808594, 'logits/chosen': -0.9684814214706421, 'logits/rejected': -0.9458719491958618, 'epoch': 0.9} + 90%|████████▉ | 610/681 [25:56<02:52, 2.43s/it] 90%|████████▉ | 611/681 [25:58<02:48, 2.41s/it] {'loss': 1.0461, 'grad_norm': 27.193374633789062, 'learning_rate': 1.6421423736208e-08, 'fcm_dpo/beta': 0.0009503072360530496, 'fcm_dpo/q_t': 0.38792964816093445, 'fcm_dpo/delta': -0.12105247378349304, 'fcm_dpo/margin': 541.71533203125, 'margin_dpo/margin_mean': 541.71533203125, 'margin_dpo/margin_std': 745.772705078125, 'logps/chosen': -718.54931640625, 'logps/rejected': -1293.9962158203125, 'logps/ref_chosen': -52.59946823120117, 'logps/ref_rejected': -86.33099365234375, 'KL/chosen_KL_mean': -665.9498291015625, 'KL/rejected_KL_mean': -1207.665283203125, 'KL/mean': -936.8076171875, 'KL/std': 653.8685302734375, 'logits/chosen': -1.0235629081726074, 'logits/rejected': -1.0670585632324219, 'epoch': 0.9} + 90%|████████▉ | 611/681 [25:58<02:48, 2.41s/it] 90%|████████▉ | 612/681 [26:01<02:43, 2.38s/it] {'loss': 1.1061, 'grad_norm': 31.31951141357422, 'learning_rate': 1.5967059836219042e-08, 'fcm_dpo/beta': 0.000949513225350529, 'fcm_dpo/q_t': 0.4112043082714081, 'fcm_dpo/delta': 0.01131674274802208, 'fcm_dpo/margin': 409.5210266113281, 'margin_dpo/margin_mean': 409.5210266113281, 'margin_dpo/margin_std': 582.0534057617188, 'logps/chosen': -786.1910400390625, 'logps/rejected': -1224.70068359375, 'logps/ref_chosen': -59.32372283935547, 'logps/ref_rejected': -88.31239318847656, 'KL/chosen_KL_mean': -726.8673095703125, 'KL/rejected_KL_mean': -1136.388427734375, 'KL/mean': -931.6278076171875, 'KL/std': 533.6715087890625, 'logits/chosen': -1.0205453634262085, 'logits/rejected': -1.0193266868591309, 'epoch': 0.9} + 90%|████████▉ | 612/681 [26:01<02:43, 2.38s/it] 90%|█████████ | 613/681 [26:03<02:44, 2.42s/it] {'loss': 1.0324, 'grad_norm': 34.82392883300781, 'learning_rate': 1.551886292185553e-08, 'fcm_dpo/beta': 0.0009315350907854736, 'fcm_dpo/q_t': 0.3888673782348633, 'fcm_dpo/delta': -0.09393209218978882, 'fcm_dpo/margin': 524.9864501953125, 'margin_dpo/margin_mean': 524.9863891601562, 'margin_dpo/margin_std': 627.1439208984375, 'logps/chosen': -678.7138671875, 'logps/rejected': -1249.077880859375, 'logps/ref_chosen': -59.72996520996094, 'logps/ref_rejected': -105.10752868652344, 'KL/chosen_KL_mean': -618.98388671875, 'KL/rejected_KL_mean': -1143.9703369140625, 'KL/mean': -881.4771728515625, 'KL/std': 597.9173583984375, 'logits/chosen': -1.0204041004180908, 'logits/rejected': -1.0734975337982178, 'epoch': 0.9} + 90%|█████████ | 613/681 [26:03<02:44, 2.42s/it] 90%|█████████ | 614/681 [26:06<02:43, 2.44s/it] {'loss': 1.0741, 'grad_norm': 43.358585357666016, 'learning_rate': 1.507684480352292e-08, 'fcm_dpo/beta': 0.000922086532227695, 'fcm_dpo/q_t': 0.39733168482780457, 'fcm_dpo/delta': -0.06259925663471222, 'fcm_dpo/margin': 498.6150207519531, 'margin_dpo/margin_mean': 498.614990234375, 'margin_dpo/margin_std': 705.9459228515625, 'logps/chosen': -741.399658203125, 'logps/rejected': -1291.755126953125, 'logps/ref_chosen': -52.93898010253906, 'logps/ref_rejected': -104.67938232421875, 'KL/chosen_KL_mean': -688.460693359375, 'KL/rejected_KL_mean': -1187.07568359375, 'KL/mean': -937.7681884765625, 'KL/std': 594.8397827148438, 'logits/chosen': -1.0039961338043213, 'logits/rejected': -1.0779341459274292, 'epoch': 0.9} + 90%|█████████ | 614/681 [26:06<02:43, 2.44s/it] 90%|█████████ | 615/681 [26:08<02:47, 2.53s/it] {'loss': 1.1269, 'grad_norm': 27.085163116455078, 'learning_rate': 1.4641017128809801e-08, 'fcm_dpo/beta': 0.0009228853159584105, 'fcm_dpo/q_t': 0.4132547974586487, 'fcm_dpo/delta': 0.017665421590209007, 'fcm_dpo/margin': 414.74560546875, 'margin_dpo/margin_mean': 414.7456359863281, 'margin_dpo/margin_std': 670.7782592773438, 'logps/chosen': -730.2218627929688, 'logps/rejected': -1174.32763671875, 'logps/ref_chosen': -65.81727600097656, 'logps/ref_rejected': -95.17749786376953, 'KL/chosen_KL_mean': -664.404541015625, 'KL/rejected_KL_mean': -1079.150146484375, 'KL/mean': -871.7774047851562, 'KL/std': 616.7755126953125, 'logits/chosen': -0.9974070191383362, 'logits/rejected': -1.0210152864456177, 'epoch': 0.9} + 90%|█████████ | 615/681 [26:08<02:47, 2.53s/it] 90%|█████████ | 616/681 [26:11<02:49, 2.61s/it] {'loss': 1.1748, 'grad_norm': 33.06167221069336, 'learning_rate': 1.4211391382180637e-08, 'fcm_dpo/beta': 0.00093449791893363, 'fcm_dpo/q_t': 0.430539608001709, 'fcm_dpo/delta': 0.09862032532691956, 'fcm_dpo/margin': 325.71380615234375, 'margin_dpo/margin_mean': 325.71380615234375, 'margin_dpo/margin_std': 578.4287719726562, 'logps/chosen': -851.3575439453125, 'logps/rejected': -1186.638916015625, 'logps/ref_chosen': -65.13285827636719, 'logps/ref_rejected': -74.70050048828125, 'KL/chosen_KL_mean': -786.2246704101562, 'KL/rejected_KL_mean': -1111.9384765625, 'KL/mean': -949.08154296875, 'KL/std': 518.137451171875, 'logits/chosen': -1.0744967460632324, 'logits/rejected': -1.0585415363311768, 'epoch': 0.9} + 90%|█████████ | 616/681 [26:11<02:49, 2.61s/it] 91%|█████████ | 617/681 [26:14<02:47, 2.61s/it] {'loss': 1.2335, 'grad_norm': 58.81951141357422, 'learning_rate': 1.378797888467345e-08, 'fcm_dpo/beta': 0.0009459134307689965, 'fcm_dpo/q_t': 0.44637531042099, 'fcm_dpo/delta': 0.07147952169179916, 'fcm_dpo/margin': 247.66403198242188, 'margin_dpo/margin_mean': 247.66403198242188, 'margin_dpo/margin_std': 561.760009765625, 'logps/chosen': -800.3141479492188, 'logps/rejected': -1049.206787109375, 'logps/ref_chosen': -63.005550384521484, 'logps/ref_rejected': -64.234130859375, 'KL/chosen_KL_mean': -737.30859375, 'KL/rejected_KL_mean': -984.97265625, 'KL/mean': -861.140625, 'KL/std': 481.3660583496094, 'logits/chosen': -0.9604687690734863, 'logits/rejected': -0.9302307367324829, 'epoch': 0.91} + 91%|█████████ | 617/681 [26:14<02:47, 2.61s/it] 91%|█████████ | 618/681 [26:16<02:44, 2.61s/it] {'loss': 1.0842, 'grad_norm': 38.52542495727539, 'learning_rate': 1.3370790793601371e-08, 'fcm_dpo/beta': 0.0009419023990631104, 'fcm_dpo/q_t': 0.39000076055526733, 'fcm_dpo/delta': -0.11681665480136871, 'fcm_dpo/margin': 542.3301391601562, 'margin_dpo/margin_mean': 542.3301391601562, 'margin_dpo/margin_std': 860.1188354492188, 'logps/chosen': -844.5650024414062, 'logps/rejected': -1411.947265625, 'logps/ref_chosen': -67.10134887695312, 'logps/ref_rejected': -92.15340423583984, 'KL/chosen_KL_mean': -777.463623046875, 'KL/rejected_KL_mean': -1319.7938232421875, 'KL/mean': -1048.628662109375, 'KL/std': 667.0958251953125, 'logits/chosen': -0.9963364601135254, 'logits/rejected': -1.0328341722488403, 'epoch': 0.91} + 91%|█████████ | 618/681 [26:16<02:44, 2.61s/it] 91%|█████████ | 619/681 [26:19<02:40, 2.59s/it] {'loss': 1.1798, 'grad_norm': 50.947975158691406, 'learning_rate': 1.2959838102258535e-08, 'fcm_dpo/beta': 0.0009351515327580273, 'fcm_dpo/q_t': 0.42254310846328735, 'fcm_dpo/delta': 0.025874076411128044, 'fcm_dpo/margin': 401.0638427734375, 'margin_dpo/margin_mean': 401.0638427734375, 'margin_dpo/margin_std': 831.8917236328125, 'logps/chosen': -825.1192016601562, 'logps/rejected': -1263.39013671875, 'logps/ref_chosen': -55.978233337402344, 'logps/ref_rejected': -93.1854019165039, 'KL/chosen_KL_mean': -769.1409912109375, 'KL/rejected_KL_mean': -1170.204833984375, 'KL/mean': -969.6728515625, 'KL/std': 620.4271240234375, 'logits/chosen': -0.9994246959686279, 'logits/rejected': -1.0098530054092407, 'epoch': 0.91} + 91%|█████████ | 619/681 [26:19<02:40, 2.59s/it] 91%|█████████ | 620/681 [26:21<02:36, 2.57s/it] {'loss': 1.1384, 'grad_norm': 34.1131706237793, 'learning_rate': 1.2555131639630567e-08, 'fcm_dpo/beta': 0.0009386817691847682, 'fcm_dpo/q_t': 0.4203672409057617, 'fcm_dpo/delta': 0.05065443366765976, 'fcm_dpo/margin': 373.9115295410156, 'margin_dpo/margin_mean': 373.9115295410156, 'margin_dpo/margin_std': 589.354248046875, 'logps/chosen': -737.703125, 'logps/rejected': -1130.2279052734375, 'logps/ref_chosen': -59.79750061035156, 'logps/ref_rejected': -78.41075134277344, 'KL/chosen_KL_mean': -677.9056396484375, 'KL/rejected_KL_mean': -1051.817138671875, 'KL/mean': -864.861328125, 'KL/std': 525.7140502929688, 'logits/chosen': -1.036217451095581, 'logits/rejected': -1.0394688844680786, 'epoch': 0.91} + 91%|█████████ | 620/681 [26:21<02:36, 2.57s/it] 91%|█████████ | 621/681 [26:24<02:34, 2.57s/it] {'loss': 1.0289, 'grad_norm': 41.840362548828125, 'learning_rate': 1.2156682070109086e-08, 'fcm_dpo/beta': 0.0009327299194410443, 'fcm_dpo/q_t': 0.3806772232055664, 'fcm_dpo/delta': -0.1384207010269165, 'fcm_dpo/margin': 569.275146484375, 'margin_dpo/margin_mean': 569.275146484375, 'margin_dpo/margin_std': 718.2183227539062, 'logps/chosen': -742.04833984375, 'logps/rejected': -1345.75927734375, 'logps/ref_chosen': -53.93375778198242, 'logps/ref_rejected': -88.36951446533203, 'KL/chosen_KL_mean': -688.1146240234375, 'KL/rejected_KL_mean': -1257.3896484375, 'KL/mean': -972.752197265625, 'KL/std': 649.8193359375, 'logits/chosen': -1.082035779953003, 'logits/rejected': -1.1331275701522827, 'epoch': 0.91} + 91%|█████████ | 621/681 [26:24<02:34, 2.57s/it] 91%|█████████▏| 622/681 [26:26<02:31, 2.56s/it] {'loss': 1.1216, 'grad_norm': 29.52936553955078, 'learning_rate': 1.1764499893210878e-08, 'fcm_dpo/beta': 0.0009187752148136497, 'fcm_dpo/q_t': 0.41589581966400146, 'fcm_dpo/delta': 0.026527073234319687, 'fcm_dpo/margin': 407.2692565917969, 'margin_dpo/margin_mean': 407.26922607421875, 'margin_dpo/margin_std': 625.4295654296875, 'logps/chosen': -730.9442138671875, 'logps/rejected': -1163.4462890625, 'logps/ref_chosen': -60.28582000732422, 'logps/ref_rejected': -85.51873779296875, 'KL/chosen_KL_mean': -670.6583251953125, 'KL/rejected_KL_mean': -1077.927490234375, 'KL/mean': -874.29296875, 'KL/std': 506.2366943359375, 'logits/chosen': -0.9557490348815918, 'logits/rejected': -0.944530189037323, 'epoch': 0.91} + 91%|█████████▏| 622/681 [26:27<02:31, 2.56s/it] 91%|█████████▏| 623/681 [26:29<02:21, 2.44s/it] {'loss': 1.1952, 'grad_norm': 37.05131912231445, 'learning_rate': 1.1378595443300998e-08, 'fcm_dpo/beta': 0.0009395014494657516, 'fcm_dpo/q_t': 0.4357511103153229, 'fcm_dpo/delta': 0.11050058901309967, 'fcm_dpo/margin': 311.5841064453125, 'margin_dpo/margin_mean': 311.5841369628906, 'margin_dpo/margin_std': 634.1216430664062, 'logps/chosen': -787.8182983398438, 'logps/rejected': -1120.3284912109375, 'logps/ref_chosen': -64.1569595336914, 'logps/ref_rejected': -85.08304595947266, 'KL/chosen_KL_mean': -723.661376953125, 'KL/rejected_KL_mean': -1035.2454833984375, 'KL/mean': -879.453369140625, 'KL/std': 520.5437622070312, 'logits/chosen': -1.0592715740203857, 'logits/rejected': -1.0595531463623047, 'epoch': 0.91} + 91%|█████████▏| 623/681 [26:29<02:21, 2.44s/it] 92%|█████████▏| 624/681 [26:31<02:21, 2.48s/it] {'loss': 1.0496, 'grad_norm': 32.84885787963867, 'learning_rate': 1.0998978889320582e-08, 'fcm_dpo/beta': 0.0009369177860207856, 'fcm_dpo/q_t': 0.3928346335887909, 'fcm_dpo/delta': -0.062284573912620544, 'fcm_dpo/margin': 490.396240234375, 'margin_dpo/margin_mean': 490.39617919921875, 'margin_dpo/margin_std': 585.4035034179688, 'logps/chosen': -800.597412109375, 'logps/rejected': -1316.20703125, 'logps/ref_chosen': -71.91862487792969, 'logps/ref_rejected': -97.13203430175781, 'KL/chosen_KL_mean': -728.6787719726562, 'KL/rejected_KL_mean': -1219.074951171875, 'KL/mean': -973.8768920898438, 'KL/std': 544.4788818359375, 'logits/chosen': -1.0861725807189941, 'logits/rejected': -1.088505506515503, 'epoch': 0.92} + 92%|█████████▏| 624/681 [26:31<02:21, 2.48s/it] 92%|█████████▏| 625/681 [26:34<02:19, 2.50s/it] {'loss': 1.0202, 'grad_norm': 70.59078979492188, 'learning_rate': 1.0625660234518913e-08, 'fcm_dpo/beta': 0.0009238402126356959, 'fcm_dpo/q_t': 0.38963061571121216, 'fcm_dpo/delta': -0.08445164561271667, 'fcm_dpo/margin': 520.080322265625, 'margin_dpo/margin_mean': 520.080322265625, 'margin_dpo/margin_std': 548.3095703125, 'logps/chosen': -728.8655395507812, 'logps/rejected': -1276.6942138671875, 'logps/ref_chosen': -58.342071533203125, 'logps/ref_rejected': -86.09038543701172, 'KL/chosen_KL_mean': -670.5234375, 'KL/rejected_KL_mean': -1190.603759765625, 'KL/mean': -930.5636596679688, 'KL/std': 588.0369873046875, 'logits/chosen': -0.9677177667617798, 'logits/rejected': -0.9921514391899109, 'epoch': 0.92} + 92%|█████████▏| 625/681 [26:34<02:19, 2.50s/it] 92%|█████████▏| 626/681 [26:37<02:22, 2.60s/it] {'loss': 1.2146, 'grad_norm': 34.235252380371094, 'learning_rate': 1.0258649316189721e-08, 'fcm_dpo/beta': 0.0009358528186567128, 'fcm_dpo/q_t': 0.43587183952331543, 'fcm_dpo/delta': 0.12933696806430817, 'fcm_dpo/margin': 292.92529296875, 'margin_dpo/margin_mean': 292.92529296875, 'margin_dpo/margin_std': 614.7378540039062, 'logps/chosen': -911.9512939453125, 'logps/rejected': -1228.95263671875, 'logps/ref_chosen': -75.11260986328125, 'logps/ref_rejected': -99.188720703125, 'KL/chosen_KL_mean': -836.838623046875, 'KL/rejected_KL_mean': -1129.763916015625, 'KL/mean': -983.3013305664062, 'KL/std': 631.4542236328125, 'logits/chosen': -0.9614785313606262, 'logits/rejected': -0.9531521797180176, 'epoch': 0.92} + 92%|█████████▏| 626/681 [26:37<02:22, 2.60s/it] 92%|█████████▏| 627/681 [26:39<02:23, 2.66s/it] {'loss': 1.0311, 'grad_norm': 32.391971588134766, 'learning_rate': 9.897955805412e-09, 'fcm_dpo/beta': 0.0009262310341000557, 'fcm_dpo/q_t': 0.38598155975341797, 'fcm_dpo/delta': -0.15298572182655334, 'fcm_dpo/margin': 588.1803588867188, 'margin_dpo/margin_mean': 588.1802978515625, 'margin_dpo/margin_std': 796.7737426757812, 'logps/chosen': -614.6998291015625, 'logps/rejected': -1261.8914794921875, 'logps/ref_chosen': -47.74314880371094, 'logps/ref_rejected': -106.75448608398438, 'KL/chosen_KL_mean': -566.9566650390625, 'KL/rejected_KL_mean': -1155.136962890625, 'KL/mean': -861.046875, 'KL/std': 695.9779052734375, 'logits/chosen': -0.9232186079025269, 'logits/rejected': -0.9977039098739624, 'epoch': 0.92} + 92%|█████████▏| 627/681 [26:39<02:23, 2.66s/it] 92%|█████████▏| 628/681 [26:42<02:19, 2.62s/it] {'loss': 1.1065, 'grad_norm': 32.3719367980957, 'learning_rate': 9.543589206795238e-09, 'fcm_dpo/beta': 0.000911533716134727, 'fcm_dpo/q_t': 0.4087638854980469, 'fcm_dpo/delta': -0.004994707182049751, 'fcm_dpo/margin': 444.0523681640625, 'margin_dpo/margin_mean': 444.0523681640625, 'margin_dpo/margin_std': 661.4337158203125, 'logps/chosen': -804.5421752929688, 'logps/rejected': -1289.96630859375, 'logps/ref_chosen': -60.182945251464844, 'logps/ref_rejected': -101.55467224121094, 'KL/chosen_KL_mean': -744.3592529296875, 'KL/rejected_KL_mean': -1188.41162109375, 'KL/mean': -966.3853759765625, 'KL/std': 574.35107421875, 'logits/chosen': -1.0520081520080566, 'logits/rejected': -1.065995454788208, 'epoch': 0.92} + 92%|█████████▏| 628/681 [26:42<02:19, 2.62s/it] 92%|█████████▏| 629/681 [26:45<02:15, 2.61s/it] {'loss': 1.1059, 'grad_norm': 35.62141036987305, 'learning_rate': 9.19555885822887e-09, 'fcm_dpo/beta': 0.0009158846805803478, 'fcm_dpo/q_t': 0.4125128388404846, 'fcm_dpo/delta': 0.026277855038642883, 'fcm_dpo/margin': 409.11669921875, 'margin_dpo/margin_mean': 409.11669921875, 'margin_dpo/margin_std': 548.577880859375, 'logps/chosen': -799.6928100585938, 'logps/rejected': -1236.2496337890625, 'logps/ref_chosen': -64.21354675292969, 'logps/ref_rejected': -91.65367126464844, 'KL/chosen_KL_mean': -735.479248046875, 'KL/rejected_KL_mean': -1144.595947265625, 'KL/mean': -940.03759765625, 'KL/std': 554.8238525390625, 'logits/chosen': -1.0496397018432617, 'logits/rejected': -1.0647929906845093, 'epoch': 0.92} + 92%|█████████▏| 629/681 [26:45<02:15, 2.61s/it] 93%|█████████▎| 630/681 [26:47<02:13, 2.62s/it] {'loss': 1.2698, 'grad_norm': 56.84680938720703, 'learning_rate': 8.85387393063622e-09, 'fcm_dpo/beta': 0.0009212232544086874, 'fcm_dpo/q_t': 0.4528850317001343, 'fcm_dpo/delta': 0.045750658959150314, 'fcm_dpo/margin': 245.02310180664062, 'margin_dpo/margin_mean': 245.02308654785156, 'margin_dpo/margin_std': 707.3883056640625, 'logps/chosen': -723.5418090820312, 'logps/rejected': -992.8721923828125, 'logps/ref_chosen': -59.29100036621094, 'logps/ref_rejected': -83.59829711914062, 'KL/chosen_KL_mean': -664.2507934570312, 'KL/rejected_KL_mean': -909.27392578125, 'KL/mean': -786.7623291015625, 'KL/std': 577.2899169921875, 'logits/chosen': -1.043975830078125, 'logits/rejected': -1.0191277265548706, 'epoch': 0.93} + 93%|█████████▎| 630/681 [26:47<02:13, 2.62s/it] 93%|█████████▎| 631/681 [26:50<02:08, 2.58s/it] {'loss': 1.1597, 'grad_norm': 33.039405822753906, 'learning_rate': 8.518543427732949e-09, 'fcm_dpo/beta': 0.0009341588011011481, 'fcm_dpo/q_t': 0.4202990233898163, 'fcm_dpo/delta': 0.053176864981651306, 'fcm_dpo/margin': 373.1282043457031, 'margin_dpo/margin_mean': 373.1282043457031, 'margin_dpo/margin_std': 670.2313232421875, 'logps/chosen': -860.263427734375, 'logps/rejected': -1254.8896484375, 'logps/ref_chosen': -59.45360565185547, 'logps/ref_rejected': -80.95156860351562, 'KL/chosen_KL_mean': -800.809814453125, 'KL/rejected_KL_mean': -1173.93798828125, 'KL/mean': -987.3739013671875, 'KL/std': 571.5894775390625, 'logits/chosen': -1.1113148927688599, 'logits/rejected': -1.120398759841919, 'epoch': 0.93} + 93%|█████████▎| 631/681 [26:50<02:08, 2.58s/it] 93%|█████████▎| 632/681 [26:52<02:02, 2.50s/it] {'loss': 1.1318, 'grad_norm': 43.67294692993164, 'learning_rate': 8.189576185789637e-09, 'fcm_dpo/beta': 0.0009340323740616441, 'fcm_dpo/q_t': 0.4138449430465698, 'fcm_dpo/delta': 0.023960798978805542, 'fcm_dpo/margin': 403.1651916503906, 'margin_dpo/margin_mean': 403.16522216796875, 'margin_dpo/margin_std': 643.6358642578125, 'logps/chosen': -762.039306640625, 'logps/rejected': -1190.01318359375, 'logps/ref_chosen': -61.35155487060547, 'logps/ref_rejected': -86.16017150878906, 'KL/chosen_KL_mean': -700.687744140625, 'KL/rejected_KL_mean': -1103.85302734375, 'KL/mean': -902.2703857421875, 'KL/std': 529.5032348632812, 'logits/chosen': -1.0655746459960938, 'logits/rejected': -1.0689226388931274, 'epoch': 0.93} + 93%|█████████▎| 632/681 [26:52<02:02, 2.50s/it] 93%|█████████▎| 633/681 [26:54<01:59, 2.48s/it] {'loss': 1.2107, 'grad_norm': 47.90824508666992, 'learning_rate': 7.866980873399015e-09, 'fcm_dpo/beta': 0.0009426448959857225, 'fcm_dpo/q_t': 0.437002032995224, 'fcm_dpo/delta': 0.02990627847611904, 'fcm_dpo/margin': 289.7676696777344, 'margin_dpo/margin_mean': 289.76763916015625, 'margin_dpo/margin_std': 609.2022094726562, 'logps/chosen': -832.0736694335938, 'logps/rejected': -1156.147216796875, 'logps/ref_chosen': -57.27816390991211, 'logps/ref_rejected': -91.58395385742188, 'KL/chosen_KL_mean': -774.7955322265625, 'KL/rejected_KL_mean': -1064.563232421875, 'KL/mean': -919.6793212890625, 'KL/std': 531.289306640625, 'logits/chosen': -1.116697072982788, 'logits/rejected': -1.1289957761764526, 'epoch': 0.93} + 93%|█████████▎| 633/681 [26:54<01:59, 2.48s/it] 93%|█████████▎| 634/681 [26:57<01:58, 2.52s/it] {'loss': 1.2439, 'grad_norm': 36.095970153808594, 'learning_rate': 7.550765991247654e-09, 'fcm_dpo/beta': 0.0009592788992449641, 'fcm_dpo/q_t': 0.44761770963668823, 'fcm_dpo/delta': 0.0725301131606102, 'fcm_dpo/margin': 245.5295867919922, 'margin_dpo/margin_mean': 245.52960205078125, 'margin_dpo/margin_std': 606.821533203125, 'logps/chosen': -931.0008544921875, 'logps/rejected': -1217.037109375, 'logps/ref_chosen': -66.61896514892578, 'logps/ref_rejected': -107.12564849853516, 'KL/chosen_KL_mean': -864.3818969726562, 'KL/rejected_KL_mean': -1109.9114990234375, 'KL/mean': -987.146728515625, 'KL/std': 618.4544677734375, 'logits/chosen': -0.9695584774017334, 'logits/rejected': -0.9623770117759705, 'epoch': 0.93} + 93%|█████████▎| 634/681 [26:57<01:58, 2.52s/it] 93%|█████████▎| 635/681 [26:59<01:55, 2.51s/it] {'loss': 1.1569, 'grad_norm': 35.836708068847656, 'learning_rate': 7.240939871891699e-09, 'fcm_dpo/beta': 0.0009680173825472593, 'fcm_dpo/q_t': 0.42416542768478394, 'fcm_dpo/delta': 0.05820862203836441, 'fcm_dpo/margin': 355.125732421875, 'margin_dpo/margin_mean': 355.125732421875, 'margin_dpo/margin_std': 636.2976684570312, 'logps/chosen': -822.340087890625, 'logps/rejected': -1186.0107421875, 'logps/ref_chosen': -73.95551300048828, 'logps/ref_rejected': -82.50045776367188, 'KL/chosen_KL_mean': -748.3845825195312, 'KL/rejected_KL_mean': -1103.51025390625, 'KL/mean': -925.947509765625, 'KL/std': 645.3294067382812, 'logits/chosen': -1.0464322566986084, 'logits/rejected': -1.0263543128967285, 'epoch': 0.93} + 93%|█████████▎| 635/681 [27:00<01:55, 2.51s/it] 93%|█████████▎| 636/681 [27:02<01:56, 2.58s/it] {'loss': 1.0972, 'grad_norm': 29.87440299987793, 'learning_rate': 6.937510679537628e-09, 'fcm_dpo/beta': 0.0009746984578669071, 'fcm_dpo/q_t': 0.4083341956138611, 'fcm_dpo/delta': -0.015476349741220474, 'fcm_dpo/margin': 424.83349609375, 'margin_dpo/margin_mean': 424.83349609375, 'margin_dpo/margin_std': 628.7258911132812, 'logps/chosen': -758.3253173828125, 'logps/rejected': -1205.5087890625, 'logps/ref_chosen': -59.628910064697266, 'logps/ref_rejected': -81.97883605957031, 'KL/chosen_KL_mean': -698.6964111328125, 'KL/rejected_KL_mean': -1123.5299072265625, 'KL/mean': -911.1131591796875, 'KL/std': 614.4910888671875, 'logits/chosen': -1.0113909244537354, 'logits/rejected': -1.0161449909210205, 'epoch': 0.93} + 93%|█████████▎| 636/681 [27:02<01:56, 2.58s/it] 94%|█████████▎| 637/681 [27:05<01:53, 2.58s/it] {'loss': 1.0745, 'grad_norm': 30.655946731567383, 'learning_rate': 6.640486409826785e-09, 'fcm_dpo/beta': 0.0009556564618833363, 'fcm_dpo/q_t': 0.4004845917224884, 'fcm_dpo/delta': -0.050962455570697784, 'fcm_dpo/margin': 468.7959899902344, 'margin_dpo/margin_mean': 468.79595947265625, 'margin_dpo/margin_std': 649.401123046875, 'logps/chosen': -763.6051635742188, 'logps/rejected': -1281.153564453125, 'logps/ref_chosen': -49.652687072753906, 'logps/ref_rejected': -98.40513610839844, 'KL/chosen_KL_mean': -713.9525146484375, 'KL/rejected_KL_mean': -1182.74853515625, 'KL/mean': -948.3504638671875, 'KL/std': 626.5479736328125, 'logits/chosen': -1.1256394386291504, 'logits/rejected': -1.1713881492614746, 'epoch': 0.94} + 94%|█████████▎| 637/681 [27:05<01:53, 2.58s/it] 94%|█████████▎| 638/681 [27:08<01:54, 2.67s/it] {'loss': 1.1633, 'grad_norm': 36.20570755004883, 'learning_rate': 6.349874889624962e-09, 'fcm_dpo/beta': 0.0009507788345217705, 'fcm_dpo/q_t': 0.41397538781166077, 'fcm_dpo/delta': -0.07280878722667694, 'fcm_dpo/margin': 374.45550537109375, 'margin_dpo/margin_mean': 374.45550537109375, 'margin_dpo/margin_std': 676.257568359375, 'logps/chosen': -744.8404541015625, 'logps/rejected': -1140.4407958984375, 'logps/ref_chosen': -58.156639099121094, 'logps/ref_rejected': -79.3014907836914, 'KL/chosen_KL_mean': -686.6837768554688, 'KL/rejected_KL_mean': -1061.1392822265625, 'KL/mean': -873.9114990234375, 'KL/std': 583.2347412109375, 'logits/chosen': -0.9657926559448242, 'logits/rejected': -0.9476113319396973, 'epoch': 0.94} + 94%|█████████▎| 638/681 [27:08<01:54, 2.67s/it] 94%|█████████▍| 639/681 [27:10<01:52, 2.68s/it] {'loss': 1.3368, 'grad_norm': 108.60933685302734, 'learning_rate': 6.065683776815933e-09, 'fcm_dpo/beta': 0.0009438564302399755, 'fcm_dpo/q_t': 0.46391725540161133, 'fcm_dpo/delta': 0.0, 'fcm_dpo/margin': 169.6693115234375, 'margin_dpo/margin_mean': 169.6693115234375, 'margin_dpo/margin_std': 701.151611328125, 'logps/chosen': -1017.8701171875, 'logps/rejected': -1189.4912109375, 'logps/ref_chosen': -72.32319641113281, 'logps/ref_rejected': -74.2749252319336, 'KL/chosen_KL_mean': -945.5469360351562, 'KL/rejected_KL_mean': -1115.21630859375, 'KL/mean': -1030.381591796875, 'KL/std': 560.040283203125, 'logits/chosen': -0.9735069274902344, 'logits/rejected': -0.9152404069900513, 'epoch': 0.94} + 94%|█████████▍| 639/681 [27:10<01:52, 2.68s/it] 94%|█████████▍| 640/681 [27:13<01:49, 2.66s/it] {'loss': 1.0366, 'grad_norm': 46.08125305175781, 'learning_rate': 5.7879205600998296e-09, 'fcm_dpo/beta': 0.0009295439813286066, 'fcm_dpo/q_t': 0.3859960734844208, 'fcm_dpo/delta': -0.13248543441295624, 'fcm_dpo/margin': 565.5443725585938, 'margin_dpo/margin_mean': 565.5443725585938, 'margin_dpo/margin_std': 759.7664794921875, 'logps/chosen': -767.1417236328125, 'logps/rejected': -1385.15185546875, 'logps/ref_chosen': -56.13436508178711, 'logps/ref_rejected': -108.60014343261719, 'KL/chosen_KL_mean': -711.00732421875, 'KL/rejected_KL_mean': -1276.5517578125, 'KL/mean': -993.779541015625, 'KL/std': 650.807373046875, 'logits/chosen': -0.9270308613777161, 'logits/rejected': -0.9523489475250244, 'epoch': 0.94} + 94%|█████████▍| 640/681 [27:13<01:49, 2.66s/it] 94%|█████████▍| 641/681 [27:16<01:45, 2.63s/it] {'loss': 1.1886, 'grad_norm': 28.046123504638672, 'learning_rate': 5.516592558795746e-09, 'fcm_dpo/beta': 0.000928039662539959, 'fcm_dpo/q_t': 0.4274270534515381, 'fcm_dpo/delta': 0.0726061537861824, 'fcm_dpo/margin': 355.4219055175781, 'margin_dpo/margin_mean': 355.42193603515625, 'margin_dpo/margin_std': 733.6763916015625, 'logps/chosen': -894.083740234375, 'logps/rejected': -1271.5009765625, 'logps/ref_chosen': -64.99689483642578, 'logps/ref_rejected': -86.99232482910156, 'KL/chosen_KL_mean': -829.0867919921875, 'KL/rejected_KL_mean': -1184.5086669921875, 'KL/mean': -1006.7977294921875, 'KL/std': 552.3861083984375, 'logits/chosen': -1.041335940361023, 'logits/rejected': -1.0468769073486328, 'epoch': 0.94} + 94%|█████████▍| 641/681 [27:16<01:45, 2.63s/it] 94%|█████████▍| 642/681 [27:18<01:40, 2.59s/it] {'loss': 1.1535, 'grad_norm': 40.215126037597656, 'learning_rate': 5.251706922648868e-09, 'fcm_dpo/beta': 0.0009348751045763493, 'fcm_dpo/q_t': 0.4158053398132324, 'fcm_dpo/delta': -0.013475339859724045, 'fcm_dpo/margin': 441.3376770019531, 'margin_dpo/margin_mean': 441.33770751953125, 'margin_dpo/margin_std': 880.50634765625, 'logps/chosen': -860.412353515625, 'logps/rejected': -1346.3028564453125, 'logps/ref_chosen': -65.68924713134766, 'logps/ref_rejected': -110.24205017089844, 'KL/chosen_KL_mean': -794.72314453125, 'KL/rejected_KL_mean': -1236.060791015625, 'KL/mean': -1015.3919677734375, 'KL/std': 731.3734130859375, 'logits/chosen': -0.9885178804397583, 'logits/rejected': -1.0216963291168213, 'epoch': 0.94} + 94%|█████████▍| 642/681 [27:18<01:40, 2.59s/it] 94%|█████████▍| 643/681 [27:21<01:39, 2.61s/it] {'loss': 1.1537, 'grad_norm': 39.23821258544922, 'learning_rate': 4.993270631642038e-09, 'fcm_dpo/beta': 0.0009251800365746021, 'fcm_dpo/q_t': 0.42580801248550415, 'fcm_dpo/delta': -0.02657410502433777, 'fcm_dpo/margin': 343.71783447265625, 'margin_dpo/margin_mean': 343.71783447265625, 'margin_dpo/margin_std': 526.9955444335938, 'logps/chosen': -764.09814453125, 'logps/rejected': -1143.3343505859375, 'logps/ref_chosen': -51.94999694824219, 'logps/ref_rejected': -87.46833801269531, 'KL/chosen_KL_mean': -712.148193359375, 'KL/rejected_KL_mean': -1055.865966796875, 'KL/mean': -884.007080078125, 'KL/std': 530.5042724609375, 'logits/chosen': -1.1170873641967773, 'logits/rejected': -1.1149516105651855, 'epoch': 0.94} + 94%|█████████▍| 643/681 [27:21<01:39, 2.61s/it] 95%|█████████▍| 644/681 [27:23<01:36, 2.62s/it] {'loss': 1.1888, 'grad_norm': 38.85586166381836, 'learning_rate': 4.741290495811873e-09, 'fcm_dpo/beta': 0.0009356926893815398, 'fcm_dpo/q_t': 0.4279705882072449, 'fcm_dpo/delta': 0.07765576243400574, 'fcm_dpo/margin': 347.2160949707031, 'margin_dpo/margin_mean': 347.2160949707031, 'margin_dpo/margin_std': 702.9293212890625, 'logps/chosen': -747.5791015625, 'logps/rejected': -1122.914306640625, 'logps/ref_chosen': -59.017662048339844, 'logps/ref_rejected': -87.13668823242188, 'KL/chosen_KL_mean': -688.5614624023438, 'KL/rejected_KL_mean': -1035.777587890625, 'KL/mean': -862.1695556640625, 'KL/std': 615.8262939453125, 'logits/chosen': -0.9950805306434631, 'logits/rejected': -1.0022577047348022, 'epoch': 0.95} + 95%|█████████▍| 644/681 [27:23<01:36, 2.62s/it] 95%|█████████▍| 645/681 [27:26<01:34, 2.61s/it] {'loss': 1.3289, 'grad_norm': 89.61138916015625, 'learning_rate': 4.495773155069299e-09, 'fcm_dpo/beta': 0.0009528464288450778, 'fcm_dpo/q_t': 0.4628395438194275, 'fcm_dpo/delta': 0.06967134773731232, 'fcm_dpo/margin': 178.12442016601562, 'margin_dpo/margin_mean': 178.12442016601562, 'margin_dpo/margin_std': 673.5927734375, 'logps/chosen': -778.2612915039062, 'logps/rejected': -998.29052734375, 'logps/ref_chosen': -55.87602233886719, 'logps/ref_rejected': -97.78080749511719, 'KL/chosen_KL_mean': -722.38525390625, 'KL/rejected_KL_mean': -900.5097045898438, 'KL/mean': -811.447509765625, 'KL/std': 496.90313720703125, 'logits/chosen': -0.9825261831283569, 'logits/rejected': -0.968986988067627, 'epoch': 0.95} + 95%|█████████▍| 645/681 [27:26<01:34, 2.61s/it] 95%|█████████▍| 646/681 [27:28<01:29, 2.56s/it] {'loss': 1.1834, 'grad_norm': 46.54256057739258, 'learning_rate': 4.256725079024553e-09, 'fcm_dpo/beta': 0.0009646883700042963, 'fcm_dpo/q_t': 0.4325307607650757, 'fcm_dpo/delta': 0.10456812381744385, 'fcm_dpo/margin': 309.60821533203125, 'margin_dpo/margin_mean': 309.6082458496094, 'margin_dpo/margin_std': 569.5079345703125, 'logps/chosen': -750.925537109375, 'logps/rejected': -1076.7637939453125, 'logps/ref_chosen': -61.275787353515625, 'logps/ref_rejected': -77.50580596923828, 'KL/chosen_KL_mean': -689.6497802734375, 'KL/rejected_KL_mean': -999.2579956054688, 'KL/mean': -844.453857421875, 'KL/std': 476.8377990722656, 'logits/chosen': -1.0059431791305542, 'logits/rejected': -0.9890854954719543, 'epoch': 0.95} + 95%|█████████▍| 646/681 [27:28<01:29, 2.56s/it] 95%|█████████▌| 647/681 [27:31<01:28, 2.60s/it] {'loss': 1.1137, 'grad_norm': 35.470096588134766, 'learning_rate': 4.024152566816791e-09, 'fcm_dpo/beta': 0.0009775401558727026, 'fcm_dpo/q_t': 0.41445714235305786, 'fcm_dpo/delta': 0.030786845833063126, 'fcm_dpo/margin': 378.85992431640625, 'margin_dpo/margin_mean': 378.85992431640625, 'margin_dpo/margin_std': 526.0119018554688, 'logps/chosen': -662.9085693359375, 'logps/rejected': -1080.435546875, 'logps/ref_chosen': -54.8524169921875, 'logps/ref_rejected': -93.5194091796875, 'KL/chosen_KL_mean': -608.05615234375, 'KL/rejected_KL_mean': -986.9160766601562, 'KL/mean': -797.486083984375, 'KL/std': 535.0508422851562, 'logits/chosen': -0.937364935874939, 'logits/rejected': -0.965479850769043, 'epoch': 0.95} + 95%|█████████▌| 647/681 [27:31<01:28, 2.60s/it] 95%|█████████▌| 648/681 [27:33<01:23, 2.53s/it] {'loss': 1.0285, 'grad_norm': 27.33829116821289, 'learning_rate': 3.798061746947995e-09, 'fcm_dpo/beta': 0.000957622891291976, 'fcm_dpo/q_t': 0.3837600648403168, 'fcm_dpo/delta': -0.14847612380981445, 'fcm_dpo/margin': 564.3631591796875, 'margin_dpo/margin_mean': 564.3631591796875, 'margin_dpo/margin_std': 768.2374267578125, 'logps/chosen': -692.247314453125, 'logps/rejected': -1301.15185546875, 'logps/ref_chosen': -54.17146682739258, 'logps/ref_rejected': -98.7127914428711, 'KL/chosen_KL_mean': -638.0758056640625, 'KL/rejected_KL_mean': -1202.43896484375, 'KL/mean': -920.2573852539062, 'KL/std': 642.937255859375, 'logits/chosen': -1.0410782098770142, 'logits/rejected': -1.0981464385986328, 'epoch': 0.95} + 95%|█████████▌| 648/681 [27:33<01:23, 2.53s/it] 95%|█████████▌| 649/681 [27:36<01:22, 2.59s/it] {'loss': 1.2415, 'grad_norm': 29.119403839111328, 'learning_rate': 3.5784585771215235e-09, 'fcm_dpo/beta': 0.0009556890581734478, 'fcm_dpo/q_t': 0.4464304447174072, 'fcm_dpo/delta': 0.055874936282634735, 'fcm_dpo/margin': 258.4045715332031, 'margin_dpo/margin_mean': 258.4045715332031, 'margin_dpo/margin_std': 620.7310791015625, 'logps/chosen': -742.7098388671875, 'logps/rejected': -1018.7113037109375, 'logps/ref_chosen': -62.480350494384766, 'logps/ref_rejected': -80.07717895507812, 'KL/chosen_KL_mean': -680.2294921875, 'KL/rejected_KL_mean': -938.6341552734375, 'KL/mean': -809.4317626953125, 'KL/std': 502.25543212890625, 'logits/chosen': -1.0787172317504883, 'logits/rejected': -1.0679619312286377, 'epoch': 0.95} + 95%|█████████▌| 649/681 [27:36<01:22, 2.59s/it] 95%|█████████▌| 650/681 [27:39<01:21, 2.62s/it] {'loss': 1.1353, 'grad_norm': 34.31390380859375, 'learning_rate': 3.3653488440851253e-09, 'fcm_dpo/beta': 0.0009581187041476369, 'fcm_dpo/q_t': 0.4114704728126526, 'fcm_dpo/delta': -0.001857999712228775, 'fcm_dpo/margin': 419.18524169921875, 'margin_dpo/margin_mean': 419.18524169921875, 'margin_dpo/margin_std': 727.79541015625, 'logps/chosen': -791.6647338867188, 'logps/rejected': -1253.02197265625, 'logps/ref_chosen': -56.09281921386719, 'logps/ref_rejected': -98.26483917236328, 'KL/chosen_KL_mean': -735.5718994140625, 'KL/rejected_KL_mean': -1154.757080078125, 'KL/mean': -945.16455078125, 'KL/std': 632.589599609375, 'logits/chosen': -1.0013569593429565, 'logits/rejected': -1.0217807292938232, 'epoch': 0.95} + 95%|█████████▌| 650/681 [27:39<01:21, 2.62s/it] 96%|█████████▌| 651/681 [27:41<01:17, 2.59s/it] {'loss': 1.0005, 'grad_norm': 45.66421127319336, 'learning_rate': 3.158738163478475e-09, 'fcm_dpo/beta': 0.000941460719332099, 'fcm_dpo/q_t': 0.38108521699905396, 'fcm_dpo/delta': -0.13033278286457062, 'fcm_dpo/margin': 555.9376220703125, 'margin_dpo/margin_mean': 555.9376220703125, 'margin_dpo/margin_std': 601.0250244140625, 'logps/chosen': -526.066162109375, 'logps/rejected': -1138.5361328125, 'logps/ref_chosen': -43.42544937133789, 'logps/ref_rejected': -99.95791625976562, 'KL/chosen_KL_mean': -482.6407165527344, 'KL/rejected_KL_mean': -1038.578369140625, 'KL/mean': -760.6094970703125, 'KL/std': 583.9876708984375, 'logits/chosen': -1.0007972717285156, 'logits/rejected': -1.059419870376587, 'epoch': 0.96} + 96%|█████████▌| 651/681 [27:41<01:17, 2.59s/it] 96%|█████████▌| 652/681 [27:44<01:15, 2.59s/it] {'loss': 1.1271, 'grad_norm': 32.21805953979492, 'learning_rate': 2.9586319796851555e-09, 'fcm_dpo/beta': 0.0009400760754942894, 'fcm_dpo/q_t': 0.4142574071884155, 'fcm_dpo/delta': 0.015617836266756058, 'fcm_dpo/margin': 409.4013671875, 'margin_dpo/margin_mean': 409.4013671875, 'margin_dpo/margin_std': 668.522216796875, 'logps/chosen': -695.4808959960938, 'logps/rejected': -1154.0732421875, 'logps/ref_chosen': -62.57680892944336, 'logps/ref_rejected': -111.76779174804688, 'KL/chosen_KL_mean': -632.904052734375, 'KL/rejected_KL_mean': -1042.305419921875, 'KL/mean': -837.604736328125, 'KL/std': 583.210693359375, 'logits/chosen': -1.0335817337036133, 'logits/rejected': -1.0595180988311768, 'epoch': 0.96} + 96%|█████████▌| 652/681 [27:44<01:15, 2.59s/it] 96%|█████████▌| 653/681 [27:46<01:11, 2.57s/it] {'loss': 1.1353, 'grad_norm': 32.95127487182617, 'learning_rate': 2.7650355656892166e-09, 'fcm_dpo/beta': 0.0009441639995202422, 'fcm_dpo/q_t': 0.41772544384002686, 'fcm_dpo/delta': 0.03045791946351528, 'fcm_dpo/margin': 392.53460693359375, 'margin_dpo/margin_mean': 392.53460693359375, 'margin_dpo/margin_std': 648.3425903320312, 'logps/chosen': -817.6948852539062, 'logps/rejected': -1252.3660888671875, 'logps/ref_chosen': -61.11295700073242, 'logps/ref_rejected': -103.24960327148438, 'KL/chosen_KL_mean': -756.5819091796875, 'KL/rejected_KL_mean': -1149.116455078125, 'KL/mean': -952.8492431640625, 'KL/std': 617.5569458007812, 'logits/chosen': -1.0887930393218994, 'logits/rejected': -1.1141128540039062, 'epoch': 0.96} + 96%|█████████▌| 653/681 [27:47<01:11, 2.57s/it] 96%|█████████▌| 654/681 [27:49<01:09, 2.57s/it] {'loss': 1.1455, 'grad_norm': 34.27675247192383, 'learning_rate': 2.577954022936174e-09, 'fcm_dpo/beta': 0.0009525552159175277, 'fcm_dpo/q_t': 0.42324844002723694, 'fcm_dpo/delta': 0.06012295186519623, 'fcm_dpo/margin': 358.99334716796875, 'margin_dpo/margin_mean': 358.9933776855469, 'margin_dpo/margin_std': 584.0943603515625, 'logps/chosen': -744.39208984375, 'logps/rejected': -1140.43115234375, 'logps/ref_chosen': -61.7281379699707, 'logps/ref_rejected': -98.7738037109375, 'KL/chosen_KL_mean': -682.6639404296875, 'KL/rejected_KL_mean': -1041.6572265625, 'KL/mean': -862.1605834960938, 'KL/std': 506.5113220214844, 'logits/chosen': -1.0422253608703613, 'logits/rejected': -1.0543601512908936, 'epoch': 0.96} + 96%|█████████▌| 654/681 [27:49<01:09, 2.57s/it] 96%|█████████▌| 655/681 [27:52<01:06, 2.54s/it] {'loss': 1.136, 'grad_norm': 34.595184326171875, 'learning_rate': 2.397392281198729e-09, 'fcm_dpo/beta': 0.0009614527225494385, 'fcm_dpo/q_t': 0.41819822788238525, 'fcm_dpo/delta': 0.03576880693435669, 'fcm_dpo/margin': 380.1811828613281, 'margin_dpo/margin_mean': 380.1811828613281, 'margin_dpo/margin_std': 621.2908935546875, 'logps/chosen': -697.57080078125, 'logps/rejected': -1126.467041015625, 'logps/ref_chosen': -49.576812744140625, 'logps/ref_rejected': -98.29183197021484, 'KL/chosen_KL_mean': -647.9940185546875, 'KL/rejected_KL_mean': -1028.1751708984375, 'KL/mean': -838.0845947265625, 'KL/std': 522.7816162109375, 'logits/chosen': -1.0417159795761108, 'logits/rejected': -1.0822257995605469, 'epoch': 0.96} + 96%|█████████▌| 655/681 [27:52<01:06, 2.54s/it] 96%|█████████▋| 656/681 [27:54<01:05, 2.60s/it] {'loss': 0.9582, 'grad_norm': 73.65252685546875, 'learning_rate': 2.223355098446622e-09, 'fcm_dpo/beta': 0.0009351709159091115, 'fcm_dpo/q_t': 0.36535900831222534, 'fcm_dpo/delta': -0.2227155566215515, 'fcm_dpo/margin': 651.910400390625, 'margin_dpo/margin_mean': 651.910400390625, 'margin_dpo/margin_std': 694.8057250976562, 'logps/chosen': -744.788818359375, 'logps/rejected': -1457.824462890625, 'logps/ref_chosen': -52.54943084716797, 'logps/ref_rejected': -113.67464447021484, 'KL/chosen_KL_mean': -692.2393798828125, 'KL/rejected_KL_mean': -1344.149658203125, 'KL/mean': -1018.1945190429688, 'KL/std': 693.30029296875, 'logits/chosen': -0.9182928800582886, 'logits/rejected': -0.9882034063339233, 'epoch': 0.96} + 96%|█████████▋| 656/681 [27:54<01:05, 2.60s/it] 96%|█████████▋| 657/681 [27:57<00:59, 2.49s/it] {'loss': 1.0508, 'grad_norm': 42.068206787109375, 'learning_rate': 2.055847060721566e-09, 'fcm_dpo/beta': 0.0009086633799597621, 'fcm_dpo/q_t': 0.39257729053497314, 'fcm_dpo/delta': -0.08051308244466782, 'fcm_dpo/margin': 524.4208374023438, 'margin_dpo/margin_mean': 524.4208374023438, 'margin_dpo/margin_std': 682.353271484375, 'logps/chosen': -702.7789306640625, 'logps/rejected': -1278.4140625, 'logps/ref_chosen': -46.700538635253906, 'logps/ref_rejected': -97.91487121582031, 'KL/chosen_KL_mean': -656.0784301757812, 'KL/rejected_KL_mean': -1180.499267578125, 'KL/mean': -918.288818359375, 'KL/std': 663.51416015625, 'logits/chosen': -1.087989330291748, 'logits/rejected': -1.1325247287750244, 'epoch': 0.96} + 96%|█████████▋| 657/681 [27:57<00:59, 2.49s/it] 97%|█████████▋| 658/681 [27:59<00:56, 2.45s/it] {'loss': 1.1177, 'grad_norm': 31.92685890197754, 'learning_rate': 1.8948725820160662e-09, 'fcm_dpo/beta': 0.000907151261344552, 'fcm_dpo/q_t': 0.4157608151435852, 'fcm_dpo/delta': 0.037033095955848694, 'fcm_dpo/margin': 401.3349609375, 'margin_dpo/margin_mean': 401.3349609375, 'margin_dpo/margin_std': 558.94873046875, 'logps/chosen': -743.6102905273438, 'logps/rejected': -1179.926513671875, 'logps/ref_chosen': -60.95820999145508, 'logps/ref_rejected': -95.93949127197266, 'KL/chosen_KL_mean': -682.652099609375, 'KL/rejected_KL_mean': -1083.987060546875, 'KL/mean': -883.319580078125, 'KL/std': 494.90802001953125, 'logits/chosen': -1.0313150882720947, 'logits/rejected': -1.0605497360229492, 'epoch': 0.97} + 97%|█████████▋| 658/681 [27:59<00:56, 2.45s/it] 97%|█████████▋| 659/681 [28:02<00:55, 2.54s/it] {'loss': 1.114, 'grad_norm': 32.02241516113281, 'learning_rate': 1.7404359041573723e-09, 'fcm_dpo/beta': 0.0009139457251876593, 'fcm_dpo/q_t': 0.41561028361320496, 'fcm_dpo/delta': 0.030485082417726517, 'fcm_dpo/margin': 405.39166259765625, 'margin_dpo/margin_mean': 405.39166259765625, 'margin_dpo/margin_std': 572.6546020507812, 'logps/chosen': -698.8282470703125, 'logps/rejected': -1114.947998046875, 'logps/ref_chosen': -76.74298095703125, 'logps/ref_rejected': -87.4709701538086, 'KL/chosen_KL_mean': -622.085205078125, 'KL/rejected_KL_mean': -1027.47705078125, 'KL/mean': -824.7811279296875, 'KL/std': 517.73681640625, 'logits/chosen': -0.9981797933578491, 'logits/rejected': -0.9754196405410767, 'epoch': 0.97} + 97%|█████████▋| 659/681 [28:02<00:55, 2.54s/it] 97%|█████████▋| 660/681 [28:04<00:52, 2.51s/it] {'loss': 1.0543, 'grad_norm': 37.91978454589844, 'learning_rate': 1.592541096695571e-09, 'fcm_dpo/beta': 0.0009104161872528493, 'fcm_dpo/q_t': 0.3939950466156006, 'fcm_dpo/delta': -0.0718650072813034, 'fcm_dpo/margin': 514.663330078125, 'margin_dpo/margin_mean': 514.663330078125, 'margin_dpo/margin_std': 662.3004150390625, 'logps/chosen': -740.54541015625, 'logps/rejected': -1272.120849609375, 'logps/ref_chosen': -59.04788589477539, 'logps/ref_rejected': -75.96005249023438, 'KL/chosen_KL_mean': -681.49755859375, 'KL/rejected_KL_mean': -1196.1607666015625, 'KL/mean': -938.8291625976562, 'KL/std': 619.5654907226562, 'logits/chosen': -1.0615503787994385, 'logits/rejected': -1.0776853561401367, 'epoch': 0.97} + 97%|█████████▋| 660/681 [28:04<00:52, 2.51s/it] 97%|█████████▋| 661/681 [28:06<00:48, 2.44s/it] {'loss': 1.0881, 'grad_norm': 50.24213409423828, 'learning_rate': 1.4511920567963908e-09, 'fcm_dpo/beta': 0.0009062248282134533, 'fcm_dpo/q_t': 0.40717989206314087, 'fcm_dpo/delta': -0.019264454022049904, 'fcm_dpo/margin': 461.5398254394531, 'margin_dpo/margin_mean': 461.53985595703125, 'margin_dpo/margin_std': 661.6412963867188, 'logps/chosen': -648.9098510742188, 'logps/rejected': -1145.7813720703125, 'logps/ref_chosen': -50.673973083496094, 'logps/ref_rejected': -86.00569152832031, 'KL/chosen_KL_mean': -598.23583984375, 'KL/rejected_KL_mean': -1059.775634765625, 'KL/mean': -829.0057983398438, 'KL/std': 670.0771484375, 'logits/chosen': -1.0732464790344238, 'logits/rejected': -1.0874643325805664, 'epoch': 0.97} + 97%|█████████▋| 661/681 [28:06<00:48, 2.44s/it] 97%|█████████▋| 662/681 [28:09<00:49, 2.58s/it] {'loss': 1.1688, 'grad_norm': 28.40976905822754, 'learning_rate': 1.3163925091384532e-09, 'fcm_dpo/beta': 0.0009117955341935158, 'fcm_dpo/q_t': 0.4251037836074829, 'fcm_dpo/delta': 0.06958886981010437, 'fcm_dpo/margin': 364.87664794921875, 'margin_dpo/margin_mean': 364.87664794921875, 'margin_dpo/margin_std': 676.1181030273438, 'logps/chosen': -758.1707763671875, 'logps/rejected': -1142.84228515625, 'logps/ref_chosen': -69.26106262207031, 'logps/ref_rejected': -89.05593872070312, 'KL/chosen_KL_mean': -688.9097900390625, 'KL/rejected_KL_mean': -1053.786376953125, 'KL/mean': -871.3480224609375, 'KL/std': 555.1720581054688, 'logits/chosen': -0.9545935392379761, 'logits/rejected': -0.9512023329734802, 'epoch': 0.97} + 97%|█████████▋| 662/681 [28:09<00:49, 2.58s/it] 97%|█████████▋| 663/681 [28:12<00:47, 2.66s/it] {'loss': 1.1201, 'grad_norm': 29.266870498657227, 'learning_rate': 1.1881460058152382e-09, 'fcm_dpo/beta': 0.0009125665528699756, 'fcm_dpo/q_t': 0.4114909768104553, 'fcm_dpo/delta': -0.006048870272934437, 'fcm_dpo/margin': 444.63604736328125, 'margin_dpo/margin_mean': 444.63604736328125, 'margin_dpo/margin_std': 745.1632690429688, 'logps/chosen': -725.249755859375, 'logps/rejected': -1218.9322509765625, 'logps/ref_chosen': -64.87890625, 'logps/ref_rejected': -113.92536926269531, 'KL/chosen_KL_mean': -660.3707885742188, 'KL/rejected_KL_mean': -1105.0068359375, 'KL/mean': -882.6888427734375, 'KL/std': 639.6807250976562, 'logits/chosen': -1.043156385421753, 'logits/rejected': -1.0693552494049072, 'epoch': 0.97} + 97%|█████████▋| 663/681 [28:12<00:47, 2.66s/it] 98%|█████████▊| 664/681 [28:15<00:45, 2.65s/it] {'loss': 1.063, 'grad_norm': 29.01834487915039, 'learning_rate': 1.066455926241383e-09, 'fcm_dpo/beta': 0.0009023561142385006, 'fcm_dpo/q_t': 0.3978724479675293, 'fcm_dpo/delta': -0.05629858374595642, 'fcm_dpo/margin': 502.282470703125, 'margin_dpo/margin_mean': 502.2824401855469, 'margin_dpo/margin_std': 654.3357543945312, 'logps/chosen': -732.568359375, 'logps/rejected': -1279.484130859375, 'logps/ref_chosen': -60.88847351074219, 'logps/ref_rejected': -105.521728515625, 'KL/chosen_KL_mean': -671.679931640625, 'KL/rejected_KL_mean': -1173.96240234375, 'KL/mean': -922.8211669921875, 'KL/std': 610.1594848632812, 'logits/chosen': -1.0154392719268799, 'logits/rejected': -1.0499646663665771, 'epoch': 0.98} + 98%|█████████▊| 664/681 [28:15<00:45, 2.65s/it] 98%|█████████▊| 665/681 [28:17<00:40, 2.56s/it] {'loss': 1.0939, 'grad_norm': 45.89773941040039, 'learning_rate': 9.513254770636137e-10, 'fcm_dpo/beta': 0.0009059334406629205, 'fcm_dpo/q_t': 0.4117254316806793, 'fcm_dpo/delta': 0.01930341310799122, 'fcm_dpo/margin': 421.0374755859375, 'margin_dpo/margin_mean': 421.0375061035156, 'margin_dpo/margin_std': 522.3038940429688, 'logps/chosen': -668.214111328125, 'logps/rejected': -1113.4962158203125, 'logps/ref_chosen': -60.56413269042969, 'logps/ref_rejected': -84.80882263183594, 'KL/chosen_KL_mean': -607.6499633789062, 'KL/rejected_KL_mean': -1028.6873779296875, 'KL/mean': -818.168701171875, 'KL/std': 506.8304138183594, 'logits/chosen': -1.088966965675354, 'logits/rejected': -1.1092216968536377, 'epoch': 0.98} + 98%|█████████▊| 665/681 [28:17<00:40, 2.56s/it] 98%|█████████▊| 666/681 [28:20<00:39, 2.61s/it] {'loss': 1.1133, 'grad_norm': 33.37041091918945, 'learning_rate': 8.427576920763956e-10, 'fcm_dpo/beta': 0.0009081506868824363, 'fcm_dpo/q_t': 0.4136677384376526, 'fcm_dpo/delta': 0.020935581997036934, 'fcm_dpo/margin': 418.2576904296875, 'margin_dpo/margin_mean': 418.2577209472656, 'margin_dpo/margin_std': 598.1719970703125, 'logps/chosen': -721.43701171875, 'logps/rejected': -1171.1663818359375, 'logps/ref_chosen': -64.41996002197266, 'logps/ref_rejected': -95.8916244506836, 'KL/chosen_KL_mean': -657.01708984375, 'KL/rejected_KL_mean': -1075.274658203125, 'KL/mean': -866.1459350585938, 'KL/std': 536.670166015625, 'logits/chosen': -0.9409841299057007, 'logits/rejected': -0.9495470523834229, 'epoch': 0.98} + 98%|█████████▊| 666/681 [28:20<00:39, 2.61s/it] 98%|█████████▊| 667/681 [28:22<00:36, 2.62s/it] {'loss': 1.0648, 'grad_norm': 38.522857666015625, 'learning_rate': 7.407554321417764e-10, 'fcm_dpo/beta': 0.0009012054651975632, 'fcm_dpo/q_t': 0.3979080319404602, 'fcm_dpo/delta': -0.052186060696840286, 'fcm_dpo/margin': 498.8653259277344, 'margin_dpo/margin_mean': 498.86529541015625, 'margin_dpo/margin_std': 650.4932861328125, 'logps/chosen': -824.3060302734375, 'logps/rejected': -1341.7298583984375, 'logps/ref_chosen': -69.27702331542969, 'logps/ref_rejected': -87.83549499511719, 'KL/chosen_KL_mean': -755.029052734375, 'KL/rejected_KL_mean': -1253.8944091796875, 'KL/mean': -1004.461669921875, 'KL/std': 589.2806396484375, 'logits/chosen': -0.9830505847930908, 'logits/rejected': -0.9843896627426147, 'epoch': 0.98} + 98%|█████████▊| 667/681 [28:22<00:36, 2.62s/it] 98%|█████████▊| 668/681 [28:25<00:34, 2.64s/it] {'loss': 1.2016, 'grad_norm': 44.24299621582031, 'learning_rate': 6.453213851142225e-10, 'fcm_dpo/beta': 0.0009167675743810833, 'fcm_dpo/q_t': 0.43127357959747314, 'fcm_dpo/delta': 0.08393767476081848, 'fcm_dpo/margin': 346.7828674316406, 'margin_dpo/margin_mean': 346.7828674316406, 'margin_dpo/margin_std': 747.9251098632812, 'logps/chosen': -857.318115234375, 'logps/rejected': -1235.236083984375, 'logps/ref_chosen': -72.60400390625, 'logps/ref_rejected': -103.73905944824219, 'KL/chosen_KL_mean': -784.714111328125, 'KL/rejected_KL_mean': -1131.4970703125, 'KL/mean': -958.1055908203125, 'KL/std': 627.3168334960938, 'logits/chosen': -1.0314850807189941, 'logits/rejected': -1.0380046367645264, 'epoch': 0.98} + 98%|█████████▊| 668/681 [28:25<00:34, 2.64s/it] 98%|█████████▊| 669/681 [28:28<00:31, 2.65s/it] {'loss': 1.0536, 'grad_norm': 29.531373977661133, 'learning_rate': 5.564580657695939e-10, 'fcm_dpo/beta': 0.0009072460234165192, 'fcm_dpo/q_t': 0.3927006125450134, 'fcm_dpo/delta': -0.07513141632080078, 'fcm_dpo/margin': 519.8635864257812, 'margin_dpo/margin_mean': 519.8635864257812, 'margin_dpo/margin_std': 669.6885375976562, 'logps/chosen': -647.6539306640625, 'logps/rejected': -1199.325439453125, 'logps/ref_chosen': -46.116416931152344, 'logps/ref_rejected': -77.92434692382812, 'KL/chosen_KL_mean': -601.5374755859375, 'KL/rejected_KL_mean': -1121.401123046875, 'KL/mean': -861.4692993164062, 'KL/std': 594.085693359375, 'logits/chosen': -1.0162256956100464, 'logits/rejected': -1.0264288187026978, 'epoch': 0.98} + 98%|█████████▊| 669/681 [28:28<00:31, 2.65s/it] 98%|█████████▊| 670/681 [28:30<00:29, 2.64s/it] {'loss': 1.0656, 'grad_norm': 30.942873001098633, 'learning_rate': 4.741678157389739e-10, 'fcm_dpo/beta': 0.0008986732573248446, 'fcm_dpo/q_t': 0.396476686000824, 'fcm_dpo/delta': -0.057006560266017914, 'fcm_dpo/margin': 505.550537109375, 'margin_dpo/margin_mean': 505.550537109375, 'margin_dpo/margin_std': 658.2862548828125, 'logps/chosen': -632.271728515625, 'logps/rejected': -1172.4169921875, 'logps/ref_chosen': -62.34575271606445, 'logps/ref_rejected': -96.9405517578125, 'KL/chosen_KL_mean': -569.926025390625, 'KL/rejected_KL_mean': -1075.4765625, 'KL/mean': -822.7012939453125, 'KL/std': 546.4443359375, 'logits/chosen': -0.9540762901306152, 'logits/rejected': -0.972830593585968, 'epoch': 0.98} + 98%|█████████▊| 670/681 [28:30<00:29, 2.64s/it] 99%|█████████▊| 671/681 [28:33<00:25, 2.55s/it] {'loss': 1.1447, 'grad_norm': 34.9372673034668, 'learning_rate': 3.9845280344705245e-10, 'fcm_dpo/beta': 0.0009013921953737736, 'fcm_dpo/q_t': 0.4173119068145752, 'fcm_dpo/delta': 0.0433029942214489, 'fcm_dpo/margin': 397.0133361816406, 'margin_dpo/margin_mean': 397.0133361816406, 'margin_dpo/margin_std': 663.8474731445312, 'logps/chosen': -779.120849609375, 'logps/rejected': -1211.953369140625, 'logps/ref_chosen': -48.00010681152344, 'logps/ref_rejected': -83.81932067871094, 'KL/chosen_KL_mean': -731.1207275390625, 'KL/rejected_KL_mean': -1128.134033203125, 'KL/mean': -929.6273803710938, 'KL/std': 551.165283203125, 'logits/chosen': -1.0201672315597534, 'logits/rejected': -1.0463124513626099, 'epoch': 0.99} + 99%|█████████▊| 671/681 [28:33<00:25, 2.55s/it] 99%|█████████▊| 672/681 [28:35<00:22, 2.54s/it] {'loss': 1.157, 'grad_norm': 49.699440002441406, 'learning_rate': 3.293150240547549e-10, 'fcm_dpo/beta': 0.0009026298066601157, 'fcm_dpo/q_t': 0.418613076210022, 'fcm_dpo/delta': 0.0317508839070797, 'fcm_dpo/margin': 409.248291015625, 'margin_dpo/margin_mean': 409.248291015625, 'margin_dpo/margin_std': 746.950927734375, 'logps/chosen': -867.9027099609375, 'logps/rejected': -1311.7078857421875, 'logps/ref_chosen': -58.58328628540039, 'logps/ref_rejected': -93.14015197753906, 'KL/chosen_KL_mean': -809.3194580078125, 'KL/rejected_KL_mean': -1218.567626953125, 'KL/mean': -1013.943603515625, 'KL/std': 674.4744873046875, 'logits/chosen': -1.111328363418579, 'logits/rejected': -1.1195930242538452, 'epoch': 0.99} + 99%|█████████▊| 672/681 [28:35<00:22, 2.54s/it] 99%|█████████▉| 673/681 [28:38<00:19, 2.47s/it] {'loss': 1.1329, 'grad_norm': 29.702667236328125, 'learning_rate': 2.6675629940689504e-10, 'fcm_dpo/beta': 0.0009116331348195672, 'fcm_dpo/q_t': 0.4198199510574341, 'fcm_dpo/delta': 0.04632698372006416, 'fcm_dpo/margin': 389.7848205566406, 'margin_dpo/margin_mean': 389.7848205566406, 'margin_dpo/margin_std': 619.7523803710938, 'logps/chosen': -769.0118408203125, 'logps/rejected': -1197.36962890625, 'logps/ref_chosen': -46.72320556640625, 'logps/ref_rejected': -85.29623413085938, 'KL/chosen_KL_mean': -722.28857421875, 'KL/rejected_KL_mean': -1112.073486328125, 'KL/mean': -917.1810302734375, 'KL/std': 562.255615234375, 'logits/chosen': -1.029843807220459, 'logits/rejected': -1.0315158367156982, 'epoch': 0.99} + 99%|█████████▉| 673/681 [28:38<00:19, 2.47s/it] 99%|█████████▉| 674/681 [28:40<00:17, 2.56s/it] {'loss': 1.0664, 'grad_norm': 38.08716583251953, 'learning_rate': 2.1077827798404725e-10, 'fcm_dpo/beta': 0.0009095786954276264, 'fcm_dpo/q_t': 0.4003087282180786, 'fcm_dpo/delta': -0.05102291703224182, 'fcm_dpo/margin': 493.3524475097656, 'margin_dpo/margin_mean': 493.3524169921875, 'margin_dpo/margin_std': 656.5906982421875, 'logps/chosen': -625.580322265625, 'logps/rejected': -1143.533203125, 'logps/ref_chosen': -45.445526123046875, 'logps/ref_rejected': -70.04593658447266, 'KL/chosen_KL_mean': -580.1348266601562, 'KL/rejected_KL_mean': -1073.4873046875, 'KL/mean': -826.81103515625, 'KL/std': 549.4868774414062, 'logits/chosen': -0.9516767263412476, 'logits/rejected': -0.9709774255752563, 'epoch': 0.99} + 99%|█████████▉| 674/681 [28:40<00:17, 2.56s/it] 99%|█████████▉| 675/681 [28:43<00:15, 2.55s/it] {'loss': 1.0605, 'grad_norm': 24.625337600708008, 'learning_rate': 1.6138243485910863e-10, 'fcm_dpo/beta': 0.000889546936377883, 'fcm_dpo/q_t': 0.3973570168018341, 'fcm_dpo/delta': -0.06706520915031433, 'fcm_dpo/margin': 519.83984375, 'margin_dpo/margin_mean': 519.83984375, 'margin_dpo/margin_std': 662.8849487304688, 'logps/chosen': -723.0770263671875, 'logps/rejected': -1272.83251953125, 'logps/ref_chosen': -44.17628479003906, 'logps/ref_rejected': -74.09197998046875, 'KL/chosen_KL_mean': -678.9006958007812, 'KL/rejected_KL_mean': -1198.7406005859375, 'KL/mean': -938.8206787109375, 'KL/std': 619.1822509765625, 'logits/chosen': -1.0201187133789062, 'logits/rejected': -1.0345721244812012, 'epoch': 0.99} + 99%|█████████▉| 675/681 [28:43<00:15, 2.55s/it] 99%|█████████▉| 676/681 [28:46<00:12, 2.58s/it] {'loss': 1.0732, 'grad_norm': 24.022327423095703, 'learning_rate': 1.1857007165852472e-10, 'fcm_dpo/beta': 0.000891472096554935, 'fcm_dpo/q_t': 0.4033673405647278, 'fcm_dpo/delta': -0.024206459522247314, 'fcm_dpo/margin': 474.67572021484375, 'margin_dpo/margin_mean': 474.6757507324219, 'margin_dpo/margin_std': 593.3427124023438, 'logps/chosen': -799.2227783203125, 'logps/rejected': -1290.858642578125, 'logps/ref_chosen': -71.39852905273438, 'logps/ref_rejected': -88.3587646484375, 'KL/chosen_KL_mean': -727.82421875, 'KL/rejected_KL_mean': -1202.5, 'KL/mean': -965.162109375, 'KL/std': 583.515869140625, 'logits/chosen': -0.9483212232589722, 'logits/rejected': -0.9590877294540405, 'epoch': 0.99} + 99%|█████████▉| 676/681 [28:46<00:12, 2.58s/it] 99%|█████████▉| 677/681 [28:48<00:10, 2.51s/it] {'loss': 1.0982, 'grad_norm': 27.512174606323242, 'learning_rate': 8.23423165278725e-11, 'fcm_dpo/beta': 0.0008846810087561607, 'fcm_dpo/q_t': 0.40949833393096924, 'fcm_dpo/delta': -0.021789535880088806, 'fcm_dpo/margin': 475.6611022949219, 'margin_dpo/margin_mean': 475.66107177734375, 'margin_dpo/margin_std': 737.0035400390625, 'logps/chosen': -782.5301513671875, 'logps/rejected': -1279.890380859375, 'logps/ref_chosen': -56.527435302734375, 'logps/ref_rejected': -78.22654724121094, 'KL/chosen_KL_mean': -726.002685546875, 'KL/rejected_KL_mean': -1201.663818359375, 'KL/mean': -963.833251953125, 'KL/std': 609.81103515625, 'logits/chosen': -1.0342793464660645, 'logits/rejected': -1.0246810913085938, 'epoch': 0.99} + 99%|█████████▉| 677/681 [28:48<00:10, 2.51s/it] 100%|█████████▉| 678/681 [28:50<00:07, 2.48s/it] {'loss': 1.0583, 'grad_norm': 32.770172119140625, 'learning_rate': 5.270012410216185e-11, 'fcm_dpo/beta': 0.0008793273009359837, 'fcm_dpo/q_t': 0.3942733407020569, 'fcm_dpo/delta': -0.07475695013999939, 'fcm_dpo/margin': 535.78466796875, 'margin_dpo/margin_mean': 535.78466796875, 'margin_dpo/margin_std': 712.2026977539062, 'logps/chosen': -649.9947509765625, 'logps/rejected': -1220.24951171875, 'logps/ref_chosen': -46.13447570800781, 'logps/ref_rejected': -80.60462951660156, 'KL/chosen_KL_mean': -603.8602294921875, 'KL/rejected_KL_mean': -1139.6448974609375, 'KL/mean': -871.7525634765625, 'KL/std': 642.23291015625, 'logits/chosen': -0.9874995946884155, 'logits/rejected': -1.0228235721588135, 'epoch': 1.0} + 100%|█████████▉| 678/681 [28:50<00:07, 2.48s/it] 100%|█████████▉| 679/681 [28:53<00:05, 2.56s/it] {'loss': 1.1443, 'grad_norm': 30.4984130859375, 'learning_rate': 2.9644275480772416e-11, 'fcm_dpo/beta': 0.0008801834774203598, 'fcm_dpo/q_t': 0.42413240671157837, 'fcm_dpo/delta': 0.06918685883283615, 'fcm_dpo/margin': 378.50738525390625, 'margin_dpo/margin_mean': 378.50738525390625, 'margin_dpo/margin_std': 593.6575927734375, 'logps/chosen': -747.0115966796875, 'logps/rejected': -1151.8221435546875, 'logps/ref_chosen': -50.294921875, 'logps/ref_rejected': -76.59813690185547, 'KL/chosen_KL_mean': -696.7166748046875, 'KL/rejected_KL_mean': -1075.22412109375, 'KL/mean': -885.9703369140625, 'KL/std': 518.1109619140625, 'logits/chosen': -1.0104858875274658, 'logits/rejected': -1.000281810760498, 'epoch': 1.0} + 100%|█████████▉| 679/681 [28:53<00:05, 2.56s/it] 100%|█████████▉| 680/681 [28:56<00:02, 2.58s/it] {'loss': 1.0878, 'grad_norm': 45.33549118041992, 'learning_rate': 1.31753782067201e-11, 'fcm_dpo/beta': 0.0008748341351747513, 'fcm_dpo/q_t': 0.3994414210319519, 'fcm_dpo/delta': -0.04234904423356056, 'fcm_dpo/margin': 502.96063232421875, 'margin_dpo/margin_mean': 502.96063232421875, 'margin_dpo/margin_std': 730.917724609375, 'logps/chosen': -762.55126953125, 'logps/rejected': -1300.98095703125, 'logps/ref_chosen': -76.91569519042969, 'logps/ref_rejected': -112.384765625, 'KL/chosen_KL_mean': -685.6356201171875, 'KL/rejected_KL_mean': -1188.59619140625, 'KL/mean': -937.115966796875, 'KL/std': 657.9539184570312, 'logits/chosen': -0.9940932989120483, 'logits/rejected': -1.0226861238479614, 'epoch': 1.0} + 100%|█████████▉| 680/681 [28:56<00:02, 2.58s/it] 100%|██████████| 681/681 [28:58<00:00, 2.56s/it] {'loss': 1.1472, 'grad_norm': 27.740650177001953, 'learning_rate': 3.2938662507808745e-12, 'fcm_dpo/beta': 0.0008728657849133015, 'fcm_dpo/q_t': 0.4215954542160034, 'fcm_dpo/delta': -0.041127026081085205, 'fcm_dpo/margin': 393.31298828125, 'margin_dpo/margin_mean': 393.31298828125, 'margin_dpo/margin_std': 621.3822631835938, 'logps/chosen': -756.53271484375, 'logps/rejected': -1177.446533203125, 'logps/ref_chosen': -60.957279205322266, 'logps/ref_rejected': -88.55797576904297, 'KL/chosen_KL_mean': -695.575439453125, 'KL/rejected_KL_mean': -1088.888427734375, 'KL/mean': -892.23193359375, 'KL/std': 566.686767578125, 'logits/chosen': -1.0516822338104248, 'logits/rejected': -1.0712807178497314, 'epoch': 1.0} + 100%|██████████| 681/681 [28:58<00:00, 2.56s/it][INFO|trainer.py:2681] 2026-04-29 16:09:35,351 >> + +Training completed. Do not forget to share your model on huggingface.co/models =) + + + {'train_runtime': 1738.7131, 'train_samples_per_second': 25.075, 'train_steps_per_second': 0.392, 'train_loss': 1.1094634984558374, 'epoch': 1.0} + 100%|██████████| 681/681 [28:58<00:00, 2.56s/it] 100%|██████████| 681/681 [28:58<00:00, 2.55s/it] +***** train metrics ***** + epoch = 1.0 + total_flos = 0GF + train_loss = 1.1095 + train_runtime = 0:28:58.71 + train_samples = 43598 + train_samples_per_second = 25.075 + train_steps_per_second = 0.392 +2026-04-29 16:09:35 - INFO - __main__ - *** Training complete *** +2026-04-29 16:09:35 - INFO - __main__ - *** Save model *** +[INFO|configuration_utils.py:419] 2026-04-29 16:10:08,681 >> Configuration saved in /workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/config.json +[INFO|configuration_utils.py:911] 2026-04-29 16:10:08,683 >> Configuration saved in /workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/generation_config.json +[INFO|modeling_utils.py:3580] 2026-04-29 16:11:22,622 >> The model is bigger than the maximum size per checkpoint (5GB) and is going to be split in 7 checkpoint shards. You can find where each parameters has been saved in the index located at /workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/model.safetensors.index.json. +[INFO|tokenization_utils_base.py:2510] 2026-04-29 16:11:22,628 >> tokenizer config file saved in /workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/tokenizer_config.json +[INFO|tokenization_utils_base.py:2519] 2026-04-29 16:11:22,630 >> Special tokens file saved in /workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/special_tokens_map.json +2026-04-29 16:11:22 - INFO - __main__ - Saved HF-compatible model artifacts to /workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449 +[INFO|modelcard.py:450] 2026-04-29 16:11:24,250 >> Dropping the following result as it does not have all the necessary fields: +{'dataset': {'name': 'Anthropic/hh-rlhf', 'type': 'Anthropic/hh-rlhf'}} +[INFO|configuration_utils.py:419] 2026-04-29 16:11:24,257 >> Configuration saved in /workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449/config.json +2026-04-29 16:11:24 - INFO - __main__ - Skipping margin dataset upload because push_margin_dataset is false. +2026-04-29 16:11:24 - INFO - __main__ - *** Training complete! *** +wandb: - 0.011 MB of 0.011 MB uploaded wandb: \ 0.011 MB of 0.011 MB uploaded wandb: | 0.011 MB of 0.011 MB uploaded wandb: / 0.011 MB of 0.619 MB uploaded wandb: - 0.011 MB of 0.635 MB uploaded wandb: \ 0.635 MB of 0.635 MB uploaded wandb: | 0.635 MB of 0.635 MB uploaded wandb: +wandb: Run history: +wandb: train/KL/chosen_KL_mean ████████▇▇▇▇▆▆▆▆▆▅▅▅▅▅▄▅▅▅▄▂▃▃▃▃▃▁▁▂▁▁▁▁ +wandb: train/KL/mean ████████▇▇▇▇▆▆▆▆▆▅▅▅▅▅▅▅▄▅▄▂▃▃▃▃▂▂▂▂▁▁▁▂ +wandb: train/KL/rejected_KL_mean ████████▇▇▇▇▇▆▆▆▆▅▆▅▅▅▅▅▅▅▄▂▃▃▃▃▂▃▂▃▂▁▁▂ +wandb: train/KL/std ▁▁▁▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▃▄▄▄▄▄▅▇▆▆▅▇▇▆▆▆▆███ +wandb: train/epoch ▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███ +wandb: train/fcm_dpo/beta ▇▇▇█▆▃▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁ +wandb: train/fcm_dpo/delta ▆▆▆▇▁▅▄▆▆▄▄▆▅█▅▆▅▆▆▅█▄▇▃▄▅▅▅▆▅▆▃▂█▇▆▅▄▂▅ +wandb: train/fcm_dpo/margin ▁▁▁▁▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▃▄▃▅▄▄▅▅▅▅▄▆▇▄▅▅▆▇█▆ +wandb: train/fcm_dpo/q_t ██▇▄▁▄▃▄▄▃▃▄▃▅▃▃▃▄▄▃▅▂▄▂▂▃▃▃▄▃▄▂▂▅▄▄▃▂▂▃ +wandb: train/global_step ▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███ +wandb: train/grad_norm ▄▅▃▃▃▂▁▂▁▂▁▁▂▂▂▂▂▂▂▃▃▄▂▁▃▄▂▃▃▂▃▂▅▃▄▃▃▄█▄ +wandb: train/learning_rate ▂▃▅▇██████▇▇▇▇▇▆▆▆▆▅▅▅▄▄▄▄▃▃▃▂▂▂▂▂▁▁▁▁▁▁ +wandb: train/logits/chosen ▆▆▆▅▅▅▄▆▇▇▇█▇▇▇▇█▇█▇▇▇▆▇▆▆▅▂▃▃▃▃▂▂▂▂▁▂▂▂ +wandb: train/logits/rejected ▆▆▆▅▅▅▅▆▇▇▇▇▇▇▇▇█▇█▇▇▇▆▆▆▆▄▃▃▃▃▂▂▁▂▂▁▂▂▂ +wandb: train/logps/chosen █████▇█▇▇▇▇▇▆▆▆▆▆▅▅▅▅▆▄▅▅▅▄▁▃▃▂▃▃▂▁▂▁▁▂▁ +wandb: train/logps/ref_chosen ▅█▆▆▄▃▆▆▅▄▇▃▄▃█▅▅▄▇▅▃█▇▅▅▃▆▁▇▅▃▇▅▆▇▆▃▆▆▂ +wandb: train/logps/ref_rejected ▇▄█▅▅▃▆█▇▄▅▃▅▆▅▅▄▅█▅█▄▆▃▅▄▃▁▆▃▄▆▃▄█▅▄▂▂▂ +wandb: train/logps/rejected ████████▇▇▇▇▇▆▆▆▆▅▆▅▆▅▅▅▅▅▄▂▃▃▃▃▂▃▂▃▂▁▁▂ +wandb: train/loss ██▇▃▁▄▃▄▃▃▂▄▃▄▂▃▂▅▃▃▅▁▄▁▂▂▂▃▃▃▄▂▁▅▄▃▂▂▁▃ +wandb: train/margin_dpo/margin_mean ▁▁▁▁▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▃▄▃▅▄▄▅▅▅▅▄▆▇▄▅▅▆▇█▆ +wandb: train/margin_dpo/margin_std ▁▁▁▁▁▁▁▂▂▂▂▂▃▃▂▃▃▄▃▄▄▃▃▄▄▄▄▇▅▆▅▇▇▆█▆▆█▇█ +wandb: +wandb: Run summary: +wandb: total_flos 0.0 +wandb: train/KL/chosen_KL_mean -695.57544 +wandb: train/KL/mean -892.23193 +wandb: train/KL/rejected_KL_mean -1088.88843 +wandb: train/KL/std 566.68677 +wandb: train/epoch 1.0 +wandb: train/fcm_dpo/beta 0.00087 +wandb: train/fcm_dpo/delta -0.04113 +wandb: train/fcm_dpo/margin 393.31299 +wandb: train/fcm_dpo/q_t 0.4216 +wandb: train/global_step 681 +wandb: train/grad_norm 27.74065 +wandb: train/learning_rate 0.0 +wandb: train/logits/chosen -1.05168 +wandb: train/logits/rejected -1.07128 +wandb: train/logps/chosen -756.53271 +wandb: train/logps/ref_chosen -60.95728 +wandb: train/logps/ref_rejected -88.55798 +wandb: train/logps/rejected -1177.44653 +wandb: train/loss 1.1472 +wandb: train/margin_dpo/margin_mean 393.31299 +wandb: train/margin_dpo/margin_std 621.38226 +wandb: train_loss 1.10946 +wandb: train_runtime 1738.7131 +wandb: train_samples_per_second 25.075 +wandb: train_steps_per_second 0.392 +wandb: +wandb: 🚀 View run llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p05-20260429-085449 at: https://wandb.ai/can-not-fand-northeastern-university/llama3-hh-new-dpo-multi-beta-sweep/runs/4022pu2h +wandb: ⭐️ View project at: https://wandb.ai/can-not-fand-northeastern-university/llama3-hh-new-dpo-multi-beta-sweep +wandb: Synced 5 W&B file(s), 0 media file(s), 0 artifact file(s) and 0 other file(s) +wandb: Find logs at: ./wandb/wandb/run-20260429_153950-4022pu2h/logs +wandb: WARNING The new W&B backend becomes opt-out in version 0.18.0; try it out with `wandb.require("core")`! See https://wandb.me/wandb-core for more information. diff --git a/train_results.json b/train_results.json new file mode 100644 index 0000000..112710c --- /dev/null +++ b/train_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 1.0, + "total_flos": 0.0, + "train_loss": 1.1094634984558374, + "train_runtime": 1738.7131, + "train_samples": 43598, + "train_samples_per_second": 25.075, + "train_steps_per_second": 0.392 +} \ No newline at end of file diff --git a/trainer_state.json b/trainer_state.json new file mode 100644 index 0000000..ced10e5 --- /dev/null +++ b/trainer_state.json @@ -0,0 +1,15706 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 200, + "global_step": 681, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "KL/chosen_KL_mean": 0.00527191162109375, + "KL/mean": 0.016706019639968872, + "KL/rejected_KL_mean": 0.028141021728515625, + "KL/std": 0.272699236869812, + "epoch": 0.0014684287812041115, + "fcm_dpo/beta": 0.05000000074505806, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": -0.02287006378173828, + "fcm_dpo/q_t": 0.5002857446670532, + "grad_norm": 41.73493576049805, + "learning_rate": 0.0, + "logits/chosen": -0.4974287748336792, + "logits/rejected": -0.43299180269241333, + "logps/chosen": -50.1435661315918, + "logps/ref_chosen": -50.14883804321289, + "logps/ref_rejected": -74.1280517578125, + "logps/rejected": -74.09991455078125, + "loss": 1.3875, + "margin_dpo/margin_mean": -0.02287048101425171, + "margin_dpo/margin_std": 0.41920793056488037, + "step": 1 + }, + { + "KL/chosen_KL_mean": -0.03498649597167969, + "KL/mean": -0.00212840735912323, + "KL/rejected_KL_mean": 0.030735015869140625, + "KL/std": 0.24797174334526062, + "epoch": 0.002936857562408223, + "fcm_dpo/beta": 0.05000000074505806, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": -0.06572261452674866, + "fcm_dpo/q_t": 0.5008214712142944, + "grad_norm": 36.00978469848633, + "learning_rate": 7.246376811594203e-09, + "logits/chosen": -0.49536412954330444, + "logits/rejected": -0.4594460427761078, + "logps/chosen": -52.65568923950195, + "logps/ref_chosen": -52.620704650878906, + "logps/ref_rejected": -75.30413818359375, + "logps/rejected": -75.27340698242188, + "loss": 1.3897, + "margin_dpo/margin_mean": -0.06572240591049194, + "margin_dpo/margin_std": 0.35048407316207886, + "step": 2 + }, + { + "KL/chosen_KL_mean": -0.0045108795166015625, + "KL/mean": 0.003316923975944519, + "KL/rejected_KL_mean": 0.01114654541015625, + "KL/std": 0.2563997805118561, + "epoch": 0.004405286343612335, + "fcm_dpo/beta": 0.05000000074505806, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": -0.015658468008041382, + "fcm_dpo/q_t": 0.5001957416534424, + "grad_norm": 35.48371505737305, + "learning_rate": 1.4492753623188406e-08, + "logits/chosen": -0.48171138763427734, + "logits/rejected": -0.4422028362751007, + "logps/chosen": -60.986106872558594, + "logps/ref_chosen": -60.981597900390625, + "logps/ref_rejected": -68.67259216308594, + "logps/rejected": -68.66145324707031, + "loss": 1.3872, + "margin_dpo/margin_mean": -0.015658140182495117, + "margin_dpo/margin_std": 0.39206600189208984, + "step": 3 + }, + { + "KL/chosen_KL_mean": -0.0025787353515625, + "KL/mean": 0.015432953834533691, + "KL/rejected_KL_mean": 0.03343963623046875, + "KL/std": 0.23463661968708038, + "epoch": 0.005873715124816446, + "fcm_dpo/beta": 0.05000000074505806, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": -0.03601771593093872, + "fcm_dpo/q_t": 0.5004501342773438, + "grad_norm": 35.9489860534668, + "learning_rate": 2.1739130434782606e-08, + "logits/chosen": -0.4681958258152008, + "logits/rejected": -0.44056397676467896, + "logps/chosen": -56.77029037475586, + "logps/ref_chosen": -56.7677116394043, + "logps/ref_rejected": -86.64710998535156, + "logps/rejected": -86.6136703491211, + "loss": 1.3882, + "margin_dpo/margin_mean": -0.036018311977386475, + "margin_dpo/margin_std": 0.3561931252479553, + "step": 4 + }, + { + "KL/chosen_KL_mean": 0.04430961608886719, + "KL/mean": 0.030420929193496704, + "KL/rejected_KL_mean": 0.01653289794921875, + "KL/std": 0.26933568716049194, + "epoch": 0.007342143906020558, + "fcm_dpo/beta": 0.05000000074505806, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.027776658535003662, + "fcm_dpo/q_t": 0.49965283274650574, + "grad_norm": 44.74127960205078, + "learning_rate": 2.898550724637681e-08, + "logits/chosen": -0.5143798589706421, + "logits/rejected": -0.47071516513824463, + "logps/chosen": -53.8150634765625, + "logps/ref_chosen": -53.859375, + "logps/ref_rejected": -84.14918518066406, + "logps/rejected": -84.13265228271484, + "loss": 1.385, + "margin_dpo/margin_mean": 0.027777403593063354, + "margin_dpo/margin_std": 0.3397705554962158, + "step": 5 + }, + { + "KL/chosen_KL_mean": -0.016576766967773438, + "KL/mean": -0.036144837737083435, + "KL/rejected_KL_mean": -0.0557098388671875, + "KL/std": 0.2481634020805359, + "epoch": 0.00881057268722467, + "fcm_dpo/beta": 0.05000000074505806, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.03913220763206482, + "fcm_dpo/q_t": 0.49951091408729553, + "grad_norm": 45.87062072753906, + "learning_rate": 3.6231884057971014e-08, + "logits/chosen": -0.5242589712142944, + "logits/rejected": -0.4836902618408203, + "logps/chosen": -63.02406311035156, + "logps/ref_chosen": -63.007484436035156, + "logps/ref_rejected": -92.64534759521484, + "logps/rejected": -92.70105743408203, + "loss": 1.3844, + "margin_dpo/margin_mean": 0.03913196921348572, + "margin_dpo/margin_std": 0.38666093349456787, + "step": 6 + }, + { + "KL/chosen_KL_mean": 0.025547027587890625, + "KL/mean": 0.029840022325515747, + "KL/rejected_KL_mean": 0.0341339111328125, + "KL/std": 0.2671242356300354, + "epoch": 0.010279001468428781, + "fcm_dpo/beta": 0.05000000074505806, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": -0.008586883544921875, + "fcm_dpo/q_t": 0.5001072883605957, + "grad_norm": 41.22108840942383, + "learning_rate": 4.347826086956521e-08, + "logits/chosen": -0.5003604292869568, + "logits/rejected": -0.4664100110530853, + "logps/chosen": -57.74927520751953, + "logps/ref_chosen": -57.774818420410156, + "logps/ref_rejected": -103.92059326171875, + "logps/rejected": -103.88645935058594, + "loss": 1.3868, + "margin_dpo/margin_mean": -0.0085868239402771, + "margin_dpo/margin_std": 0.36728373169898987, + "step": 7 + }, + { + "KL/chosen_KL_mean": -0.002834320068359375, + "KL/mean": 0.04533374309539795, + "KL/rejected_KL_mean": 0.093505859375, + "KL/std": 0.28405576944351196, + "epoch": 0.011747430249632892, + "fcm_dpo/beta": 0.05000000074505806, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": -0.09634339809417725, + "fcm_dpo/q_t": 0.5012041926383972, + "grad_norm": 39.53245544433594, + "learning_rate": 5.0724637681159424e-08, + "logits/chosen": -0.5020660161972046, + "logits/rejected": -0.4754522442817688, + "logps/chosen": -58.7188720703125, + "logps/ref_chosen": -58.716033935546875, + "logps/ref_rejected": -79.3114242553711, + "logps/rejected": -79.2179183959961, + "loss": 1.3912, + "margin_dpo/margin_mean": -0.09634318947792053, + "margin_dpo/margin_std": 0.40796253085136414, + "step": 8 + }, + { + "KL/chosen_KL_mean": 0.0199737548828125, + "KL/mean": 0.02644728124141693, + "KL/rejected_KL_mean": 0.03292083740234375, + "KL/std": 0.3076080083847046, + "epoch": 0.013215859030837005, + "fcm_dpo/beta": 0.05000000074505806, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": -0.012944132089614868, + "fcm_dpo/q_t": 0.5001616477966309, + "grad_norm": 42.5697021484375, + "learning_rate": 5.797101449275362e-08, + "logits/chosen": -0.49002158641815186, + "logits/rejected": -0.4431573152542114, + "logps/chosen": -69.84687042236328, + "logps/ref_chosen": -69.8668441772461, + "logps/ref_rejected": -99.6026611328125, + "logps/rejected": -99.56974792480469, + "loss": 1.3871, + "margin_dpo/margin_mean": -0.012945234775543213, + "margin_dpo/margin_std": 0.432614266872406, + "step": 9 + }, + { + "KL/chosen_KL_mean": 0.018072128295898438, + "KL/mean": 0.0005231276154518127, + "KL/rejected_KL_mean": -0.01702117919921875, + "KL/std": 0.22773060202598572, + "epoch": 0.014684287812041116, + "fcm_dpo/beta": 0.05000000074505806, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.03509734570980072, + "fcm_dpo/q_t": 0.4995613098144531, + "grad_norm": 35.374786376953125, + "learning_rate": 6.521739130434782e-08, + "logits/chosen": -0.4773544371128082, + "logits/rejected": -0.43332165479660034, + "logps/chosen": -48.33961486816406, + "logps/ref_chosen": -48.35768508911133, + "logps/ref_rejected": -80.37206268310547, + "logps/rejected": -80.38908386230469, + "loss": 1.3846, + "margin_dpo/margin_mean": 0.035097718238830566, + "margin_dpo/margin_std": 0.32590410113334656, + "step": 10 + }, + { + "KL/chosen_KL_mean": 0.055454254150390625, + "KL/mean": -0.006332814693450928, + "KL/rejected_KL_mean": -0.06811904907226562, + "KL/std": 0.2968614101409912, + "epoch": 0.016152716593245228, + "fcm_dpo/beta": 0.05000000074505806, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.1235695481300354, + "fcm_dpo/q_t": 0.4984557032585144, + "grad_norm": 34.34830093383789, + "learning_rate": 7.246376811594203e-08, + "logits/chosen": -0.4542468190193176, + "logits/rejected": -0.42898106575012207, + "logps/chosen": -52.961402893066406, + "logps/ref_chosen": -53.01685333251953, + "logps/ref_rejected": -87.78038024902344, + "logps/rejected": -87.84849548339844, + "loss": 1.3802, + "margin_dpo/margin_mean": 0.12356960773468018, + "margin_dpo/margin_std": 0.4112103283405304, + "step": 11 + }, + { + "KL/chosen_KL_mean": -0.037494659423828125, + "KL/mean": -0.04345113784074783, + "KL/rejected_KL_mean": -0.049404144287109375, + "KL/std": 0.3219500184059143, + "epoch": 0.01762114537444934, + "fcm_dpo/beta": 0.05000000074505806, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.011913254857063293, + "fcm_dpo/q_t": 0.4998508393764496, + "grad_norm": 45.08433532714844, + "learning_rate": 7.971014492753623e-08, + "logits/chosen": -0.5271201133728027, + "logits/rejected": -0.4905800521373749, + "logps/chosen": -61.842933654785156, + "logps/ref_chosen": -61.80543518066406, + "logps/ref_rejected": -104.8582763671875, + "logps/rejected": -104.90767669677734, + "loss": 1.3858, + "margin_dpo/margin_mean": 0.011912867426872253, + "margin_dpo/margin_std": 0.4435839056968689, + "step": 12 + }, + { + "KL/chosen_KL_mean": -0.0016231536865234375, + "KL/mean": 0.008255481719970703, + "KL/rejected_KL_mean": 0.018131256103515625, + "KL/std": 0.2522842288017273, + "epoch": 0.01908957415565345, + "fcm_dpo/beta": 0.05000000074505806, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": -0.019751250743865967, + "fcm_dpo/q_t": 0.5002469420433044, + "grad_norm": 39.63324737548828, + "learning_rate": 8.695652173913042e-08, + "logits/chosen": -0.44067633152008057, + "logits/rejected": -0.409400999546051, + "logps/chosen": -64.26197814941406, + "logps/ref_chosen": -64.2603530883789, + "logps/ref_rejected": -87.20307922363281, + "logps/rejected": -87.18495178222656, + "loss": 1.3874, + "margin_dpo/margin_mean": -0.01975110173225403, + "margin_dpo/margin_std": 0.3618961572647095, + "step": 13 + }, + { + "KL/chosen_KL_mean": 0.0144195556640625, + "KL/mean": -0.015029460191726685, + "KL/rejected_KL_mean": -0.0444793701171875, + "KL/std": 0.254509299993515, + "epoch": 0.020558002936857563, + "fcm_dpo/beta": 0.05000000074505806, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.0589028000831604, + "fcm_dpo/q_t": 0.49926379323005676, + "grad_norm": 42.595924377441406, + "learning_rate": 9.420289855072464e-08, + "logits/chosen": -0.4786085784435272, + "logits/rejected": -0.43931445479393005, + "logps/chosen": -58.09579086303711, + "logps/ref_chosen": -58.11021041870117, + "logps/ref_rejected": -104.04708099365234, + "logps/rejected": -104.09156036376953, + "loss": 1.3834, + "margin_dpo/margin_mean": 0.05890271067619324, + "margin_dpo/margin_std": 0.36086153984069824, + "step": 14 + }, + { + "KL/chosen_KL_mean": -0.0267486572265625, + "KL/mean": -0.03432337939739227, + "KL/rejected_KL_mean": -0.041904449462890625, + "KL/std": 0.24552780389785767, + "epoch": 0.022026431718061675, + "fcm_dpo/beta": 0.05000000074505806, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.01515999436378479, + "fcm_dpo/q_t": 0.49981045722961426, + "grad_norm": 32.06040573120117, + "learning_rate": 1.0144927536231885e-07, + "logits/chosen": -0.501712441444397, + "logits/rejected": -0.4834703207015991, + "logps/chosen": -56.99365997314453, + "logps/ref_chosen": -56.96691131591797, + "logps/ref_rejected": -80.80863952636719, + "logps/rejected": -80.85054779052734, + "loss": 1.3856, + "margin_dpo/margin_mean": 0.015159964561462402, + "margin_dpo/margin_std": 0.37245649099349976, + "step": 15 + }, + { + "KL/chosen_KL_mean": -0.00298309326171875, + "KL/mean": -0.015021562576293945, + "KL/rejected_KL_mean": -0.0270538330078125, + "KL/std": 0.269855260848999, + "epoch": 0.023494860499265784, + "fcm_dpo/beta": 0.05000000074505806, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.02406895160675049, + "fcm_dpo/q_t": 0.49969929456710815, + "grad_norm": 42.0484619140625, + "learning_rate": 1.0869565217391303e-07, + "logits/chosen": -0.4899655878543854, + "logits/rejected": -0.4453532099723816, + "logps/chosen": -61.74287414550781, + "logps/ref_chosen": -61.739891052246094, + "logps/ref_rejected": -84.36947631835938, + "logps/rejected": -84.39653015136719, + "loss": 1.3852, + "margin_dpo/margin_mean": 0.024068236351013184, + "margin_dpo/margin_std": 0.3988404870033264, + "step": 16 + }, + { + "KL/chosen_KL_mean": 0.05109405517578125, + "KL/mean": -0.004536911845207214, + "KL/rejected_KL_mean": -0.06017303466796875, + "KL/std": 0.2485760748386383, + "epoch": 0.024963289280469897, + "fcm_dpo/beta": 0.05000000074505806, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.11126986145973206, + "fcm_dpo/q_t": 0.4986092150211334, + "grad_norm": 39.231082916259766, + "learning_rate": 1.1594202898550725e-07, + "logits/chosen": -0.5107743740081787, + "logits/rejected": -0.47472792863845825, + "logps/chosen": -67.65924072265625, + "logps/ref_chosen": -67.71033477783203, + "logps/ref_rejected": -85.37865447998047, + "logps/rejected": -85.43882751464844, + "loss": 1.3808, + "margin_dpo/margin_mean": 0.1112699806690216, + "margin_dpo/margin_std": 0.33091142773628235, + "step": 17 + }, + { + "KL/chosen_KL_mean": -0.00733184814453125, + "KL/mean": -0.011897072196006775, + "KL/rejected_KL_mean": -0.016460418701171875, + "KL/std": 0.2411435842514038, + "epoch": 0.02643171806167401, + "fcm_dpo/beta": 0.05000000074505806, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.009129971265792847, + "fcm_dpo/q_t": 0.4998858571052551, + "grad_norm": 41.25909423828125, + "learning_rate": 1.2318840579710146e-07, + "logits/chosen": -0.4928959012031555, + "logits/rejected": -0.43723440170288086, + "logps/chosen": -47.746822357177734, + "logps/ref_chosen": -47.7394905090332, + "logps/ref_rejected": -75.4722900390625, + "logps/rejected": -75.4887466430664, + "loss": 1.3859, + "margin_dpo/margin_mean": 0.009129911661148071, + "margin_dpo/margin_std": 0.3549841642379761, + "step": 18 + }, + { + "KL/chosen_KL_mean": 0.03185272216796875, + "KL/mean": -0.017944127321243286, + "KL/rejected_KL_mean": -0.06773757934570312, + "KL/std": 0.23240481317043304, + "epoch": 0.027900146842878122, + "fcm_dpo/beta": 0.05000000074505806, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.09958422183990479, + "fcm_dpo/q_t": 0.4987553358078003, + "grad_norm": 36.71526336669922, + "learning_rate": 1.3043478260869563e-07, + "logits/chosen": -0.47301602363586426, + "logits/rejected": -0.42177867889404297, + "logps/chosen": -70.17350769042969, + "logps/ref_chosen": -70.20536041259766, + "logps/ref_rejected": -89.7575912475586, + "logps/rejected": -89.82533264160156, + "loss": 1.3814, + "margin_dpo/margin_mean": 0.09958454966545105, + "margin_dpo/margin_std": 0.33764326572418213, + "step": 19 + }, + { + "KL/chosen_KL_mean": 0.016534805297851562, + "KL/mean": -0.01947064697742462, + "KL/rejected_KL_mean": -0.05547332763671875, + "KL/std": 0.24900861084461212, + "epoch": 0.02936857562408223, + "fcm_dpo/beta": 0.05000000074505806, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.07201334834098816, + "fcm_dpo/q_t": 0.49909985065460205, + "grad_norm": 37.05985641479492, + "learning_rate": 1.3768115942028986e-07, + "logits/chosen": -0.5658631324768066, + "logits/rejected": -0.510959267616272, + "logps/chosen": -50.786705017089844, + "logps/ref_chosen": -50.80324172973633, + "logps/ref_rejected": -78.82334899902344, + "logps/rejected": -78.87882232666016, + "loss": 1.3828, + "margin_dpo/margin_mean": 0.07201322913169861, + "margin_dpo/margin_std": 0.3497501015663147, + "step": 20 + }, + { + "KL/chosen_KL_mean": 0.010187149047851562, + "KL/mean": -0.04661019146442413, + "KL/rejected_KL_mean": -0.1034088134765625, + "KL/std": 0.2971247434616089, + "epoch": 0.030837004405286344, + "fcm_dpo/beta": 0.05000000074505806, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.11359718441963196, + "fcm_dpo/q_t": 0.49858027696609497, + "grad_norm": 38.864524841308594, + "learning_rate": 1.4492753623188405e-07, + "logits/chosen": -0.4945378303527832, + "logits/rejected": -0.47060784697532654, + "logps/chosen": -50.05282974243164, + "logps/ref_chosen": -50.063018798828125, + "logps/ref_rejected": -77.86878967285156, + "logps/rejected": -77.97219848632812, + "loss": 1.3807, + "margin_dpo/margin_mean": 0.1135970950126648, + "margin_dpo/margin_std": 0.3924105763435364, + "step": 21 + }, + { + "KL/chosen_KL_mean": 0.025745391845703125, + "KL/mean": -0.054353222250938416, + "KL/rejected_KL_mean": -0.13445281982421875, + "KL/std": 0.2926764190196991, + "epoch": 0.032305433186490456, + "fcm_dpo/beta": 0.05000000074505806, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.16020318865776062, + "fcm_dpo/q_t": 0.4979976713657379, + "grad_norm": 42.72419357299805, + "learning_rate": 1.5217391304347825e-07, + "logits/chosen": -0.4607342481613159, + "logits/rejected": -0.4157930612564087, + "logps/chosen": -59.031890869140625, + "logps/ref_chosen": -59.05763626098633, + "logps/ref_rejected": -97.50466918945312, + "logps/rejected": -97.63912963867188, + "loss": 1.3784, + "margin_dpo/margin_mean": 0.1602029800415039, + "margin_dpo/margin_std": 0.4115052819252014, + "step": 22 + }, + { + "KL/chosen_KL_mean": 0.0500335693359375, + "KL/mean": -0.061298683285713196, + "KL/rejected_KL_mean": -0.17262649536132812, + "KL/std": 0.3359306752681732, + "epoch": 0.033773861967694566, + "fcm_dpo/beta": 0.05000000074505806, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.22266075015068054, + "fcm_dpo/q_t": 0.49721741676330566, + "grad_norm": 40.804290771484375, + "learning_rate": 1.5942028985507245e-07, + "logits/chosen": -0.49854540824890137, + "logits/rejected": -0.47655850648880005, + "logps/chosen": -60.02766418457031, + "logps/ref_chosen": -60.07769775390625, + "logps/ref_rejected": -81.13955688476562, + "logps/rejected": -81.31217956542969, + "loss": 1.3753, + "margin_dpo/margin_mean": 0.2226608693599701, + "margin_dpo/margin_std": 0.4632441997528076, + "step": 23 + }, + { + "KL/chosen_KL_mean": 0.0107421875, + "KL/mean": -0.08500338345766068, + "KL/rejected_KL_mean": -0.18075180053710938, + "KL/std": 0.29481637477874756, + "epoch": 0.03524229074889868, + "fcm_dpo/beta": 0.05000000074505806, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.19149301946163177, + "fcm_dpo/q_t": 0.4976065456867218, + "grad_norm": 46.37499237060547, + "learning_rate": 1.6666666666666665e-07, + "logits/chosen": -0.5174161195755005, + "logits/rejected": -0.5007544159889221, + "logps/chosen": -44.28029251098633, + "logps/ref_chosen": -44.29103469848633, + "logps/ref_rejected": -99.12521362304688, + "logps/rejected": -99.30596923828125, + "loss": 1.3768, + "margin_dpo/margin_mean": 0.19149288535118103, + "margin_dpo/margin_std": 0.36473649740219116, + "step": 24 + }, + { + "KL/chosen_KL_mean": -0.03951263427734375, + "KL/mean": -0.12438388168811798, + "KL/rejected_KL_mean": -0.20925140380859375, + "KL/std": 0.3535291850566864, + "epoch": 0.03671071953010279, + "fcm_dpo/beta": 0.05000000074505806, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.16974028944969177, + "fcm_dpo/q_t": 0.4978786110877991, + "grad_norm": 37.40980529785156, + "learning_rate": 1.7391304347826085e-07, + "logits/chosen": -0.45742011070251465, + "logits/rejected": -0.42607590556144714, + "logps/chosen": -52.57656478881836, + "logps/ref_chosen": -52.537052154541016, + "logps/ref_rejected": -89.34219360351562, + "logps/rejected": -89.55143737792969, + "loss": 1.378, + "margin_dpo/margin_mean": 0.16974005103111267, + "margin_dpo/margin_std": 0.45375657081604004, + "step": 25 + }, + { + "KL/chosen_KL_mean": 0.05691337585449219, + "KL/mean": -0.13059790432453156, + "KL/rejected_KL_mean": -0.31810760498046875, + "KL/std": 0.4097515344619751, + "epoch": 0.0381791483113069, + "fcm_dpo/beta": 0.05000000074505806, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.375026136636734, + "fcm_dpo/q_t": 0.4953131675720215, + "grad_norm": 44.840396881103516, + "learning_rate": 1.8115942028985507e-07, + "logits/chosen": -0.5315337181091309, + "logits/rejected": -0.5001455545425415, + "logps/chosen": -53.86589431762695, + "logps/ref_chosen": -53.92280578613281, + "logps/ref_rejected": -103.35971069335938, + "logps/rejected": -103.67782592773438, + "loss": 1.3678, + "margin_dpo/margin_mean": 0.3750268816947937, + "margin_dpo/margin_std": 0.46361756324768066, + "step": 26 + }, + { + "KL/chosen_KL_mean": 0.11811256408691406, + "KL/mean": -0.12570391595363617, + "KL/rejected_KL_mean": -0.3695220947265625, + "KL/std": 0.4389367699623108, + "epoch": 0.039647577092511016, + "fcm_dpo/beta": 0.05000000074505806, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.4876362979412079, + "fcm_dpo/q_t": 0.4939061999320984, + "grad_norm": 47.54275131225586, + "learning_rate": 1.8840579710144927e-07, + "logits/chosen": -0.531154990196228, + "logits/rejected": -0.4949020743370056, + "logps/chosen": -42.780418395996094, + "logps/ref_chosen": -42.898529052734375, + "logps/ref_rejected": -98.72419738769531, + "logps/rejected": -99.09371948242188, + "loss": 1.3622, + "margin_dpo/margin_mean": 0.4876362681388855, + "margin_dpo/margin_std": 0.5126945972442627, + "step": 27 + }, + { + "KL/chosen_KL_mean": 0.028047561645507812, + "KL/mean": -0.13079789280891418, + "KL/rejected_KL_mean": -0.2896461486816406, + "KL/std": 0.37808555364608765, + "epoch": 0.041116005873715125, + "fcm_dpo/beta": 0.05000000074505806, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.3176928162574768, + "fcm_dpo/q_t": 0.49602949619293213, + "grad_norm": 38.3453254699707, + "learning_rate": 1.9565217391304347e-07, + "logits/chosen": -0.516849160194397, + "logits/rejected": -0.46265852451324463, + "logps/chosen": -60.5284538269043, + "logps/ref_chosen": -60.55650329589844, + "logps/ref_rejected": -91.40111541748047, + "logps/rejected": -91.69076538085938, + "loss": 1.3706, + "margin_dpo/margin_mean": 0.31769293546676636, + "margin_dpo/margin_std": 0.5036317110061646, + "step": 28 + }, + { + "KL/chosen_KL_mean": 0.06805419921875, + "KL/mean": -0.18801212310791016, + "KL/rejected_KL_mean": -0.4440765380859375, + "KL/std": 0.4913862943649292, + "epoch": 0.042584434654919234, + "fcm_dpo/beta": 0.05000000074505806, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.512132465839386, + "fcm_dpo/q_t": 0.4936005473136902, + "grad_norm": 46.29709243774414, + "learning_rate": 2.028985507246377e-07, + "logits/chosen": -0.5535327196121216, + "logits/rejected": -0.5072311758995056, + "logps/chosen": -57.73973083496094, + "logps/ref_chosen": -57.80778503417969, + "logps/ref_rejected": -97.39434814453125, + "logps/rejected": -97.83842468261719, + "loss": 1.3611, + "margin_dpo/margin_mean": 0.5121327638626099, + "margin_dpo/margin_std": 0.6046355366706848, + "step": 29 + }, + { + "KL/chosen_KL_mean": 0.16227149963378906, + "KL/mean": -0.18443751335144043, + "KL/rejected_KL_mean": -0.5311508178710938, + "KL/std": 0.5754395723342896, + "epoch": 0.04405286343612335, + "fcm_dpo/beta": 0.05000000074505806, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.6934210658073425, + "fcm_dpo/q_t": 0.4913354218006134, + "grad_norm": 45.00627899169922, + "learning_rate": 2.1014492753623187e-07, + "logits/chosen": -0.5093830823898315, + "logits/rejected": -0.47936421632766724, + "logps/chosen": -52.41510009765625, + "logps/ref_chosen": -52.577369689941406, + "logps/ref_rejected": -98.48920440673828, + "logps/rejected": -99.02035522460938, + "loss": 1.3522, + "margin_dpo/margin_mean": 0.6934208869934082, + "margin_dpo/margin_std": 0.5992348194122314, + "step": 30 + }, + { + "KL/chosen_KL_mean": 0.11116981506347656, + "KL/mean": -0.14780662953853607, + "KL/rejected_KL_mean": -0.4067840576171875, + "KL/std": 0.56305992603302, + "epoch": 0.04552129221732746, + "fcm_dpo/beta": 0.05000000074505806, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.5179520845413208, + "fcm_dpo/q_t": 0.4935287833213806, + "grad_norm": 34.56964874267578, + "learning_rate": 2.1739130434782607e-07, + "logits/chosen": -0.5045328140258789, + "logits/rejected": -0.4596520662307739, + "logps/chosen": -63.69575119018555, + "logps/ref_chosen": -63.806922912597656, + "logps/ref_rejected": -72.89400482177734, + "logps/rejected": -73.30078887939453, + "loss": 1.3609, + "margin_dpo/margin_mean": 0.5179519653320312, + "margin_dpo/margin_std": 0.6779955625534058, + "step": 31 + }, + { + "KL/chosen_KL_mean": 0.2145843505859375, + "KL/mean": -0.16551779210567474, + "KL/rejected_KL_mean": -0.545623779296875, + "KL/std": 0.735187828540802, + "epoch": 0.04698972099853157, + "fcm_dpo/beta": 0.05000000074505806, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.760206937789917, + "fcm_dpo/q_t": 0.4905046820640564, + "grad_norm": 43.04856872558594, + "learning_rate": 2.2463768115942027e-07, + "logits/chosen": -0.5065813064575195, + "logits/rejected": -0.46486425399780273, + "logps/chosen": -62.524940490722656, + "logps/ref_chosen": -62.739524841308594, + "logps/ref_rejected": -89.3175048828125, + "logps/rejected": -89.86312866210938, + "loss": 1.3491, + "margin_dpo/margin_mean": 0.760206937789917, + "margin_dpo/margin_std": 0.8800061941146851, + "step": 32 + }, + { + "KL/chosen_KL_mean": 0.09627151489257812, + "KL/mean": -0.18434438109397888, + "KL/rejected_KL_mean": -0.46495819091796875, + "KL/std": 0.5440672636032104, + "epoch": 0.048458149779735685, + "fcm_dpo/beta": 0.05000000074505806, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.5612262487411499, + "fcm_dpo/q_t": 0.492986798286438, + "grad_norm": 37.19485092163086, + "learning_rate": 2.318840579710145e-07, + "logits/chosen": -0.4748949110507965, + "logits/rejected": -0.4482702612876892, + "logps/chosen": -53.164703369140625, + "logps/ref_chosen": -53.26097106933594, + "logps/ref_rejected": -87.8851318359375, + "logps/rejected": -88.35009765625, + "loss": 1.3587, + "margin_dpo/margin_mean": 0.561225950717926, + "margin_dpo/margin_std": 0.6160410642623901, + "step": 33 + }, + { + "KL/chosen_KL_mean": 0.09169197082519531, + "KL/mean": -0.32464924454689026, + "KL/rejected_KL_mean": -0.7409934997558594, + "KL/std": 0.7199804782867432, + "epoch": 0.049926578560939794, + "fcm_dpo/beta": 0.05000000074505806, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.8326817750930786, + "fcm_dpo/q_t": 0.48959898948669434, + "grad_norm": 40.41777801513672, + "learning_rate": 2.391304347826087e-07, + "logits/chosen": -0.48803627490997314, + "logits/rejected": -0.47066670656204224, + "logps/chosen": -50.72563552856445, + "logps/ref_chosen": -50.81732940673828, + "logps/ref_rejected": -101.92184448242188, + "logps/rejected": -102.662841796875, + "loss": 1.3456, + "margin_dpo/margin_mean": 0.8326810002326965, + "margin_dpo/margin_std": 0.8663803339004517, + "step": 34 + }, + { + "KL/chosen_KL_mean": 0.12615013122558594, + "KL/mean": -0.4437118172645569, + "KL/rejected_KL_mean": -1.0135726928710938, + "KL/std": 0.9808096885681152, + "epoch": 0.0513950073421439, + "fcm_dpo/beta": 0.05000000074505806, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 1.1397216320037842, + "fcm_dpo/q_t": 0.4857790470123291, + "grad_norm": 43.86104965209961, + "learning_rate": 2.463768115942029e-07, + "logits/chosen": -0.538067638874054, + "logits/rejected": -0.5010450482368469, + "logps/chosen": -50.898338317871094, + "logps/ref_chosen": -51.02449035644531, + "logps/ref_rejected": -106.82443237304688, + "logps/rejected": -107.83799743652344, + "loss": 1.3309, + "margin_dpo/margin_mean": 1.1397206783294678, + "margin_dpo/margin_std": 1.1088385581970215, + "step": 35 + }, + { + "KL/chosen_KL_mean": 0.036411285400390625, + "KL/mean": -0.529353678226471, + "KL/rejected_KL_mean": -1.0951156616210938, + "KL/std": 1.0766912698745728, + "epoch": 0.05286343612334802, + "fcm_dpo/beta": 0.05000000074505806, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 1.1315288543701172, + "fcm_dpo/q_t": 0.48587337136268616, + "grad_norm": 38.84652328491211, + "learning_rate": 2.536231884057971e-07, + "logits/chosen": -0.570672869682312, + "logits/rejected": -0.5350126028060913, + "logps/chosen": -51.955078125, + "logps/ref_chosen": -51.991493225097656, + "logps/ref_rejected": -86.0406265258789, + "logps/rejected": -87.1357421875, + "loss": 1.3314, + "margin_dpo/margin_mean": 1.1315281391143799, + "margin_dpo/margin_std": 1.1600990295410156, + "step": 36 + }, + { + "KL/chosen_KL_mean": 0.039447784423828125, + "KL/mean": -0.4909515976905823, + "KL/rejected_KL_mean": -1.0213546752929688, + "KL/std": 1.065048336982727, + "epoch": 0.05433186490455213, + "fcm_dpo/beta": 0.05000000074505806, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 1.0608052015304565, + "fcm_dpo/q_t": 0.4867693781852722, + "grad_norm": 32.67951202392578, + "learning_rate": 2.6086956521739126e-07, + "logits/chosen": -0.4992326498031616, + "logits/rejected": -0.45456790924072266, + "logps/chosen": -62.767662048339844, + "logps/ref_chosen": -62.807106018066406, + "logps/ref_rejected": -77.89507293701172, + "logps/rejected": -78.91642761230469, + "loss": 1.3351, + "margin_dpo/margin_mean": 1.0608049631118774, + "margin_dpo/margin_std": 1.3338418006896973, + "step": 37 + }, + { + "KL/chosen_KL_mean": 0.1778545379638672, + "KL/mean": -0.534498929977417, + "KL/rejected_KL_mean": -1.2468528747558594, + "KL/std": 1.3990492820739746, + "epoch": 0.055800293685756244, + "fcm_dpo/beta": 0.05000000074505806, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 1.4247064590454102, + "fcm_dpo/q_t": 0.4822547733783722, + "grad_norm": 37.384422302246094, + "learning_rate": 2.681159420289855e-07, + "logits/chosen": -0.5284711122512817, + "logits/rejected": -0.4957655370235443, + "logps/chosen": -48.21266555786133, + "logps/ref_chosen": -48.39051818847656, + "logps/ref_rejected": -97.91244506835938, + "logps/rejected": -99.15929412841797, + "loss": 1.3182, + "margin_dpo/margin_mean": 1.4247064590454102, + "margin_dpo/margin_std": 1.7057501077651978, + "step": 38 + }, + { + "KL/chosen_KL_mean": 0.08975982666015625, + "KL/mean": -0.7341597080230713, + "KL/rejected_KL_mean": -1.5580787658691406, + "KL/std": 1.322374939918518, + "epoch": 0.05726872246696035, + "fcm_dpo/beta": 0.05000000074505806, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 1.6478345394134521, + "fcm_dpo/q_t": 0.47944512963294983, + "grad_norm": 39.39072799682617, + "learning_rate": 2.753623188405797e-07, + "logits/chosen": -0.5558722019195557, + "logits/rejected": -0.5158903002738953, + "logps/chosen": -50.66071319580078, + "logps/ref_chosen": -50.75047302246094, + "logps/ref_rejected": -78.56951141357422, + "logps/rejected": -80.12759399414062, + "loss": 1.307, + "margin_dpo/margin_mean": 1.6478347778320312, + "margin_dpo/margin_std": 1.4638022184371948, + "step": 39 + }, + { + "KL/chosen_KL_mean": 0.1870288848876953, + "KL/mean": -0.5974045395851135, + "KL/rejected_KL_mean": -1.3818397521972656, + "KL/std": 1.3903248310089111, + "epoch": 0.05873715124816446, + "fcm_dpo/beta": 0.05000000074505806, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 1.568869948387146, + "fcm_dpo/q_t": 0.48045170307159424, + "grad_norm": 32.67512512207031, + "learning_rate": 2.8260869565217386e-07, + "logits/chosen": -0.531123161315918, + "logits/rejected": -0.5019083619117737, + "logps/chosen": -57.798038482666016, + "logps/ref_chosen": -57.985069274902344, + "logps/ref_rejected": -74.3000717163086, + "logps/rejected": -75.68191528320312, + "loss": 1.3112, + "margin_dpo/margin_mean": 1.5688700675964355, + "margin_dpo/margin_std": 1.6431810855865479, + "step": 40 + }, + { + "KL/chosen_KL_mean": 0.026700973510742188, + "KL/mean": -0.9341164827346802, + "KL/rejected_KL_mean": -1.894927978515625, + "KL/std": 1.8969902992248535, + "epoch": 0.06020558002936858, + "fcm_dpo/beta": 0.05000000074505806, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 1.921630620956421, + "fcm_dpo/q_t": 0.47608882188796997, + "grad_norm": 37.314823150634766, + "learning_rate": 2.898550724637681e-07, + "logits/chosen": -0.549726128578186, + "logits/rejected": -0.5137777328491211, + "logps/chosen": -62.66911315917969, + "logps/ref_chosen": -62.69581604003906, + "logps/ref_rejected": -97.02352905273438, + "logps/rejected": -98.91845703125, + "loss": 1.2953, + "margin_dpo/margin_mean": 1.9216312170028687, + "margin_dpo/margin_std": 2.0707690715789795, + "step": 41 + }, + { + "KL/chosen_KL_mean": 0.20686912536621094, + "KL/mean": -1.109514832496643, + "KL/rejected_KL_mean": -2.4258995056152344, + "KL/std": 2.2611937522888184, + "epoch": 0.06167400881057269, + "fcm_dpo/beta": 0.05000000074505806, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 2.6327667236328125, + "fcm_dpo/q_t": 0.46733659505844116, + "grad_norm": 44.921146392822266, + "learning_rate": 2.971014492753623e-07, + "logits/chosen": -0.5433107614517212, + "logits/rejected": -0.49691134691238403, + "logps/chosen": -58.759559631347656, + "logps/ref_chosen": -58.966426849365234, + "logps/ref_rejected": -109.90837097167969, + "logps/rejected": -112.33427429199219, + "loss": 1.2632, + "margin_dpo/margin_mean": 2.6327667236328125, + "margin_dpo/margin_std": 2.5917067527770996, + "step": 42 + }, + { + "KL/chosen_KL_mean": 0.5680294036865234, + "KL/mean": -0.7123653888702393, + "KL/rejected_KL_mean": -1.9927635192871094, + "KL/std": 1.8695602416992188, + "epoch": 0.0631424375917768, + "fcm_dpo/beta": 0.05000000074505806, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 2.5607895851135254, + "fcm_dpo/q_t": 0.46813303232192993, + "grad_norm": 39.96173858642578, + "learning_rate": 3.043478260869565e-07, + "logits/chosen": -0.529514729976654, + "logits/rejected": -0.50406813621521, + "logps/chosen": -53.58796691894531, + "logps/ref_chosen": -54.15599822998047, + "logps/ref_rejected": -96.48019409179688, + "logps/rejected": -98.47295379638672, + "loss": 1.2648, + "margin_dpo/margin_mean": 2.5607893466949463, + "margin_dpo/margin_std": 1.9669482707977295, + "step": 43 + }, + { + "KL/chosen_KL_mean": 0.22922897338867188, + "KL/mean": -1.1871364116668701, + "KL/rejected_KL_mean": -2.603504180908203, + "KL/std": 2.2904388904571533, + "epoch": 0.06461086637298091, + "fcm_dpo/beta": 0.05000000074505806, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 2.832730293273926, + "fcm_dpo/q_t": 0.46478694677352905, + "grad_norm": 44.83015823364258, + "learning_rate": 3.115942028985507e-07, + "logits/chosen": -0.4886033236980438, + "logits/rejected": -0.4682733416557312, + "logps/chosen": -49.84926986694336, + "logps/ref_chosen": -50.07849884033203, + "logps/ref_rejected": -108.78376007080078, + "logps/rejected": -111.38726806640625, + "loss": 1.2528, + "margin_dpo/margin_mean": 2.832730293273926, + "margin_dpo/margin_std": 2.242119789123535, + "step": 44 + }, + { + "KL/chosen_KL_mean": 0.15188217163085938, + "KL/mean": -1.0016334056854248, + "KL/rejected_KL_mean": -2.1551513671875, + "KL/std": 2.107585906982422, + "epoch": 0.06607929515418502, + "fcm_dpo/beta": 0.05000000074505806, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 2.307036876678467, + "fcm_dpo/q_t": 0.4713747501373291, + "grad_norm": 35.4964485168457, + "learning_rate": 3.188405797101449e-07, + "logits/chosen": -0.49817246198654175, + "logits/rejected": -0.48587897419929504, + "logps/chosen": -48.26304626464844, + "logps/ref_chosen": -48.4149284362793, + "logps/ref_rejected": -77.93643188476562, + "logps/rejected": -80.09158325195312, + "loss": 1.2787, + "margin_dpo/margin_mean": 2.307036876678467, + "margin_dpo/margin_std": 2.619992733001709, + "step": 45 + }, + { + "KL/chosen_KL_mean": 0.21907806396484375, + "KL/mean": -1.2944903373718262, + "KL/rejected_KL_mean": -2.808063507080078, + "KL/std": 2.69203782081604, + "epoch": 0.06754772393538913, + "fcm_dpo/beta": 0.05000000074505806, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 3.0271382331848145, + "fcm_dpo/q_t": 0.4626089632511139, + "grad_norm": 40.982444763183594, + "learning_rate": 3.260869565217391e-07, + "logits/chosen": -0.5164551138877869, + "logits/rejected": -0.464849591255188, + "logps/chosen": -55.78034973144531, + "logps/ref_chosen": -55.999427795410156, + "logps/ref_rejected": -95.652587890625, + "logps/rejected": -98.46064758300781, + "loss": 1.2474, + "margin_dpo/margin_mean": 3.0271389484405518, + "margin_dpo/margin_std": 3.295480966567993, + "step": 46 + }, + { + "KL/chosen_KL_mean": 0.3923931121826172, + "KL/mean": -1.1159597635269165, + "KL/rejected_KL_mean": -2.6243133544921875, + "KL/std": 2.6395797729492188, + "epoch": 0.06901615271659324, + "fcm_dpo/beta": 0.05000000074505806, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 3.0167040824890137, + "fcm_dpo/q_t": 0.46258771419525146, + "grad_norm": 37.45136642456055, + "learning_rate": 3.333333333333333e-07, + "logits/chosen": -0.5594693422317505, + "logits/rejected": -0.5059822797775269, + "logps/chosen": -57.53368377685547, + "logps/ref_chosen": -57.92607879638672, + "logps/ref_rejected": -94.67920684814453, + "logps/rejected": -97.30352020263672, + "loss": 1.246, + "margin_dpo/margin_mean": 3.016704559326172, + "margin_dpo/margin_std": 2.752382516860962, + "step": 47 + }, + { + "KL/chosen_KL_mean": 0.07604217529296875, + "KL/mean": -1.5602600574493408, + "KL/rejected_KL_mean": -3.1965599060058594, + "KL/std": 2.765866756439209, + "epoch": 0.07048458149779736, + "fcm_dpo/beta": 0.05000000074505806, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 3.2726051807403564, + "fcm_dpo/q_t": 0.4594641327857971, + "grad_norm": 42.72023391723633, + "learning_rate": 3.4057971014492755e-07, + "logits/chosen": -0.5792273879051208, + "logits/rejected": -0.5201135277748108, + "logps/chosen": -57.112030029296875, + "logps/ref_chosen": -57.188072204589844, + "logps/ref_rejected": -88.0166015625, + "logps/rejected": -91.21316528320312, + "loss": 1.2359, + "margin_dpo/margin_mean": 3.2726054191589355, + "margin_dpo/margin_std": 2.975618362426758, + "step": 48 + }, + { + "KL/chosen_KL_mean": 0.2817058563232422, + "KL/mean": -1.6360485553741455, + "KL/rejected_KL_mean": -3.5537986755371094, + "KL/std": 3.4877753257751465, + "epoch": 0.07195301027900147, + "fcm_dpo/beta": 0.05044425278902054, + "fcm_dpo/delta": 0.08806969970464706, + "fcm_dpo/margin": 3.8355047702789307, + "fcm_dpo/q_t": 0.45288553833961487, + "grad_norm": 37.98999786376953, + "learning_rate": 3.478260869565217e-07, + "logits/chosen": -0.5384161472320557, + "logits/rejected": -0.4786253571510315, + "logps/chosen": -61.403564453125, + "logps/ref_chosen": -61.685272216796875, + "logps/ref_rejected": -83.76747131347656, + "logps/rejected": -87.32127380371094, + "loss": 1.2127, + "margin_dpo/margin_mean": 3.8355050086975098, + "margin_dpo/margin_std": 4.0506486892700195, + "step": 49 + }, + { + "KL/chosen_KL_mean": -0.2352313995361328, + "KL/mean": -2.3096749782562256, + "KL/rejected_KL_mean": -4.384113311767578, + "KL/std": 3.5547854900360107, + "epoch": 0.07342143906020558, + "fcm_dpo/beta": 0.052318423986434937, + "fcm_dpo/delta": 0.1870485544204712, + "fcm_dpo/margin": 4.148881435394287, + "fcm_dpo/q_t": 0.4476335346698761, + "grad_norm": 38.957035064697266, + "learning_rate": 3.5507246376811595e-07, + "logits/chosen": -0.5327342748641968, + "logits/rejected": -0.49643486738204956, + "logps/chosen": -58.95936965942383, + "logps/ref_chosen": -58.72413635253906, + "logps/ref_rejected": -96.35814666748047, + "logps/rejected": -100.74226379394531, + "loss": 1.1922, + "margin_dpo/margin_mean": 4.148880958557129, + "margin_dpo/margin_std": 4.06521463394165, + "step": 50 + }, + { + "KL/chosen_KL_mean": -0.2880744934082031, + "KL/mean": -2.3413543701171875, + "KL/rejected_KL_mean": -4.394641876220703, + "KL/std": 4.241177558898926, + "epoch": 0.07488986784140969, + "fcm_dpo/beta": 0.05356086045503616, + "fcm_dpo/delta": 0.06885935366153717, + "fcm_dpo/margin": 4.106563568115234, + "fcm_dpo/q_t": 0.447162002325058, + "grad_norm": 34.38425827026367, + "learning_rate": 3.6231884057971015e-07, + "logits/chosen": -0.5253022313117981, + "logits/rejected": -0.4924160838127136, + "logps/chosen": -61.6617431640625, + "logps/ref_chosen": -61.3736686706543, + "logps/ref_rejected": -76.00199890136719, + "logps/rejected": -80.39664459228516, + "loss": 1.1977, + "margin_dpo/margin_mean": 4.106563091278076, + "margin_dpo/margin_std": 5.20696496963501, + "step": 51 + }, + { + "KL/chosen_KL_mean": 0.3370513916015625, + "KL/mean": -2.786116600036621, + "KL/rejected_KL_mean": -5.9092864990234375, + "KL/std": 4.868247985839844, + "epoch": 0.0763582966226138, + "fcm_dpo/beta": 0.05409781634807587, + "fcm_dpo/delta": 0.06422993540763855, + "fcm_dpo/margin": 6.246335983276367, + "fcm_dpo/q_t": 0.4186936020851135, + "grad_norm": 41.69569396972656, + "learning_rate": 3.695652173913043e-07, + "logits/chosen": -0.5800528526306152, + "logits/rejected": -0.5262706875801086, + "logps/chosen": -52.00030517578125, + "logps/ref_chosen": -52.33735656738281, + "logps/ref_rejected": -79.97391510009766, + "logps/rejected": -85.8832015991211, + "loss": 1.096, + "margin_dpo/margin_mean": 6.246336936950684, + "margin_dpo/margin_std": 5.239194393157959, + "step": 52 + }, + { + "KL/chosen_KL_mean": -0.2391986846923828, + "KL/mean": -3.4154043197631836, + "KL/rejected_KL_mean": -6.591606140136719, + "KL/std": 5.5702056884765625, + "epoch": 0.07782672540381791, + "fcm_dpo/beta": 0.05494330823421478, + "fcm_dpo/delta": 0.05208485573530197, + "fcm_dpo/margin": 6.352412223815918, + "fcm_dpo/q_t": 0.41828417778015137, + "grad_norm": 41.55133056640625, + "learning_rate": 3.7681159420289855e-07, + "logits/chosen": -0.6179283857345581, + "logits/rejected": -0.5963842272758484, + "logps/chosen": -53.55384826660156, + "logps/ref_chosen": -53.31465148925781, + "logps/ref_rejected": -91.78359985351562, + "logps/rejected": -98.37519836425781, + "loss": 1.1022, + "margin_dpo/margin_mean": 6.352412223815918, + "margin_dpo/margin_std": 6.420080184936523, + "step": 53 + }, + { + "KL/chosen_KL_mean": -0.4640941619873047, + "KL/mean": -3.2297964096069336, + "KL/rejected_KL_mean": -5.995494842529297, + "KL/std": 5.26720666885376, + "epoch": 0.07929515418502203, + "fcm_dpo/beta": 0.055665239691734314, + "fcm_dpo/delta": 0.09488870948553085, + "fcm_dpo/margin": 5.531402111053467, + "fcm_dpo/q_t": 0.42631345987319946, + "grad_norm": 38.000980377197266, + "learning_rate": 3.8405797101449274e-07, + "logits/chosen": -0.5974017381668091, + "logits/rejected": -0.5442031025886536, + "logps/chosen": -51.15275573730469, + "logps/ref_chosen": -50.68865966796875, + "logps/ref_rejected": -91.71539306640625, + "logps/rejected": -97.71089172363281, + "loss": 1.1223, + "margin_dpo/margin_mean": 5.53140115737915, + "margin_dpo/margin_std": 5.259613990783691, + "step": 54 + }, + { + "KL/chosen_KL_mean": -1.0277824401855469, + "KL/mean": -4.298252105712891, + "KL/rejected_KL_mean": -7.568717956542969, + "KL/std": 6.322789192199707, + "epoch": 0.08076358296622614, + "fcm_dpo/beta": 0.05629376322031021, + "fcm_dpo/delta": 0.03277287259697914, + "fcm_dpo/margin": 6.540935039520264, + "fcm_dpo/q_t": 0.4138604402542114, + "grad_norm": 37.752960205078125, + "learning_rate": 3.9130434782608694e-07, + "logits/chosen": -0.6345809698104858, + "logits/rejected": -0.5711982250213623, + "logps/chosen": -63.64301681518555, + "logps/ref_chosen": -62.615234375, + "logps/ref_rejected": -88.99349975585938, + "logps/rejected": -96.56222534179688, + "loss": 1.0959, + "margin_dpo/margin_mean": 6.5409345626831055, + "margin_dpo/margin_std": 7.682841777801514, + "step": 55 + }, + { + "KL/chosen_KL_mean": -0.8249740600585938, + "KL/mean": -3.960031032562256, + "KL/rejected_KL_mean": -7.095088958740234, + "KL/std": 6.235048294067383, + "epoch": 0.08223201174743025, + "fcm_dpo/beta": 0.05667191743850708, + "fcm_dpo/delta": 0.0462585911154747, + "fcm_dpo/margin": 6.270114421844482, + "fcm_dpo/q_t": 0.41691917181015015, + "grad_norm": 35.4822883605957, + "learning_rate": 3.9855072463768114e-07, + "logits/chosen": -0.6079816818237305, + "logits/rejected": -0.5654845237731934, + "logps/chosen": -58.7577018737793, + "logps/ref_chosen": -57.9327278137207, + "logps/ref_rejected": -94.1744384765625, + "logps/rejected": -101.26953125, + "loss": 1.1059, + "margin_dpo/margin_mean": 6.270113945007324, + "margin_dpo/margin_std": 7.562032699584961, + "step": 56 + }, + { + "KL/chosen_KL_mean": -0.8334732055664062, + "KL/mean": -4.382845401763916, + "KL/rejected_KL_mean": -7.932216644287109, + "KL/std": 6.173903942108154, + "epoch": 0.08370044052863436, + "fcm_dpo/beta": 0.05690793693065643, + "fcm_dpo/delta": -0.004338288679718971, + "fcm_dpo/margin": 7.098737716674805, + "fcm_dpo/q_t": 0.4039880931377411, + "grad_norm": 39.752960205078125, + "learning_rate": 4.057971014492754e-07, + "logits/chosen": -0.6026902198791504, + "logits/rejected": -0.5755797624588013, + "logps/chosen": -71.3287582397461, + "logps/ref_chosen": -70.49528503417969, + "logps/ref_rejected": -95.56546020507812, + "logps/rejected": -103.49767303466797, + "loss": 1.0592, + "margin_dpo/margin_mean": 7.0987372398376465, + "margin_dpo/margin_std": 6.829120635986328, + "step": 57 + }, + { + "KL/chosen_KL_mean": -1.1680717468261719, + "KL/mean": -5.124210834503174, + "KL/rejected_KL_mean": -9.08034896850586, + "KL/std": 7.0405378341674805, + "epoch": 0.08516886930983847, + "fcm_dpo/beta": 0.05647977069020271, + "fcm_dpo/delta": -0.049127254635095596, + "fcm_dpo/margin": 7.912271499633789, + "fcm_dpo/q_t": 0.3962929844856262, + "grad_norm": 41.28611755371094, + "learning_rate": 4.1304347826086954e-07, + "logits/chosen": -0.5904037952423096, + "logits/rejected": -0.5139462947845459, + "logps/chosen": -63.30101013183594, + "logps/ref_chosen": -62.13294219970703, + "logps/ref_rejected": -84.61729431152344, + "logps/rejected": -93.69764709472656, + "loss": 1.0414, + "margin_dpo/margin_mean": 7.912272930145264, + "margin_dpo/margin_std": 8.415109634399414, + "step": 58 + }, + { + "KL/chosen_KL_mean": -1.6745834350585938, + "KL/mean": -6.046883583068848, + "KL/rejected_KL_mean": -10.419181823730469, + "KL/std": 7.833342552185059, + "epoch": 0.08663729809104258, + "fcm_dpo/beta": 0.05512422323226929, + "fcm_dpo/delta": -0.08732414245605469, + "fcm_dpo/margin": 8.744604110717773, + "fcm_dpo/q_t": 0.388535737991333, + "grad_norm": 41.06474304199219, + "learning_rate": 4.2028985507246374e-07, + "logits/chosen": -0.6156203746795654, + "logits/rejected": -0.5749033689498901, + "logps/chosen": -53.60710906982422, + "logps/ref_chosen": -51.932525634765625, + "logps/ref_rejected": -88.88520050048828, + "logps/rejected": -99.30438232421875, + "loss": 1.0196, + "margin_dpo/margin_mean": 8.744604110717773, + "margin_dpo/margin_std": 9.00253677368164, + "step": 59 + }, + { + "KL/chosen_KL_mean": -3.0149459838867188, + "KL/mean": -6.47147274017334, + "KL/rejected_KL_mean": -9.927997589111328, + "KL/std": 7.12081241607666, + "epoch": 0.0881057268722467, + "fcm_dpo/beta": 0.055202968418598175, + "fcm_dpo/delta": 0.01902601681649685, + "fcm_dpo/margin": 6.913043975830078, + "fcm_dpo/q_t": 0.4095836579799652, + "grad_norm": 41.82515335083008, + "learning_rate": 4.2753623188405794e-07, + "logits/chosen": -0.6264551877975464, + "logits/rejected": -0.5676676034927368, + "logps/chosen": -63.95713424682617, + "logps/ref_chosen": -60.94218826293945, + "logps/ref_rejected": -85.39340209960938, + "logps/rejected": -95.32139587402344, + "loss": 1.0887, + "margin_dpo/margin_mean": 6.913043975830078, + "margin_dpo/margin_std": 8.055152893066406, + "step": 60 + }, + { + "KL/chosen_KL_mean": -1.785238265991211, + "KL/mean": -6.138444900512695, + "KL/rejected_KL_mean": -10.491649627685547, + "KL/std": 9.666519165039062, + "epoch": 0.08957415565345081, + "fcm_dpo/beta": 0.054592475295066833, + "fcm_dpo/delta": -0.07944516837596893, + "fcm_dpo/margin": 8.706413269042969, + "fcm_dpo/q_t": 0.3965580463409424, + "grad_norm": 36.854209899902344, + "learning_rate": 4.3478260869565214e-07, + "logits/chosen": -0.6180921196937561, + "logits/rejected": -0.5846239328384399, + "logps/chosen": -62.41876220703125, + "logps/ref_chosen": -60.633522033691406, + "logps/ref_rejected": -89.85249328613281, + "logps/rejected": -100.34414672851562, + "loss": 1.0562, + "margin_dpo/margin_mean": 8.706413269042969, + "margin_dpo/margin_std": 12.064128875732422, + "step": 61 + }, + { + "KL/chosen_KL_mean": -1.9152297973632812, + "KL/mean": -5.1878767013549805, + "KL/rejected_KL_mean": -8.460521697998047, + "KL/std": 7.09406852722168, + "epoch": 0.09104258443465492, + "fcm_dpo/beta": 0.05502016097307205, + "fcm_dpo/delta": 0.041067853569984436, + "fcm_dpo/margin": 6.545290470123291, + "fcm_dpo/q_t": 0.416960746049881, + "grad_norm": 35.16862487792969, + "learning_rate": 4.420289855072464e-07, + "logits/chosen": -0.6116189956665039, + "logits/rejected": -0.5774843692779541, + "logps/chosen": -58.066001892089844, + "logps/ref_chosen": -56.15077209472656, + "logps/ref_rejected": -75.56619262695312, + "logps/rejected": -84.02671813964844, + "loss": 1.1131, + "margin_dpo/margin_mean": 6.545290946960449, + "margin_dpo/margin_std": 8.706681251525879, + "step": 62 + }, + { + "KL/chosen_KL_mean": -3.2132091522216797, + "KL/mean": -7.6087846755981445, + "KL/rejected_KL_mean": -12.004364013671875, + "KL/std": 9.102246284484863, + "epoch": 0.09251101321585903, + "fcm_dpo/beta": 0.05407857149839401, + "fcm_dpo/delta": -0.07995294779539108, + "fcm_dpo/margin": 8.791152000427246, + "fcm_dpo/q_t": 0.38956284523010254, + "grad_norm": 39.14129638671875, + "learning_rate": 4.4927536231884053e-07, + "logits/chosen": -0.6333186626434326, + "logits/rejected": -0.5910948514938354, + "logps/chosen": -76.36060333251953, + "logps/ref_chosen": -73.14739227294922, + "logps/ref_rejected": -97.61006164550781, + "logps/rejected": -109.61442565917969, + "loss": 1.0306, + "margin_dpo/margin_mean": 8.791152000427246, + "margin_dpo/margin_std": 9.617431640625, + "step": 63 + }, + { + "KL/chosen_KL_mean": -1.370086669921875, + "KL/mean": -6.633951187133789, + "KL/rejected_KL_mean": -11.897815704345703, + "KL/std": 9.861505508422852, + "epoch": 0.09397944199706314, + "fcm_dpo/beta": 0.05238521099090576, + "fcm_dpo/delta": -0.16296426951885223, + "fcm_dpo/margin": 10.527724266052246, + "fcm_dpo/q_t": 0.37639370560646057, + "grad_norm": 35.86033248901367, + "learning_rate": 4.5652173913043473e-07, + "logits/chosen": -0.5971044898033142, + "logits/rejected": -0.5653672218322754, + "logps/chosen": -55.36868667602539, + "logps/ref_chosen": -53.998600006103516, + "logps/ref_rejected": -93.53019714355469, + "logps/rejected": -105.42801666259766, + "loss": 0.9991, + "margin_dpo/margin_mean": 10.527724266052246, + "margin_dpo/margin_std": 12.154861450195312, + "step": 64 + }, + { + "KL/chosen_KL_mean": -3.860137939453125, + "KL/mean": -9.250676155090332, + "KL/rejected_KL_mean": -14.641212463378906, + "KL/std": 11.2495698928833, + "epoch": 0.09544787077826726, + "fcm_dpo/beta": 0.05090530961751938, + "fcm_dpo/delta": -0.15866145491600037, + "fcm_dpo/margin": 10.78107738494873, + "fcm_dpo/q_t": 0.3747457265853882, + "grad_norm": 36.674705505371094, + "learning_rate": 4.63768115942029e-07, + "logits/chosen": -0.6921563148498535, + "logits/rejected": -0.6807618737220764, + "logps/chosen": -68.69613647460938, + "logps/ref_chosen": -64.83599853515625, + "logps/ref_rejected": -109.94645690917969, + "logps/rejected": -124.58767700195312, + "loss": 1.0021, + "margin_dpo/margin_mean": 10.781078338623047, + "margin_dpo/margin_std": 12.379752159118652, + "step": 65 + }, + { + "KL/chosen_KL_mean": -3.5839481353759766, + "KL/mean": -8.451730728149414, + "KL/rejected_KL_mean": -13.319507598876953, + "KL/std": 10.042497634887695, + "epoch": 0.09691629955947137, + "fcm_dpo/beta": 0.04975783824920654, + "fcm_dpo/delta": -0.08998537063598633, + "fcm_dpo/margin": 9.735556602478027, + "fcm_dpo/q_t": 0.39193886518478394, + "grad_norm": 32.98362350463867, + "learning_rate": 4.7101449275362313e-07, + "logits/chosen": -0.6278406381607056, + "logits/rejected": -0.5947624444961548, + "logps/chosen": -55.02747344970703, + "logps/ref_chosen": -51.44352722167969, + "logps/ref_rejected": -75.63629913330078, + "logps/rejected": -88.955810546875, + "loss": 1.0403, + "margin_dpo/margin_mean": 9.735556602478027, + "margin_dpo/margin_std": 12.185860633850098, + "step": 66 + }, + { + "KL/chosen_KL_mean": -3.3043766021728516, + "KL/mean": -8.373483657836914, + "KL/rejected_KL_mean": -13.44259262084961, + "KL/std": 10.349268913269043, + "epoch": 0.09838472834067548, + "fcm_dpo/beta": 0.049183670431375504, + "fcm_dpo/delta": -0.10368002951145172, + "fcm_dpo/margin": 10.138212203979492, + "fcm_dpo/q_t": 0.38883906602859497, + "grad_norm": 33.585758209228516, + "learning_rate": 4.782608695652174e-07, + "logits/chosen": -0.6145930886268616, + "logits/rejected": -0.5722061395645142, + "logps/chosen": -62.645179748535156, + "logps/ref_chosen": -59.34080505371094, + "logps/ref_rejected": -72.78728485107422, + "logps/rejected": -86.22987365722656, + "loss": 1.031, + "margin_dpo/margin_mean": 10.138212203979492, + "margin_dpo/margin_std": 12.514627456665039, + "step": 67 + }, + { + "KL/chosen_KL_mean": -4.238227844238281, + "KL/mean": -9.08108901977539, + "KL/rejected_KL_mean": -13.923954010009766, + "KL/std": 9.300742149353027, + "epoch": 0.09985315712187959, + "fcm_dpo/beta": 0.04838772863149643, + "fcm_dpo/delta": -0.0719866156578064, + "fcm_dpo/margin": 9.685721397399902, + "fcm_dpo/q_t": 0.39209192991256714, + "grad_norm": 33.56498336791992, + "learning_rate": 4.855072463768116e-07, + "logits/chosen": -0.6377149820327759, + "logits/rejected": -0.5744598507881165, + "logps/chosen": -69.44406127929688, + "logps/ref_chosen": -65.2058334350586, + "logps/ref_rejected": -77.20724487304688, + "logps/rejected": -91.1312026977539, + "loss": 1.0309, + "margin_dpo/margin_mean": 9.685721397399902, + "margin_dpo/margin_std": 10.507135391235352, + "step": 68 + }, + { + "KL/chosen_KL_mean": -5.249656677246094, + "KL/mean": -11.655780792236328, + "KL/rejected_KL_mean": -18.061904907226562, + "KL/std": 11.926582336425781, + "epoch": 0.1013215859030837, + "fcm_dpo/beta": 0.04644005745649338, + "fcm_dpo/delta": -0.20860767364501953, + "fcm_dpo/margin": 12.812248229980469, + "fcm_dpo/q_t": 0.36728864908218384, + "grad_norm": 35.174522399902344, + "learning_rate": 4.927536231884058e-07, + "logits/chosen": -0.6080530285835266, + "logits/rejected": -0.5861386060714722, + "logps/chosen": -65.06889343261719, + "logps/ref_chosen": -59.81924057006836, + "logps/ref_rejected": -103.38886260986328, + "logps/rejected": -121.45076751708984, + "loss": 0.9606, + "margin_dpo/margin_mean": 12.812247276306152, + "margin_dpo/margin_std": 13.367576599121094, + "step": 69 + }, + { + "KL/chosen_KL_mean": -7.555110931396484, + "KL/mean": -14.482812881469727, + "KL/rejected_KL_mean": -21.41051483154297, + "KL/std": 15.016265869140625, + "epoch": 0.1027900146842878, + "fcm_dpo/beta": 0.04471848905086517, + "fcm_dpo/delta": -0.2335546314716339, + "fcm_dpo/margin": 13.855405807495117, + "fcm_dpo/q_t": 0.3647538721561432, + "grad_norm": 36.163818359375, + "learning_rate": 5e-07, + "logits/chosen": -0.6315578818321228, + "logits/rejected": -0.5984231233596802, + "logps/chosen": -69.48574829101562, + "logps/ref_chosen": -61.930641174316406, + "logps/ref_rejected": -91.06078338623047, + "logps/rejected": -112.47129821777344, + "loss": 0.9698, + "margin_dpo/margin_mean": 13.855405807495117, + "margin_dpo/margin_std": 15.959779739379883, + "step": 70 + }, + { + "KL/chosen_KL_mean": -7.743133544921875, + "KL/mean": -16.083194732666016, + "KL/rejected_KL_mean": -24.423255920410156, + "KL/std": 16.247331619262695, + "epoch": 0.10425844346549193, + "fcm_dpo/beta": 0.04178931191563606, + "fcm_dpo/delta": -0.32131147384643555, + "fcm_dpo/margin": 16.68012237548828, + "fcm_dpo/q_t": 0.35157865285873413, + "grad_norm": 33.68754196166992, + "learning_rate": 4.999967061337492e-07, + "logits/chosen": -0.6753981113433838, + "logits/rejected": -0.63995361328125, + "logps/chosen": -69.49346923828125, + "logps/ref_chosen": -61.750335693359375, + "logps/ref_rejected": -97.33662414550781, + "logps/rejected": -121.75987243652344, + "loss": 0.9201, + "margin_dpo/margin_mean": 16.68012237548828, + "margin_dpo/margin_std": 17.86024284362793, + "step": 71 + }, + { + "KL/chosen_KL_mean": -8.834901809692383, + "KL/mean": -17.32482147216797, + "KL/rejected_KL_mean": -25.814735412597656, + "KL/std": 17.12436866760254, + "epoch": 0.10572687224669604, + "fcm_dpo/beta": 0.03948363661766052, + "fcm_dpo/delta": -0.290458619594574, + "fcm_dpo/margin": 16.97983169555664, + "fcm_dpo/q_t": 0.35484981536865234, + "grad_norm": 35.54236602783203, + "learning_rate": 4.999868246217933e-07, + "logits/chosen": -0.6629385352134705, + "logits/rejected": -0.6264936327934265, + "logps/chosen": -74.88831329345703, + "logps/ref_chosen": -66.05341339111328, + "logps/ref_rejected": -95.2869873046875, + "logps/rejected": -121.10172271728516, + "loss": 0.9598, + "margin_dpo/margin_mean": 16.97983169555664, + "margin_dpo/margin_std": 20.594505310058594, + "step": 72 + }, + { + "KL/chosen_KL_mean": -10.812238693237305, + "KL/mean": -19.365093231201172, + "KL/rejected_KL_mean": -27.917957305908203, + "KL/std": 21.935474395751953, + "epoch": 0.10719530102790015, + "fcm_dpo/beta": 0.03733060508966446, + "fcm_dpo/delta": -0.25653302669525146, + "fcm_dpo/margin": 17.105716705322266, + "fcm_dpo/q_t": 0.37481170892715454, + "grad_norm": 35.56188201904297, + "learning_rate": 4.999703557245192e-07, + "logits/chosen": -0.6949942708015442, + "logits/rejected": -0.6558982729911804, + "logps/chosen": -77.06851196289062, + "logps/ref_chosen": -66.25627136230469, + "logps/ref_rejected": -90.45613098144531, + "logps/rejected": -118.37409210205078, + "loss": 1.0587, + "margin_dpo/margin_mean": 17.105716705322266, + "margin_dpo/margin_std": 27.76058006286621, + "step": 73 + }, + { + "KL/chosen_KL_mean": -11.020193099975586, + "KL/mean": -20.51835823059082, + "KL/rejected_KL_mean": -30.016517639160156, + "KL/std": 24.099422454833984, + "epoch": 0.10866372980910426, + "fcm_dpo/beta": 0.03536809980869293, + "fcm_dpo/delta": -0.29220515489578247, + "fcm_dpo/margin": 18.996326446533203, + "fcm_dpo/q_t": 0.36699697375297546, + "grad_norm": 35.718910217285156, + "learning_rate": 4.999472998758977e-07, + "logits/chosen": -0.6316280364990234, + "logits/rejected": -0.6258025169372559, + "logps/chosen": -64.44507598876953, + "logps/ref_chosen": -53.42488098144531, + "logps/ref_rejected": -95.94693756103516, + "logps/rejected": -125.96345520019531, + "loss": 1.0082, + "margin_dpo/margin_mean": 18.996326446533203, + "margin_dpo/margin_std": 30.294841766357422, + "step": 74 + }, + { + "KL/chosen_KL_mean": -10.813570022583008, + "KL/mean": -24.00226402282715, + "KL/rejected_KL_mean": -37.190948486328125, + "KL/std": 24.315690994262695, + "epoch": 0.11013215859030837, + "fcm_dpo/beta": 0.03265610337257385, + "fcm_dpo/delta": -0.5023984909057617, + "fcm_dpo/margin": 26.377384185791016, + "fcm_dpo/q_t": 0.3248700201511383, + "grad_norm": 30.176677703857422, + "learning_rate": 4.999176576834721e-07, + "logits/chosen": -0.6893298625946045, + "logits/rejected": -0.6836451292037964, + "logps/chosen": -62.67523193359375, + "logps/ref_chosen": -51.861663818359375, + "logps/ref_rejected": -111.25398254394531, + "logps/rejected": -148.44493103027344, + "loss": 0.862, + "margin_dpo/margin_mean": 26.377382278442383, + "margin_dpo/margin_std": 27.90032196044922, + "step": 75 + }, + { + "KL/chosen_KL_mean": -12.86973762512207, + "KL/mean": -20.976146697998047, + "KL/rejected_KL_mean": -29.082550048828125, + "KL/std": 19.501949310302734, + "epoch": 0.11160058737151249, + "fcm_dpo/beta": 0.031097372993826866, + "fcm_dpo/delta": -0.10961665213108063, + "fcm_dpo/margin": 16.212825775146484, + "fcm_dpo/q_t": 0.3852229416370392, + "grad_norm": 31.088647842407227, + "learning_rate": 4.998814299283415e-07, + "logits/chosen": -0.7124214172363281, + "logits/rejected": -0.6733113527297974, + "logps/chosen": -66.13577270507812, + "logps/ref_chosen": -53.26603698730469, + "logps/ref_rejected": -78.21662902832031, + "logps/rejected": -107.29917907714844, + "loss": 1.0435, + "margin_dpo/margin_mean": 16.21282386779785, + "margin_dpo/margin_std": 21.476770401000977, + "step": 76 + }, + { + "KL/chosen_KL_mean": -11.584232330322266, + "KL/mean": -24.901695251464844, + "KL/rejected_KL_mean": -38.21916580200195, + "KL/std": 26.45264434814453, + "epoch": 0.1130690161527166, + "fcm_dpo/beta": 0.028904041275382042, + "fcm_dpo/delta": -0.40436428785324097, + "fcm_dpo/margin": 26.634933471679688, + "fcm_dpo/q_t": 0.3336452543735504, + "grad_norm": 32.505531311035156, + "learning_rate": 4.998386175651409e-07, + "logits/chosen": -0.678729236125946, + "logits/rejected": -0.6420924663543701, + "logps/chosen": -69.680908203125, + "logps/ref_chosen": -58.0966796875, + "logps/ref_rejected": -93.77361297607422, + "logps/rejected": -131.99278259277344, + "loss": 0.9116, + "margin_dpo/margin_mean": 26.634933471679688, + "margin_dpo/margin_std": 29.75762176513672, + "step": 77 + }, + { + "KL/chosen_KL_mean": -11.404712677001953, + "KL/mean": -21.245365142822266, + "KL/rejected_KL_mean": -31.086013793945312, + "KL/std": 21.289440155029297, + "epoch": 0.1145374449339207, + "fcm_dpo/beta": 0.027905140072107315, + "fcm_dpo/delta": -0.15760990977287292, + "fcm_dpo/margin": 19.68130111694336, + "fcm_dpo/q_t": 0.3796628713607788, + "grad_norm": 28.661237716674805, + "learning_rate": 4.997892217220159e-07, + "logits/chosen": -0.6651836037635803, + "logits/rejected": -0.6431748270988464, + "logps/chosen": -67.01849365234375, + "logps/ref_chosen": -55.61378479003906, + "logps/ref_rejected": -84.93436431884766, + "logps/rejected": -116.02037811279297, + "loss": 1.0166, + "margin_dpo/margin_mean": 19.68130111694336, + "margin_dpo/margin_std": 24.757617950439453, + "step": 78 + }, + { + "KL/chosen_KL_mean": -11.86384391784668, + "KL/mean": -22.519290924072266, + "KL/rejected_KL_mean": -33.17473602294922, + "KL/std": 23.79071044921875, + "epoch": 0.11600587371512482, + "fcm_dpo/beta": 0.02685295045375824, + "fcm_dpo/delta": -0.18282675743103027, + "fcm_dpo/margin": 21.310894012451172, + "fcm_dpo/q_t": 0.3792785704135895, + "grad_norm": 25.604280471801758, + "learning_rate": 4.997332437005931e-07, + "logits/chosen": -0.6872971057891846, + "logits/rejected": -0.6611636281013489, + "logps/chosen": -67.3143310546875, + "logps/ref_chosen": -55.45048522949219, + "logps/ref_rejected": -87.64756774902344, + "logps/rejected": -120.82229614257812, + "loss": 1.0255, + "margin_dpo/margin_mean": 21.310894012451172, + "margin_dpo/margin_std": 29.21087074279785, + "step": 79 + }, + { + "KL/chosen_KL_mean": -14.992725372314453, + "KL/mean": -25.173377990722656, + "KL/rejected_KL_mean": -35.354034423828125, + "KL/std": 25.4627742767334, + "epoch": 0.11747430249632893, + "fcm_dpo/beta": 0.02588074468076229, + "fcm_dpo/delta": -0.13631115853786469, + "fcm_dpo/margin": 20.36130142211914, + "fcm_dpo/q_t": 0.39049336314201355, + "grad_norm": 27.309263229370117, + "learning_rate": 4.996706849759452e-07, + "logits/chosen": -0.7352666854858398, + "logits/rejected": -0.69718337059021, + "logps/chosen": -73.51201629638672, + "logps/ref_chosen": -58.519290924072266, + "logps/ref_rejected": -87.54750061035156, + "logps/rejected": -122.90153503417969, + "loss": 1.0581, + "margin_dpo/margin_mean": 20.36130142211914, + "margin_dpo/margin_std": 29.580612182617188, + "step": 80 + }, + { + "KL/chosen_KL_mean": -14.040748596191406, + "KL/mean": -27.886680603027344, + "KL/rejected_KL_mean": -41.73262023925781, + "KL/std": 29.870553970336914, + "epoch": 0.11894273127753303, + "fcm_dpo/beta": 0.024616166949272156, + "fcm_dpo/delta": -0.30468764901161194, + "fcm_dpo/margin": 27.69186782836914, + "fcm_dpo/q_t": 0.35815176367759705, + "grad_norm": 27.911373138427734, + "learning_rate": 4.996015471965529e-07, + "logits/chosen": -0.6956121921539307, + "logits/rejected": -0.6685779094696045, + "logps/chosen": -80.48961639404297, + "logps/ref_chosen": -66.44886779785156, + "logps/ref_rejected": -129.66270446777344, + "logps/rejected": -171.39532470703125, + "loss": 0.9671, + "margin_dpo/margin_mean": 27.69186782836914, + "margin_dpo/margin_std": 35.64756393432617, + "step": 81 + }, + { + "KL/chosen_KL_mean": -16.342798233032227, + "KL/mean": -27.334274291992188, + "KL/rejected_KL_mean": -38.32575607299805, + "KL/std": 27.074535369873047, + "epoch": 0.12041116005873716, + "fcm_dpo/beta": 0.024119626730680466, + "fcm_dpo/delta": -0.1382198929786682, + "fcm_dpo/margin": 21.982955932617188, + "fcm_dpo/q_t": 0.389964759349823, + "grad_norm": 31.045066833496094, + "learning_rate": 4.995258321842611e-07, + "logits/chosen": -0.6336376070976257, + "logits/rejected": -0.6242895722389221, + "logps/chosen": -68.57518005371094, + "logps/ref_chosen": -52.232383728027344, + "logps/ref_rejected": -90.74325561523438, + "logps/rejected": -129.06900024414062, + "loss": 1.1014, + "margin_dpo/margin_mean": 21.982955932617188, + "margin_dpo/margin_std": 37.50407028198242, + "step": 82 + }, + { + "KL/chosen_KL_mean": -16.703378677368164, + "KL/mean": -29.769197463989258, + "KL/rejected_KL_mean": -42.835018157958984, + "KL/std": 28.040857315063477, + "epoch": 0.12187958883994127, + "fcm_dpo/beta": 0.022889206185936928, + "fcm_dpo/delta": -0.21175748109817505, + "fcm_dpo/margin": 26.131641387939453, + "fcm_dpo/q_t": 0.3712141811847687, + "grad_norm": 27.287622451782227, + "learning_rate": 4.994435419342304e-07, + "logits/chosen": -0.6630829572677612, + "logits/rejected": -0.6318089962005615, + "logps/chosen": -72.53076171875, + "logps/ref_chosen": -55.82738494873047, + "logps/ref_rejected": -103.71589660644531, + "logps/rejected": -146.55091857910156, + "loss": 1.0016, + "margin_dpo/margin_mean": 26.131643295288086, + "margin_dpo/margin_std": 33.42725372314453, + "step": 83 + }, + { + "KL/chosen_KL_mean": -15.800683975219727, + "KL/mean": -26.602916717529297, + "KL/rejected_KL_mean": -37.4051513671875, + "KL/std": 23.81344223022461, + "epoch": 0.12334801762114538, + "fcm_dpo/beta": 0.022253597155213356, + "fcm_dpo/delta": -0.08681607246398926, + "fcm_dpo/margin": 21.604461669921875, + "fcm_dpo/q_t": 0.3886079490184784, + "grad_norm": 24.89923858642578, + "learning_rate": 4.993546786148857e-07, + "logits/chosen": -0.6726013422012329, + "logits/rejected": -0.635522723197937, + "logps/chosen": -82.97685241699219, + "logps/ref_chosen": -67.1761703491211, + "logps/ref_rejected": -87.29859924316406, + "logps/rejected": -124.70375061035156, + "loss": 1.0329, + "margin_dpo/margin_mean": 21.604461669921875, + "margin_dpo/margin_std": 24.17331314086914, + "step": 84 + }, + { + "KL/chosen_KL_mean": -15.828641891479492, + "KL/mean": -27.07083511352539, + "KL/rejected_KL_mean": -38.31303024291992, + "KL/std": 24.207460403442383, + "epoch": 0.12481644640234948, + "fcm_dpo/beta": 0.022155042737722397, + "fcm_dpo/delta": -0.10365713387727737, + "fcm_dpo/margin": 22.484390258789062, + "fcm_dpo/q_t": 0.3894878029823303, + "grad_norm": 24.93961524963379, + "learning_rate": 4.992592445678582e-07, + "logits/chosen": -0.6551598310470581, + "logits/rejected": -0.623712420463562, + "logps/chosen": -74.23526000976562, + "logps/ref_chosen": -58.4066162109375, + "logps/ref_rejected": -78.63880157470703, + "logps/rejected": -116.95182800292969, + "loss": 1.0486, + "margin_dpo/margin_mean": 22.484390258789062, + "margin_dpo/margin_std": 29.117321014404297, + "step": 85 + }, + { + "KL/chosen_KL_mean": -20.43706703186035, + "KL/mean": -32.29643249511719, + "KL/rejected_KL_mean": -44.155792236328125, + "KL/std": 33.11329650878906, + "epoch": 0.1262848751835536, + "fcm_dpo/beta": 0.0215867031365633, + "fcm_dpo/delta": -0.1179293692111969, + "fcm_dpo/margin": 23.718732833862305, + "fcm_dpo/q_t": 0.40107935667037964, + "grad_norm": 31.930374145507812, + "learning_rate": 4.991572423079235e-07, + "logits/chosen": -0.6938769817352295, + "logits/rejected": -0.6838431358337402, + "logps/chosen": -76.57453155517578, + "logps/ref_chosen": -56.13746643066406, + "logps/ref_rejected": -88.12165069580078, + "logps/rejected": -132.27743530273438, + "loss": 1.1337, + "margin_dpo/margin_mean": 23.718732833862305, + "margin_dpo/margin_std": 45.869529724121094, + "step": 86 + }, + { + "KL/chosen_KL_mean": -20.00033950805664, + "KL/mean": -34.11670684814453, + "KL/rejected_KL_mean": -48.23307418823242, + "KL/std": 32.86204528808594, + "epoch": 0.1277533039647577, + "fcm_dpo/beta": 0.020640596747398376, + "fcm_dpo/delta": -0.19566936790943146, + "fcm_dpo/margin": 28.232730865478516, + "fcm_dpo/q_t": 0.3739252984523773, + "grad_norm": 25.189109802246094, + "learning_rate": 4.990486745229364e-07, + "logits/chosen": -0.7301120758056641, + "logits/rejected": -0.7094823122024536, + "logps/chosen": -75.63643646240234, + "logps/ref_chosen": -55.63609313964844, + "logps/ref_rejected": -95.46757507324219, + "logps/rejected": -143.70065307617188, + "loss": 1.0366, + "margin_dpo/margin_mean": 28.232730865478516, + "margin_dpo/margin_std": 40.02729797363281, + "step": 87 + }, + { + "KL/chosen_KL_mean": -25.23326873779297, + "KL/mean": -35.901371002197266, + "KL/rejected_KL_mean": -46.5694694519043, + "KL/std": 34.48196792602539, + "epoch": 0.12922173274596183, + "fcm_dpo/beta": 0.020320210605859756, + "fcm_dpo/delta": -0.03590092435479164, + "fcm_dpo/margin": 21.33620262145996, + "fcm_dpo/q_t": 0.40818527340888977, + "grad_norm": 26.31541633605957, + "learning_rate": 4.989335440737586e-07, + "logits/chosen": -0.6587375402450562, + "logits/rejected": -0.6542388796806335, + "logps/chosen": -98.9044189453125, + "logps/ref_chosen": -73.67115020751953, + "logps/ref_rejected": -106.70849609375, + "logps/rejected": -153.27796936035156, + "loss": 1.1371, + "margin_dpo/margin_mean": 21.33620262145996, + "margin_dpo/margin_std": 36.64844512939453, + "step": 88 + }, + { + "KL/chosen_KL_mean": -15.452627182006836, + "KL/mean": -27.265708923339844, + "KL/rejected_KL_mean": -39.078792572021484, + "KL/std": 28.668766021728516, + "epoch": 0.13069016152716592, + "fcm_dpo/beta": 0.02023715153336525, + "fcm_dpo/delta": -0.08208386600017548, + "fcm_dpo/margin": 23.62615966796875, + "fcm_dpo/q_t": 0.3936477303504944, + "grad_norm": 23.98124122619629, + "learning_rate": 4.988118539941847e-07, + "logits/chosen": -0.7175350189208984, + "logits/rejected": -0.6864731311798096, + "logps/chosen": -76.07754516601562, + "logps/ref_chosen": -60.624916076660156, + "logps/ref_rejected": -82.08354949951172, + "logps/rejected": -121.16233825683594, + "loss": 1.0599, + "margin_dpo/margin_mean": 23.626155853271484, + "margin_dpo/margin_std": 32.83501434326172, + "step": 89 + }, + { + "KL/chosen_KL_mean": -19.54310417175293, + "KL/mean": -35.97503662109375, + "KL/rejected_KL_mean": -52.406978607177734, + "KL/std": 39.376708984375, + "epoch": 0.13215859030837004, + "fcm_dpo/beta": 0.01951216161251068, + "fcm_dpo/delta": -0.2572743892669678, + "fcm_dpo/margin": 32.86386489868164, + "fcm_dpo/q_t": 0.37370768189430237, + "grad_norm": 24.72134780883789, + "learning_rate": 4.986836074908615e-07, + "logits/chosen": -0.636969268321991, + "logits/rejected": -0.6531749963760376, + "logps/chosen": -72.82841491699219, + "logps/ref_chosen": -53.285308837890625, + "logps/ref_rejected": -111.54470825195312, + "logps/rejected": -163.95169067382812, + "loss": 1.0349, + "margin_dpo/margin_mean": 32.863868713378906, + "margin_dpo/margin_std": 49.08679962158203, + "step": 90 + }, + { + "KL/chosen_KL_mean": -20.63615608215332, + "KL/mean": -33.67596435546875, + "KL/rejected_KL_mean": -46.71577453613281, + "KL/std": 31.137435913085938, + "epoch": 0.13362701908957417, + "fcm_dpo/beta": 0.018853671848773956, + "fcm_dpo/delta": -0.0963558554649353, + "fcm_dpo/margin": 26.079620361328125, + "fcm_dpo/q_t": 0.3956088721752167, + "grad_norm": 24.04185676574707, + "learning_rate": 4.985488079432037e-07, + "logits/chosen": -0.6773035526275635, + "logits/rejected": -0.645221471786499, + "logps/chosen": -82.43911743164062, + "logps/ref_chosen": -61.802955627441406, + "logps/ref_rejected": -87.87395477294922, + "logps/rejected": -134.5897216796875, + "loss": 1.0839, + "margin_dpo/margin_mean": 26.079620361328125, + "margin_dpo/margin_std": 40.8889274597168, + "step": 91 + }, + { + "KL/chosen_KL_mean": -18.164283752441406, + "KL/mean": -31.569286346435547, + "KL/rejected_KL_mean": -44.97429275512695, + "KL/std": 31.935270309448242, + "epoch": 0.13509544787077826, + "fcm_dpo/beta": 0.01851240172982216, + "fcm_dpo/delta": -0.10138699412345886, + "fcm_dpo/margin": 26.81001091003418, + "fcm_dpo/q_t": 0.3927502930164337, + "grad_norm": 22.194721221923828, + "learning_rate": 4.984074589033043e-07, + "logits/chosen": -0.6895343065261841, + "logits/rejected": -0.6665633916854858, + "logps/chosen": -69.8050537109375, + "logps/ref_chosen": -51.640769958496094, + "logps/ref_rejected": -77.88117980957031, + "logps/rejected": -122.85547637939453, + "loss": 1.0646, + "margin_dpo/margin_mean": 26.81001091003418, + "margin_dpo/margin_std": 38.659828186035156, + "step": 92 + }, + { + "KL/chosen_KL_mean": -20.49032211303711, + "KL/mean": -33.852867126464844, + "KL/rejected_KL_mean": -47.21541213989258, + "KL/std": 29.824676513671875, + "epoch": 0.13656387665198239, + "fcm_dpo/beta": 0.018106218427419662, + "fcm_dpo/delta": -0.08807133883237839, + "fcm_dpo/margin": 26.72509002685547, + "fcm_dpo/q_t": 0.39256197214126587, + "grad_norm": 23.166257858276367, + "learning_rate": 4.982595640958425e-07, + "logits/chosen": -0.7384850978851318, + "logits/rejected": -0.6940040588378906, + "logps/chosen": -73.01956176757812, + "logps/ref_chosen": -52.529239654541016, + "logps/ref_rejected": -77.16075134277344, + "logps/rejected": -124.37615966796875, + "loss": 1.0447, + "margin_dpo/margin_mean": 26.72509002685547, + "margin_dpo/margin_std": 34.69382095336914, + "step": 93 + }, + { + "KL/chosen_KL_mean": -21.992738723754883, + "KL/mean": -37.579185485839844, + "KL/rejected_KL_mean": -53.16563034057617, + "KL/std": 34.558380126953125, + "epoch": 0.13803230543318648, + "fcm_dpo/beta": 0.017487093806266785, + "fcm_dpo/delta": -0.15506845712661743, + "fcm_dpo/margin": 31.172901153564453, + "fcm_dpo/q_t": 0.378325879573822, + "grad_norm": 22.59712028503418, + "learning_rate": 4.98105127417984e-07, + "logits/chosen": -0.6630722880363464, + "logits/rejected": -0.6506177186965942, + "logps/chosen": -83.21534729003906, + "logps/ref_chosen": -61.22261047363281, + "logps/ref_rejected": -99.59902954101562, + "logps/rejected": -152.76466369628906, + "loss": 1.0039, + "margin_dpo/margin_mean": 31.172901153564453, + "margin_dpo/margin_std": 36.12760925292969, + "step": 94 + }, + { + "KL/chosen_KL_mean": -21.189931869506836, + "KL/mean": -33.615150451660156, + "KL/rejected_KL_mean": -46.040374755859375, + "KL/std": 33.43156433105469, + "epoch": 0.1395007342143906, + "fcm_dpo/beta": 0.01729883998632431, + "fcm_dpo/delta": -0.031661614775657654, + "fcm_dpo/margin": 24.85043716430664, + "fcm_dpo/q_t": 0.4017173647880554, + "grad_norm": 21.476041793823242, + "learning_rate": 4.979441529392784e-07, + "logits/chosen": -0.6821004152297974, + "logits/rejected": -0.655462920665741, + "logps/chosen": -73.71357727050781, + "logps/ref_chosen": -52.523643493652344, + "logps/ref_rejected": -75.8803482055664, + "logps/rejected": -121.92072296142578, + "loss": 1.0881, + "margin_dpo/margin_mean": 24.850439071655273, + "margin_dpo/margin_std": 35.13576889038086, + "step": 95 + }, + { + "KL/chosen_KL_mean": -20.64446449279785, + "KL/mean": -37.62309646606445, + "KL/rejected_KL_mean": -54.601722717285156, + "KL/std": 37.295902252197266, + "epoch": 0.14096916299559473, + "fcm_dpo/beta": 0.016751719638705254, + "fcm_dpo/delta": -0.18152689933776855, + "fcm_dpo/margin": 33.95726013183594, + "fcm_dpo/q_t": 0.3761478066444397, + "grad_norm": 21.628402709960938, + "learning_rate": 4.977766449015534e-07, + "logits/chosen": -0.7095851898193359, + "logits/rejected": -0.6844866275787354, + "logps/chosen": -82.80143737792969, + "logps/ref_chosen": -62.15697479248047, + "logps/ref_rejected": -96.59601593017578, + "logps/rejected": -151.19773864746094, + "loss": 0.9975, + "margin_dpo/margin_mean": 33.95726013183594, + "margin_dpo/margin_std": 41.307655334472656, + "step": 96 + }, + { + "KL/chosen_KL_mean": -21.8316650390625, + "KL/mean": -34.830955505371094, + "KL/rejected_KL_mean": -47.83024215698242, + "KL/std": 31.186649322509766, + "epoch": 0.14243759177679882, + "fcm_dpo/beta": 0.0167661365121603, + "fcm_dpo/delta": -0.0386638417840004, + "fcm_dpo/margin": 25.998580932617188, + "fcm_dpo/q_t": 0.3992045521736145, + "grad_norm": 22.661230087280273, + "learning_rate": 4.976026077188012e-07, + "logits/chosen": -0.6369616389274597, + "logits/rejected": -0.5970015525817871, + "logps/chosen": -76.47802734375, + "logps/ref_chosen": -54.646366119384766, + "logps/ref_rejected": -76.96475219726562, + "logps/rejected": -124.79499816894531, + "loss": 1.0676, + "margin_dpo/margin_mean": 25.998584747314453, + "margin_dpo/margin_std": 31.176647186279297, + "step": 97 + }, + { + "KL/chosen_KL_mean": -27.28506088256836, + "KL/mean": -42.106605529785156, + "KL/rejected_KL_mean": -56.92816162109375, + "KL/std": 35.70084762573242, + "epoch": 0.14390602055800295, + "fcm_dpo/beta": 0.01634235680103302, + "fcm_dpo/delta": -0.08897878974676132, + "fcm_dpo/margin": 29.64310073852539, + "fcm_dpo/q_t": 0.38946154713630676, + "grad_norm": 24.021251678466797, + "learning_rate": 4.974220459770639e-07, + "logits/chosen": -0.6834473013877869, + "logits/rejected": -0.669763445854187, + "logps/chosen": -92.54368591308594, + "logps/ref_chosen": -65.25862884521484, + "logps/ref_rejected": -96.5274887084961, + "logps/rejected": -153.45565795898438, + "loss": 1.074, + "margin_dpo/margin_mean": 29.64310073852539, + "margin_dpo/margin_std": 42.92453384399414, + "step": 98 + }, + { + "KL/chosen_KL_mean": -21.082822799682617, + "KL/mean": -39.566993713378906, + "KL/rejected_KL_mean": -58.05116271972656, + "KL/std": 38.398990631103516, + "epoch": 0.14537444933920704, + "fcm_dpo/beta": 0.015781186521053314, + "fcm_dpo/delta": -0.19561892747879028, + "fcm_dpo/margin": 36.968345642089844, + "fcm_dpo/q_t": 0.3759078085422516, + "grad_norm": 20.537532806396484, + "learning_rate": 4.972349644343108e-07, + "logits/chosen": -0.637118935585022, + "logits/rejected": -0.639615535736084, + "logps/chosen": -66.72130584716797, + "logps/ref_chosen": -45.638484954833984, + "logps/ref_rejected": -86.43793487548828, + "logps/rejected": -144.48910522460938, + "loss": 1.0012, + "margin_dpo/margin_mean": 36.968345642089844, + "margin_dpo/margin_std": 47.97369384765625, + "step": 99 + }, + { + "KL/chosen_KL_mean": -23.904550552368164, + "KL/mean": -34.20349884033203, + "KL/rejected_KL_mean": -44.5024528503418, + "KL/std": 30.948299407958984, + "epoch": 0.14684287812041116, + "fcm_dpo/beta": 0.015916183590888977, + "fcm_dpo/delta": 0.07390052080154419, + "fcm_dpo/margin": 20.597900390625, + "fcm_dpo/q_t": 0.4260128140449524, + "grad_norm": 23.365909576416016, + "learning_rate": 4.970413680203148e-07, + "logits/chosen": -0.6662120819091797, + "logits/rejected": -0.6264818906784058, + "logps/chosen": -81.49853515625, + "logps/ref_chosen": -57.59397888183594, + "logps/ref_rejected": -74.06021118164062, + "logps/rejected": -118.56266784667969, + "loss": 1.1847, + "margin_dpo/margin_mean": 20.597902297973633, + "margin_dpo/margin_std": 39.958717346191406, + "step": 100 + }, + { + "KL/chosen_KL_mean": -29.311023712158203, + "KL/mean": -42.13480758666992, + "KL/rejected_KL_mean": -54.958587646484375, + "KL/std": 38.1388053894043, + "epoch": 0.14831130690161526, + "fcm_dpo/beta": 0.015804601833224297, + "fcm_dpo/delta": -0.006234418600797653, + "fcm_dpo/margin": 25.647552490234375, + "fcm_dpo/q_t": 0.41378289461135864, + "grad_norm": 23.029918670654297, + "learning_rate": 4.968412618365215e-07, + "logits/chosen": -0.6485938429832458, + "logits/rejected": -0.6187626123428345, + "logps/chosen": -90.95987701416016, + "logps/ref_chosen": -61.64885330200195, + "logps/ref_rejected": -83.18968200683594, + "logps/rejected": -138.1482696533203, + "loss": 1.1411, + "margin_dpo/margin_mean": 25.647552490234375, + "margin_dpo/margin_std": 45.90587615966797, + "step": 101 + }, + { + "KL/chosen_KL_mean": -31.269521713256836, + "KL/mean": -40.59046936035156, + "KL/rejected_KL_mean": -49.91142654418945, + "KL/std": 35.46381378173828, + "epoch": 0.14977973568281938, + "fcm_dpo/beta": 0.015855927020311356, + "fcm_dpo/delta": -0.019583335146307945, + "fcm_dpo/margin": 18.64190673828125, + "fcm_dpo/q_t": 0.43402665853500366, + "grad_norm": 26.079177856445312, + "learning_rate": 4.966346511559149e-07, + "logits/chosen": -0.6906998157501221, + "logits/rejected": -0.6490976810455322, + "logps/chosen": -95.34840393066406, + "logps/ref_chosen": -64.0788803100586, + "logps/ref_rejected": -68.18707275390625, + "logps/rejected": -118.09850311279297, + "loss": 1.222, + "margin_dpo/margin_mean": 18.641904830932617, + "margin_dpo/margin_std": 42.348793029785156, + "step": 102 + }, + { + "KL/chosen_KL_mean": -25.099937438964844, + "KL/mean": -44.034488677978516, + "KL/rejected_KL_mean": -62.969051361083984, + "KL/std": 39.63392639160156, + "epoch": 0.1512481644640235, + "fcm_dpo/beta": 0.015347588807344437, + "fcm_dpo/delta": -0.19285300374031067, + "fcm_dpo/margin": 37.86910629272461, + "fcm_dpo/q_t": 0.37476682662963867, + "grad_norm": 22.30409812927246, + "learning_rate": 4.964215414228785e-07, + "logits/chosen": -0.6355269551277161, + "logits/rejected": -0.6004114151000977, + "logps/chosen": -86.39921569824219, + "logps/ref_chosen": -61.299278259277344, + "logps/ref_rejected": -93.57270812988281, + "logps/rejected": -156.54176330566406, + "loss": 1.0005, + "margin_dpo/margin_mean": 37.869102478027344, + "margin_dpo/margin_std": 48.369873046875, + "step": 103 + }, + { + "KL/chosen_KL_mean": -26.97751808166504, + "KL/mean": -45.011497497558594, + "KL/rejected_KL_mean": -63.04547119140625, + "KL/std": 43.440818786621094, + "epoch": 0.1527165932452276, + "fcm_dpo/beta": 0.014920437708497047, + "fcm_dpo/delta": -0.14584705233573914, + "fcm_dpo/margin": 36.067962646484375, + "fcm_dpo/q_t": 0.38805246353149414, + "grad_norm": 21.63632583618164, + "learning_rate": 4.96201938253052e-07, + "logits/chosen": -0.6813393831253052, + "logits/rejected": -0.6534780859947205, + "logps/chosen": -81.35029602050781, + "logps/ref_chosen": -54.372772216796875, + "logps/ref_rejected": -89.5647201538086, + "logps/rejected": -152.61019897460938, + "loss": 1.0535, + "margin_dpo/margin_mean": 36.06795883178711, + "margin_dpo/margin_std": 53.349693298339844, + "step": 104 + }, + { + "KL/chosen_KL_mean": -25.276098251342773, + "KL/mean": -50.77607727050781, + "KL/rejected_KL_mean": -76.27605438232422, + "KL/std": 41.882408142089844, + "epoch": 0.15418502202643172, + "fcm_dpo/beta": 0.014073311351239681, + "fcm_dpo/delta": -0.34200507402420044, + "fcm_dpo/margin": 50.99995422363281, + "fcm_dpo/q_t": 0.3382055163383484, + "grad_norm": 21.752716064453125, + "learning_rate": 4.959758474331832e-07, + "logits/chosen": -0.6410149931907654, + "logits/rejected": -0.6186502575874329, + "logps/chosen": -79.9150390625, + "logps/ref_chosen": -54.638946533203125, + "logps/ref_rejected": -97.97351837158203, + "logps/rejected": -174.24957275390625, + "loss": 0.8843, + "margin_dpo/margin_mean": 50.99995803833008, + "margin_dpo/margin_std": 46.56443405151367, + "step": 105 + }, + { + "KL/chosen_KL_mean": -28.383203506469727, + "KL/mean": -44.35602569580078, + "KL/rejected_KL_mean": -60.32884216308594, + "KL/std": 36.69441223144531, + "epoch": 0.15565345080763582, + "fcm_dpo/beta": 0.013703379780054092, + "fcm_dpo/delta": -0.03964092954993248, + "fcm_dpo/margin": 31.945636749267578, + "fcm_dpo/q_t": 0.3993247449398041, + "grad_norm": 20.311628341674805, + "learning_rate": 4.957432749209755e-07, + "logits/chosen": -0.6390097141265869, + "logits/rejected": -0.6126164197921753, + "logps/chosen": -83.21609497070312, + "logps/ref_chosen": -54.83289337158203, + "logps/ref_rejected": -85.22461700439453, + "logps/rejected": -145.553466796875, + "loss": 1.0661, + "margin_dpo/margin_mean": 31.94563865661621, + "margin_dpo/margin_std": 39.89073944091797, + "step": 106 + }, + { + "KL/chosen_KL_mean": -33.93869400024414, + "KL/mean": -51.83905029296875, + "KL/rejected_KL_mean": -69.7394027709961, + "KL/std": 44.899288177490234, + "epoch": 0.15712187958883994, + "fcm_dpo/beta": 0.013466178439557552, + "fcm_dpo/delta": -0.087033212184906, + "fcm_dpo/margin": 35.80072021484375, + "fcm_dpo/q_t": 0.3910368084907532, + "grad_norm": 20.68709373474121, + "learning_rate": 4.955042268449307e-07, + "logits/chosen": -0.6427664756774902, + "logits/rejected": -0.5967296361923218, + "logps/chosen": -103.6465072631836, + "logps/ref_chosen": -69.70780944824219, + "logps/ref_rejected": -94.73950958251953, + "logps/rejected": -164.47891235351562, + "loss": 1.0566, + "margin_dpo/margin_mean": 35.800716400146484, + "margin_dpo/margin_std": 47.552371978759766, + "step": 107 + }, + { + "KL/chosen_KL_mean": -29.895898818969727, + "KL/mean": -51.84703063964844, + "KL/rejected_KL_mean": -73.79816436767578, + "KL/std": 49.70708465576172, + "epoch": 0.15859030837004406, + "fcm_dpo/beta": 0.013104308396577835, + "fcm_dpo/delta": -0.18593883514404297, + "fcm_dpo/margin": 43.90226745605469, + "fcm_dpo/q_t": 0.3812934160232544, + "grad_norm": 20.52142906188965, + "learning_rate": 4.952587095041881e-07, + "logits/chosen": -0.6195969581604004, + "logits/rejected": -0.5994083881378174, + "logps/chosen": -85.9057846069336, + "logps/ref_chosen": -56.0098876953125, + "logps/ref_rejected": -95.79601287841797, + "logps/rejected": -169.59417724609375, + "loss": 1.0418, + "margin_dpo/margin_mean": 43.90226745605469, + "margin_dpo/margin_std": 64.3550033569336, + "step": 108 + }, + { + "KL/chosen_KL_mean": -27.957853317260742, + "KL/mean": -51.0029296875, + "KL/rejected_KL_mean": -74.04800415039062, + "KL/std": 47.15357971191406, + "epoch": 0.16005873715124816, + "fcm_dpo/beta": 0.012563558295369148, + "fcm_dpo/delta": -0.19107185304164886, + "fcm_dpo/margin": 46.09014892578125, + "fcm_dpo/q_t": 0.37124601006507874, + "grad_norm": 21.661203384399414, + "learning_rate": 4.95006729368358e-07, + "logits/chosen": -0.5873284339904785, + "logits/rejected": -0.5662369132041931, + "logps/chosen": -90.84333801269531, + "logps/ref_chosen": -62.88549041748047, + "logps/ref_rejected": -98.68573760986328, + "logps/rejected": -172.73373413085938, + "loss": 1.0005, + "margin_dpo/margin_mean": 46.090152740478516, + "margin_dpo/margin_std": 56.48835754394531, + "step": 109 + }, + { + "KL/chosen_KL_mean": -28.93518829345703, + "KL/mean": -49.65400695800781, + "KL/rejected_KL_mean": -70.37283325195312, + "KL/std": 47.03770065307617, + "epoch": 0.16152716593245228, + "fcm_dpo/beta": 0.012130336835980415, + "fcm_dpo/delta": -0.11256064474582672, + "fcm_dpo/margin": 41.43762969970703, + "fcm_dpo/q_t": 0.3895985782146454, + "grad_norm": 18.499637603759766, + "learning_rate": 4.947482930773511e-07, + "logits/chosen": -0.5971484780311584, + "logits/rejected": -0.5627081394195557, + "logps/chosen": -87.68887329101562, + "logps/ref_chosen": -58.753684997558594, + "logps/ref_rejected": -79.75001525878906, + "logps/rejected": -150.1228485107422, + "loss": 1.0618, + "margin_dpo/margin_mean": 41.43762969970703, + "margin_dpo/margin_std": 56.833656311035156, + "step": 110 + }, + { + "KL/chosen_KL_mean": -32.69416046142578, + "KL/mean": -56.263702392578125, + "KL/rejected_KL_mean": -79.83323669433594, + "KL/std": 52.9500732421875, + "epoch": 0.16299559471365638, + "fcm_dpo/beta": 0.011841144412755966, + "fcm_dpo/delta": -0.1690835952758789, + "fcm_dpo/margin": 47.13909149169922, + "fcm_dpo/q_t": 0.37844541668891907, + "grad_norm": 21.36251449584961, + "learning_rate": 4.944834074412042e-07, + "logits/chosen": -0.6595109701156616, + "logits/rejected": -0.6392641067504883, + "logps/chosen": -101.31826782226562, + "logps/ref_chosen": -68.62410736083984, + "logps/ref_rejected": -98.42886352539062, + "logps/rejected": -178.26210021972656, + "loss": 1.0405, + "margin_dpo/margin_mean": 47.13909149169922, + "margin_dpo/margin_std": 65.69253540039062, + "step": 111 + }, + { + "KL/chosen_KL_mean": -29.994304656982422, + "KL/mean": -44.81261444091797, + "KL/rejected_KL_mean": -59.63092041015625, + "KL/std": 36.23802947998047, + "epoch": 0.1644640234948605, + "fcm_dpo/beta": 0.011842923238873482, + "fcm_dpo/delta": 0.050649721175432205, + "fcm_dpo/margin": 29.636615753173828, + "fcm_dpo/q_t": 0.41945815086364746, + "grad_norm": 19.11754035949707, + "learning_rate": 4.942120794399002e-07, + "logits/chosen": -0.6208142042160034, + "logits/rejected": -0.5849310159683228, + "logps/chosen": -80.24394226074219, + "logps/ref_chosen": -50.24964141845703, + "logps/ref_rejected": -64.77442932128906, + "logps/rejected": -124.40534973144531, + "loss": 1.1323, + "margin_dpo/margin_mean": 29.636615753173828, + "margin_dpo/margin_std": 44.190711975097656, + "step": 112 + }, + { + "KL/chosen_KL_mean": -37.94208526611328, + "KL/mean": -53.6860237121582, + "KL/rejected_KL_mean": -69.4299545288086, + "KL/std": 38.465965270996094, + "epoch": 0.16593245227606462, + "fcm_dpo/beta": 0.011969354934990406, + "fcm_dpo/delta": 0.024007823318243027, + "fcm_dpo/margin": 31.487873077392578, + "fcm_dpo/q_t": 0.41338014602661133, + "grad_norm": 19.75728988647461, + "learning_rate": 4.939343162231841e-07, + "logits/chosen": -0.5755143165588379, + "logits/rejected": -0.5319409370422363, + "logps/chosen": -104.65503692626953, + "logps/ref_chosen": -66.71295166015625, + "logps/ref_rejected": -77.96870422363281, + "logps/rejected": -147.39865112304688, + "loss": 1.1054, + "margin_dpo/margin_mean": 31.487873077392578, + "margin_dpo/margin_std": 42.64485549926758, + "step": 113 + }, + { + "KL/chosen_KL_mean": -34.591270446777344, + "KL/mean": -59.78630065917969, + "KL/rejected_KL_mean": -84.9813232421875, + "KL/std": 56.91395568847656, + "epoch": 0.16740088105726872, + "fcm_dpo/beta": 0.011564414948225021, + "fcm_dpo/delta": -0.19607561826705933, + "fcm_dpo/margin": 50.390052795410156, + "fcm_dpo/q_t": 0.3780399262905121, + "grad_norm": 21.318517684936523, + "learning_rate": 4.936501251103751e-07, + "logits/chosen": -0.5932904481887817, + "logits/rejected": -0.562382698059082, + "logps/chosen": -92.37635803222656, + "logps/ref_chosen": -57.78507995605469, + "logps/ref_rejected": -87.10966491699219, + "logps/rejected": -172.09097290039062, + "loss": 1.0109, + "margin_dpo/margin_mean": 50.390052795410156, + "margin_dpo/margin_std": 69.09920501708984, + "step": 114 + }, + { + "KL/chosen_KL_mean": -44.7384033203125, + "KL/mean": -61.84308624267578, + "KL/rejected_KL_mean": -78.94776153564453, + "KL/std": 54.04515838623047, + "epoch": 0.16886930983847284, + "fcm_dpo/beta": 0.011562837287783623, + "fcm_dpo/delta": 0.004423616454005241, + "fcm_dpo/margin": 34.20935821533203, + "fcm_dpo/q_t": 0.41715824604034424, + "grad_norm": 26.597360610961914, + "learning_rate": 4.933595135901732e-07, + "logits/chosen": -0.619019627571106, + "logits/rejected": -0.6011543273925781, + "logps/chosen": -110.321044921875, + "logps/ref_chosen": -65.5826416015625, + "logps/ref_rejected": -98.56552124023438, + "logps/rejected": -177.51327514648438, + "loss": 1.1766, + "margin_dpo/margin_mean": 34.20935821533203, + "margin_dpo/margin_std": 71.29178619384766, + "step": 115 + }, + { + "KL/chosen_KL_mean": -33.04991912841797, + "KL/mean": -52.28227996826172, + "KL/rejected_KL_mean": -71.5146484375, + "KL/std": 45.42361831665039, + "epoch": 0.17033773861967694, + "fcm_dpo/beta": 0.011507030576467514, + "fcm_dpo/delta": -0.04481929540634155, + "fcm_dpo/margin": 38.464725494384766, + "fcm_dpo/q_t": 0.40007728338241577, + "grad_norm": 21.224323272705078, + "learning_rate": 4.930624893204624e-07, + "logits/chosen": -0.5934985876083374, + "logits/rejected": -0.5850518345832825, + "logps/chosen": -84.45022583007812, + "logps/ref_chosen": -51.40031433105469, + "logps/ref_rejected": -80.5218505859375, + "logps/rejected": -152.0364990234375, + "loss": 1.0662, + "margin_dpo/margin_mean": 38.464725494384766, + "margin_dpo/margin_std": 49.343894958496094, + "step": 116 + }, + { + "KL/chosen_KL_mean": -42.33665466308594, + "KL/mean": -57.977378845214844, + "KL/rejected_KL_mean": -73.61810302734375, + "KL/std": 48.001014709472656, + "epoch": 0.17180616740088106, + "fcm_dpo/beta": 0.011499254032969475, + "fcm_dpo/delta": 0.041788313537836075, + "fcm_dpo/margin": 31.281452178955078, + "fcm_dpo/q_t": 0.41948583722114563, + "grad_norm": 25.782733917236328, + "learning_rate": 4.927590601281083e-07, + "logits/chosen": -0.5640000104904175, + "logits/rejected": -0.526547372341156, + "logps/chosen": -111.63506317138672, + "logps/ref_chosen": -69.29840850830078, + "logps/ref_rejected": -66.583984375, + "logps/rejected": -140.20208740234375, + "loss": 1.1488, + "margin_dpo/margin_mean": 31.281452178955078, + "margin_dpo/margin_std": 54.715789794921875, + "step": 117 + }, + { + "KL/chosen_KL_mean": -33.31963348388672, + "KL/mean": -52.253089904785156, + "KL/rejected_KL_mean": -71.1865463256836, + "KL/std": 43.34165954589844, + "epoch": 0.17327459618208516, + "fcm_dpo/beta": 0.011471563950181007, + "fcm_dpo/delta": -0.03593885153532028, + "fcm_dpo/margin": 37.86691665649414, + "fcm_dpo/q_t": 0.40212157368659973, + "grad_norm": 20.143999099731445, + "learning_rate": 4.924492340087524e-07, + "logits/chosen": -0.6188483238220215, + "logits/rejected": -0.6008873581886292, + "logps/chosen": -88.96061706542969, + "logps/ref_chosen": -55.6409797668457, + "logps/ref_rejected": -75.66905975341797, + "logps/rejected": -146.85560607910156, + "loss": 1.072, + "margin_dpo/margin_mean": 37.86691665649414, + "margin_dpo/margin_std": 49.769737243652344, + "step": 118 + }, + { + "KL/chosen_KL_mean": -45.8137092590332, + "KL/mean": -64.60531616210938, + "KL/rejected_KL_mean": -83.39691162109375, + "KL/std": 48.94361877441406, + "epoch": 0.17474302496328928, + "fcm_dpo/beta": 0.011316780932247639, + "fcm_dpo/delta": -0.02757979929447174, + "fcm_dpo/margin": 37.58320236206055, + "fcm_dpo/q_t": 0.407728374004364, + "grad_norm": 22.411529541015625, + "learning_rate": 4.92133019126601e-07, + "logits/chosen": -0.6017279624938965, + "logits/rejected": -0.5897752046585083, + "logps/chosen": -119.32390594482422, + "logps/ref_chosen": -73.51019287109375, + "logps/ref_rejected": -102.977294921875, + "logps/rejected": -186.37420654296875, + "loss": 1.1113, + "margin_dpo/margin_mean": 37.58320236206055, + "margin_dpo/margin_std": 60.125755310058594, + "step": 119 + }, + { + "KL/chosen_KL_mean": -46.51409912109375, + "KL/mean": -72.65493774414062, + "KL/rejected_KL_mean": -98.7957763671875, + "KL/std": 60.961395263671875, + "epoch": 0.1762114537444934, + "fcm_dpo/beta": 0.011053888127207756, + "fcm_dpo/delta": -0.1890048086643219, + "fcm_dpo/margin": 52.28166961669922, + "fcm_dpo/q_t": 0.37308794260025024, + "grad_norm": 21.354141235351562, + "learning_rate": 4.918104238142103e-07, + "logits/chosen": -0.6113423109054565, + "logits/rejected": -0.5809808969497681, + "logps/chosen": -123.29493713378906, + "logps/ref_chosen": -76.78083801269531, + "logps/ref_rejected": -108.02374267578125, + "logps/rejected": -206.81951904296875, + "loss": 0.9987, + "margin_dpo/margin_mean": 52.281673431396484, + "margin_dpo/margin_std": 64.23561096191406, + "step": 120 + }, + { + "KL/chosen_KL_mean": -45.217079162597656, + "KL/mean": -74.15641784667969, + "KL/rejected_KL_mean": -103.09576416015625, + "KL/std": 59.655128479003906, + "epoch": 0.1776798825256975, + "fcm_dpo/beta": 0.010542536154389381, + "fcm_dpo/delta": -0.22549036145210266, + "fcm_dpo/margin": 57.878684997558594, + "fcm_dpo/q_t": 0.36856669187545776, + "grad_norm": 23.214689254760742, + "learning_rate": 4.91481456572267e-07, + "logits/chosen": -0.5403860807418823, + "logits/rejected": -0.5361485481262207, + "logps/chosen": -107.00697326660156, + "logps/ref_chosen": -61.789894104003906, + "logps/ref_rejected": -109.99456787109375, + "logps/rejected": -213.09033203125, + "loss": 0.9989, + "margin_dpo/margin_mean": 57.878684997558594, + "margin_dpo/margin_std": 74.57313537597656, + "step": 121 + }, + { + "KL/chosen_KL_mean": -41.20557403564453, + "KL/mean": -76.79521179199219, + "KL/rejected_KL_mean": -112.38485717773438, + "KL/std": 67.50093078613281, + "epoch": 0.17914831130690162, + "fcm_dpo/beta": 0.009974541142582893, + "fcm_dpo/delta": -0.33350038528442383, + "fcm_dpo/margin": 71.17928314208984, + "fcm_dpo/q_t": 0.343948096036911, + "grad_norm": 22.87774658203125, + "learning_rate": 4.911461260693638e-07, + "logits/chosen": -0.5265074968338013, + "logits/rejected": -0.5429497957229614, + "logps/chosen": -88.1077880859375, + "logps/ref_chosen": -46.9022102355957, + "logps/ref_rejected": -106.71418762207031, + "logps/rejected": -219.09906005859375, + "loss": 0.9029, + "margin_dpo/margin_mean": 71.17927551269531, + "margin_dpo/margin_std": 70.96698760986328, + "step": 122 + }, + { + "KL/chosen_KL_mean": -45.384735107421875, + "KL/mean": -68.55912780761719, + "KL/rejected_KL_mean": -91.7335205078125, + "KL/std": 57.45310592651367, + "epoch": 0.18061674008810572, + "fcm_dpo/beta": 0.00966709479689598, + "fcm_dpo/delta": -0.051273368299007416, + "fcm_dpo/margin": 46.348785400390625, + "fcm_dpo/q_t": 0.4034884572029114, + "grad_norm": 20.789918899536133, + "learning_rate": 4.908044411417711e-07, + "logits/chosen": -0.5363984107971191, + "logits/rejected": -0.5196830034255981, + "logps/chosen": -106.72337341308594, + "logps/ref_chosen": -61.33863830566406, + "logps/ref_rejected": -87.775390625, + "logps/rejected": -179.5089111328125, + "loss": 1.1135, + "margin_dpo/margin_mean": 46.34878158569336, + "margin_dpo/margin_std": 77.05763244628906, + "step": 123 + }, + { + "KL/chosen_KL_mean": -51.98781967163086, + "KL/mean": -86.58702087402344, + "KL/rejected_KL_mean": -121.18624114990234, + "KL/std": 78.8485107421875, + "epoch": 0.18208516886930984, + "fcm_dpo/beta": 0.009301427751779556, + "fcm_dpo/delta": -0.26134854555130005, + "fcm_dpo/margin": 69.19841766357422, + "fcm_dpo/q_t": 0.36982783675193787, + "grad_norm": 24.75668716430664, + "learning_rate": 4.904564107932048e-07, + "logits/chosen": -0.5215315222740173, + "logits/rejected": -0.5242322683334351, + "logps/chosen": -123.4361572265625, + "logps/ref_chosen": -71.44833374023438, + "logps/ref_rejected": -117.58056640625, + "logps/rejected": -238.76681518554688, + "loss": 1.0166, + "margin_dpo/margin_mean": 69.19841766357422, + "margin_dpo/margin_std": 98.82803344726562, + "step": 124 + }, + { + "KL/chosen_KL_mean": -42.808067321777344, + "KL/mean": -73.27595520019531, + "KL/rejected_KL_mean": -103.74385070800781, + "KL/std": 66.60159301757812, + "epoch": 0.18355359765051396, + "fcm_dpo/beta": 0.00900559313595295, + "fcm_dpo/delta": -0.1572001874446869, + "fcm_dpo/margin": 60.93578338623047, + "fcm_dpo/q_t": 0.38217341899871826, + "grad_norm": 19.192096710205078, + "learning_rate": 4.90102044194588e-07, + "logits/chosen": -0.4833022356033325, + "logits/rejected": -0.4855707287788391, + "logps/chosen": -92.94500732421875, + "logps/ref_chosen": -50.136940002441406, + "logps/ref_rejected": -83.98861694335938, + "logps/rejected": -187.7324676513672, + "loss": 1.034, + "margin_dpo/margin_mean": 60.93578338623047, + "margin_dpo/margin_std": 83.12398529052734, + "step": 125 + }, + { + "KL/chosen_KL_mean": -46.238609313964844, + "KL/mean": -74.72161865234375, + "KL/rejected_KL_mean": -103.20464324951172, + "KL/std": 60.32928466796875, + "epoch": 0.18502202643171806, + "fcm_dpo/beta": 0.008764306083321571, + "fcm_dpo/delta": -0.10474735498428345, + "fcm_dpo/margin": 56.966033935546875, + "fcm_dpo/q_t": 0.39040666818618774, + "grad_norm": 20.248746871948242, + "learning_rate": 4.897413506838102e-07, + "logits/chosen": -0.5261760354042053, + "logits/rejected": -0.5226148366928101, + "logps/chosen": -101.90567779541016, + "logps/ref_chosen": -55.66706848144531, + "logps/ref_rejected": -98.1297607421875, + "logps/rejected": -201.33441162109375, + "loss": 1.048, + "margin_dpo/margin_mean": 56.966033935546875, + "margin_dpo/margin_std": 76.7184066772461, + "step": 126 + }, + { + "KL/chosen_KL_mean": -44.78712844848633, + "KL/mean": -65.41035461425781, + "KL/rejected_KL_mean": -86.03358459472656, + "KL/std": 52.55406951904297, + "epoch": 0.18649045521292218, + "fcm_dpo/beta": 0.008787820115685463, + "fcm_dpo/delta": 0.03892592340707779, + "fcm_dpo/margin": 41.24645233154297, + "fcm_dpo/q_t": 0.4162459075450897, + "grad_norm": 20.812768936157227, + "learning_rate": 4.89374339765481e-07, + "logits/chosen": -0.5199460983276367, + "logits/rejected": -0.5012995004653931, + "logps/chosen": -101.34181213378906, + "logps/ref_chosen": -56.55467987060547, + "logps/ref_rejected": -76.7957763671875, + "logps/rejected": -162.82937622070312, + "loss": 1.136, + "margin_dpo/margin_mean": 41.24645233154297, + "margin_dpo/margin_std": 66.14584350585938, + "step": 127 + }, + { + "KL/chosen_KL_mean": -47.59389877319336, + "KL/mean": -69.38520812988281, + "KL/rejected_KL_mean": -91.176513671875, + "KL/std": 61.367488861083984, + "epoch": 0.18795888399412627, + "fcm_dpo/beta": 0.008857084438204765, + "fcm_dpo/delta": 0.014133721590042114, + "fcm_dpo/margin": 43.582611083984375, + "fcm_dpo/q_t": 0.4142889976501465, + "grad_norm": 27.369966506958008, + "learning_rate": 4.890010211106795e-07, + "logits/chosen": -0.49865514039993286, + "logits/rejected": -0.47880756855010986, + "logps/chosen": -105.71485900878906, + "logps/ref_chosen": -58.12095642089844, + "logps/ref_rejected": -76.43896484375, + "logps/rejected": -167.615478515625, + "loss": 1.1451, + "margin_dpo/margin_mean": 43.58261489868164, + "margin_dpo/margin_std": 76.90882873535156, + "step": 128 + }, + { + "KL/chosen_KL_mean": -57.667259216308594, + "KL/mean": -80.6406478881836, + "KL/rejected_KL_mean": -103.61404418945312, + "KL/std": 68.38490295410156, + "epoch": 0.1894273127753304, + "fcm_dpo/beta": 0.008825141936540604, + "fcm_dpo/delta": -0.0057245357893407345, + "fcm_dpo/margin": 45.94677734375, + "fcm_dpo/q_t": 0.4152906835079193, + "grad_norm": 20.145679473876953, + "learning_rate": 4.88621404556699e-07, + "logits/chosen": -0.5370001196861267, + "logits/rejected": -0.5274189710617065, + "logps/chosen": -124.58363342285156, + "logps/ref_chosen": -66.91637420654297, + "logps/ref_rejected": -96.6422119140625, + "logps/rejected": -200.25625610351562, + "loss": 1.1535, + "margin_dpo/margin_mean": 45.94677734375, + "margin_dpo/margin_std": 89.10274505615234, + "step": 129 + }, + { + "KL/chosen_KL_mean": -42.64732360839844, + "KL/mean": -77.17532348632812, + "KL/rejected_KL_mean": -111.70331573486328, + "KL/std": 69.53358459472656, + "epoch": 0.19089574155653452, + "fcm_dpo/beta": 0.008637124672532082, + "fcm_dpo/delta": -0.2089286893606186, + "fcm_dpo/margin": 69.05598449707031, + "fcm_dpo/q_t": 0.37180206179618835, + "grad_norm": 19.886871337890625, + "learning_rate": 4.882355001067891e-07, + "logits/chosen": -0.5179574489593506, + "logits/rejected": -0.513495147228241, + "logps/chosen": -87.31417846679688, + "logps/ref_chosen": -44.66685104370117, + "logps/ref_rejected": -82.78165435791016, + "logps/rejected": -194.48497009277344, + "loss": 1.0019, + "margin_dpo/margin_mean": 69.05598449707031, + "margin_dpo/margin_std": 84.9914779663086, + "step": 130 + }, + { + "KL/chosen_KL_mean": -38.19416046142578, + "KL/mean": -72.767822265625, + "KL/rejected_KL_mean": -107.34147644042969, + "KL/std": 69.4886474609375, + "epoch": 0.19236417033773862, + "fcm_dpo/beta": 0.008221091702580452, + "fcm_dpo/delta": -0.1789543628692627, + "fcm_dpo/margin": 69.14730834960938, + "fcm_dpo/q_t": 0.3706004023551941, + "grad_norm": 27.72515869140625, + "learning_rate": 4.878433179298909e-07, + "logits/chosen": -0.4905538558959961, + "logits/rejected": -0.49695295095443726, + "logps/chosen": -83.11874389648438, + "logps/ref_chosen": -44.924591064453125, + "logps/ref_rejected": -88.44401550292969, + "logps/rejected": -195.78549194335938, + "loss": 0.9848, + "margin_dpo/margin_mean": 69.14730834960938, + "margin_dpo/margin_std": 75.87403869628906, + "step": 131 + }, + { + "KL/chosen_KL_mean": -50.51142120361328, + "KL/mean": -78.78823852539062, + "KL/rejected_KL_mean": -107.0650634765625, + "KL/std": 68.71534729003906, + "epoch": 0.19383259911894274, + "fcm_dpo/beta": 0.008070580661296844, + "fcm_dpo/delta": -0.05931827053427696, + "fcm_dpo/margin": 56.55363082885742, + "fcm_dpo/q_t": 0.40248197317123413, + "grad_norm": 19.825626373291016, + "learning_rate": 4.874448683603694e-07, + "logits/chosen": -0.5076676607131958, + "logits/rejected": -0.5060294270515442, + "logps/chosen": -109.51251220703125, + "logps/ref_chosen": -59.00108337402344, + "logps/ref_rejected": -87.89215087890625, + "logps/rejected": -194.95721435546875, + "loss": 1.0917, + "margin_dpo/margin_mean": 56.55363082885742, + "margin_dpo/margin_std": 89.22288513183594, + "step": 132 + }, + { + "KL/chosen_KL_mean": -60.22153854370117, + "KL/mean": -85.20774841308594, + "KL/rejected_KL_mean": -110.1939697265625, + "KL/std": 62.262081146240234, + "epoch": 0.19530102790014683, + "fcm_dpo/beta": 0.008044019341468811, + "fcm_dpo/delta": -0.002142667770385742, + "fcm_dpo/margin": 49.97242736816406, + "fcm_dpo/q_t": 0.41164666414260864, + "grad_norm": 27.044200897216797, + "learning_rate": 4.870401618977415e-07, + "logits/chosen": -0.5105775594711304, + "logits/rejected": -0.49744895100593567, + "logps/chosen": -126.82603454589844, + "logps/ref_chosen": -66.60449981689453, + "logps/ref_rejected": -96.33355712890625, + "logps/rejected": -206.52752685546875, + "loss": 1.115, + "margin_dpo/margin_mean": 49.97242736816406, + "margin_dpo/margin_std": 79.255859375, + "step": 133 + }, + { + "KL/chosen_KL_mean": -47.19751739501953, + "KL/mean": -73.87967681884766, + "KL/rejected_KL_mean": -100.56185150146484, + "KL/std": 60.19834518432617, + "epoch": 0.19676945668135096, + "fcm_dpo/beta": 0.00804828479886055, + "fcm_dpo/delta": -0.03093547746539116, + "fcm_dpo/margin": 53.36433029174805, + "fcm_dpo/q_t": 0.4034237563610077, + "grad_norm": 18.76936149597168, + "learning_rate": 4.866292092063986e-07, + "logits/chosen": -0.4705553650856018, + "logits/rejected": -0.45708662271499634, + "logps/chosen": -99.26676940917969, + "logps/ref_chosen": -52.06925582885742, + "logps/ref_rejected": -87.6545181274414, + "logps/rejected": -188.21636962890625, + "loss": 1.0719, + "margin_dpo/margin_mean": 53.36433410644531, + "margin_dpo/margin_std": 69.71368408203125, + "step": 134 + }, + { + "KL/chosen_KL_mean": -52.56504821777344, + "KL/mean": -91.32998657226562, + "KL/rejected_KL_mean": -130.09490966796875, + "KL/std": 81.02059936523438, + "epoch": 0.19823788546255505, + "fcm_dpo/beta": 0.007756436243653297, + "fcm_dpo/delta": -0.21430166065692902, + "fcm_dpo/margin": 77.52987670898438, + "fcm_dpo/q_t": 0.37159401178359985, + "grad_norm": 23.092226028442383, + "learning_rate": 4.862120211153265e-07, + "logits/chosen": -0.4758632481098175, + "logits/rejected": -0.5096943974494934, + "logps/chosen": -102.91890716552734, + "logps/ref_chosen": -50.353858947753906, + "logps/ref_rejected": -115.97975158691406, + "logps/rejected": -246.0746612548828, + "loss": 0.998, + "margin_dpo/margin_mean": 77.52987670898438, + "margin_dpo/margin_std": 98.24815368652344, + "step": 135 + }, + { + "KL/chosen_KL_mean": -62.72093963623047, + "KL/mean": -88.66222381591797, + "KL/rejected_KL_mean": -114.603515625, + "KL/std": 74.2306137084961, + "epoch": 0.19970631424375918, + "fcm_dpo/beta": 0.00763201666995883, + "fcm_dpo/delta": 0.0035090260207653046, + "fcm_dpo/margin": 51.88257598876953, + "fcm_dpo/q_t": 0.41930729150772095, + "grad_norm": 20.256174087524414, + "learning_rate": 4.857886086178193e-07, + "logits/chosen": -0.4780592918395996, + "logits/rejected": -0.4707057476043701, + "logps/chosen": -127.79344940185547, + "logps/ref_chosen": -65.072509765625, + "logps/ref_rejected": -96.32122802734375, + "logps/rejected": -210.92474365234375, + "loss": 1.1452, + "margin_dpo/margin_mean": 51.88257598876953, + "margin_dpo/margin_std": 95.30119323730469, + "step": 136 + }, + { + "KL/chosen_KL_mean": -59.67708206176758, + "KL/mean": -100.20204162597656, + "KL/rejected_KL_mean": -140.7270050048828, + "KL/std": 97.27854919433594, + "epoch": 0.2011747430249633, + "fcm_dpo/beta": 0.007428483106195927, + "fcm_dpo/delta": -0.21530447900295258, + "fcm_dpo/margin": 81.04991912841797, + "fcm_dpo/q_t": 0.3775022029876709, + "grad_norm": 17.79768180847168, + "learning_rate": 4.853589828711902e-07, + "logits/chosen": -0.4339534640312195, + "logits/rejected": -0.46067190170288086, + "logps/chosen": -108.43620300292969, + "logps/ref_chosen": -48.759117126464844, + "logps/ref_rejected": -113.86376953125, + "logps/rejected": -254.5907745361328, + "loss": 1.0282, + "margin_dpo/margin_mean": 81.0499267578125, + "margin_dpo/margin_std": 117.30170440673828, + "step": 137 + }, + { + "KL/chosen_KL_mean": -62.279048919677734, + "KL/mean": -92.98139953613281, + "KL/rejected_KL_mean": -123.68376159667969, + "KL/std": 73.94004821777344, + "epoch": 0.2026431718061674, + "fcm_dpo/beta": 0.007311869412660599, + "fcm_dpo/delta": -0.051259323954582214, + "fcm_dpo/margin": 61.40470886230469, + "fcm_dpo/q_t": 0.3972048759460449, + "grad_norm": 20.16287612915039, + "learning_rate": 4.849231551964771e-07, + "logits/chosen": -0.4445374608039856, + "logits/rejected": -0.43380314111709595, + "logps/chosen": -122.79869842529297, + "logps/ref_chosen": -60.519649505615234, + "logps/ref_rejected": -93.19694519042969, + "logps/rejected": -216.88070678710938, + "loss": 1.0574, + "margin_dpo/margin_mean": 61.40470886230469, + "margin_dpo/margin_std": 75.6790771484375, + "step": 138 + }, + { + "KL/chosen_KL_mean": -51.84934616088867, + "KL/mean": -88.24090576171875, + "KL/rejected_KL_mean": -124.6324691772461, + "KL/std": 69.78694152832031, + "epoch": 0.20411160058737152, + "fcm_dpo/beta": 0.007158602587878704, + "fcm_dpo/delta": -0.12749908864498138, + "fcm_dpo/margin": 72.78312683105469, + "fcm_dpo/q_t": 0.3843567967414856, + "grad_norm": 19.195222854614258, + "learning_rate": 4.844811370781446e-07, + "logits/chosen": -0.43394410610198975, + "logits/rejected": -0.4245094060897827, + "logps/chosen": -98.74073028564453, + "logps/ref_chosen": -46.89138412475586, + "logps/ref_rejected": -79.72798156738281, + "logps/rejected": -204.36044311523438, + "loss": 1.0226, + "margin_dpo/margin_mean": 72.78312683105469, + "margin_dpo/margin_std": 90.51847839355469, + "step": 139 + }, + { + "KL/chosen_KL_mean": -62.725608825683594, + "KL/mean": -95.730224609375, + "KL/rejected_KL_mean": -128.73483276367188, + "KL/std": 77.60523986816406, + "epoch": 0.2055800293685756, + "fcm_dpo/beta": 0.007023262791335583, + "fcm_dpo/delta": -0.06672540307044983, + "fcm_dpo/margin": 66.00922393798828, + "fcm_dpo/q_t": 0.39678555727005005, + "grad_norm": 19.862146377563477, + "learning_rate": 4.840329401637809e-07, + "logits/chosen": -0.38486558198928833, + "logits/rejected": -0.36864161491394043, + "logps/chosen": -121.70032501220703, + "logps/ref_chosen": -58.97471618652344, + "logps/ref_rejected": -83.28410339355469, + "logps/rejected": -212.01895141601562, + "loss": 1.0714, + "margin_dpo/margin_mean": 66.00922393798828, + "margin_dpo/margin_std": 93.16436767578125, + "step": 140 + }, + { + "KL/chosen_KL_mean": -70.66569519042969, + "KL/mean": -101.82574462890625, + "KL/rejected_KL_mean": -132.9857635498047, + "KL/std": 85.2192611694336, + "epoch": 0.20704845814977973, + "fcm_dpo/beta": 0.00697126192972064, + "fcm_dpo/delta": -0.0359983891248703, + "fcm_dpo/margin": 62.32007598876953, + "fcm_dpo/q_t": 0.40241730213165283, + "grad_norm": 24.785545349121094, + "learning_rate": 4.83578576263792e-07, + "logits/chosen": -0.43056273460388184, + "logits/rejected": -0.4188095033168793, + "logps/chosen": -145.74136352539062, + "logps/ref_chosen": -75.07566833496094, + "logps/ref_rejected": -98.1922607421875, + "logps/rejected": -231.17803955078125, + "loss": 1.1045, + "margin_dpo/margin_mean": 62.320072174072266, + "margin_dpo/margin_std": 98.26313781738281, + "step": 141 + }, + { + "KL/chosen_KL_mean": -71.45011901855469, + "KL/mean": -107.04524230957031, + "KL/rejected_KL_mean": -142.64035034179688, + "KL/std": 92.68605041503906, + "epoch": 0.20851688693098386, + "fcm_dpo/beta": 0.006883557885885239, + "fcm_dpo/delta": -0.0946403294801712, + "fcm_dpo/margin": 71.19023895263672, + "fcm_dpo/q_t": 0.3939516842365265, + "grad_norm": 25.63075065612793, + "learning_rate": 4.83118057351089e-07, + "logits/chosen": -0.40616393089294434, + "logits/rejected": -0.40541693568229675, + "logps/chosen": -129.47805786132812, + "logps/ref_chosen": -58.027931213378906, + "logps/ref_rejected": -94.58222961425781, + "logps/rejected": -237.22259521484375, + "loss": 1.0855, + "margin_dpo/margin_mean": 71.19023895263672, + "margin_dpo/margin_std": 109.05268859863281, + "step": 142 + }, + { + "KL/chosen_KL_mean": -75.96367645263672, + "KL/mean": -98.91988372802734, + "KL/rejected_KL_mean": -121.87608337402344, + "KL/std": 81.95751953125, + "epoch": 0.20998531571218795, + "fcm_dpo/beta": 0.006882138084620237, + "fcm_dpo/delta": 0.08677390962839127, + "fcm_dpo/margin": 45.912410736083984, + "fcm_dpo/q_t": 0.432314932346344, + "grad_norm": 22.879791259765625, + "learning_rate": 4.826513955607734e-07, + "logits/chosen": -0.36676502227783203, + "logits/rejected": -0.35898709297180176, + "logps/chosen": -133.56011962890625, + "logps/ref_chosen": -57.59645080566406, + "logps/ref_rejected": -78.99957275390625, + "logps/rejected": -200.8756561279297, + "loss": 1.1959, + "margin_dpo/margin_mean": 45.91241455078125, + "margin_dpo/margin_std": 95.7051010131836, + "step": 143 + }, + { + "KL/chosen_KL_mean": -67.95751953125, + "KL/mean": -95.87046813964844, + "KL/rejected_KL_mean": -123.78343200683594, + "KL/std": 69.62263488769531, + "epoch": 0.21145374449339208, + "fcm_dpo/beta": 0.006947984918951988, + "fcm_dpo/delta": 0.012606319040060043, + "fcm_dpo/margin": 55.825904846191406, + "fcm_dpo/q_t": 0.4123944938182831, + "grad_norm": 20.645727157592773, + "learning_rate": 4.821786031898176e-07, + "logits/chosen": -0.4141218066215515, + "logits/rejected": -0.40556472539901733, + "logps/chosen": -127.86387634277344, + "logps/ref_chosen": -59.90636444091797, + "logps/ref_rejected": -82.00025939941406, + "logps/rejected": -205.78369140625, + "loss": 1.1116, + "margin_dpo/margin_mean": 55.82590866088867, + "margin_dpo/margin_std": 82.17929077148438, + "step": 144 + }, + { + "KL/chosen_KL_mean": -65.5662841796875, + "KL/mean": -95.84506225585938, + "KL/rejected_KL_mean": -126.12384033203125, + "KL/std": 70.3823013305664, + "epoch": 0.21292217327459617, + "fcm_dpo/beta": 0.00693280715495348, + "fcm_dpo/delta": -0.02072506584227085, + "fcm_dpo/margin": 60.55757141113281, + "fcm_dpo/q_t": 0.4049571752548218, + "grad_norm": 23.787511825561523, + "learning_rate": 4.816996926967401e-07, + "logits/chosen": -0.41279762983322144, + "logits/rejected": -0.39724746346473694, + "logps/chosen": -122.16694641113281, + "logps/ref_chosen": -56.60066604614258, + "logps/ref_rejected": -77.86631774902344, + "logps/rejected": -203.9901580810547, + "loss": 1.0903, + "margin_dpo/margin_mean": 60.55756378173828, + "margin_dpo/margin_std": 85.34068298339844, + "step": 145 + }, + { + "KL/chosen_KL_mean": -88.61365509033203, + "KL/mean": -111.90090942382812, + "KL/rejected_KL_mean": -135.18817138671875, + "KL/std": 75.67164611816406, + "epoch": 0.2143906020558003, + "fcm_dpo/beta": 0.00698929512873292, + "fcm_dpo/delta": 0.0770314633846283, + "fcm_dpo/margin": 46.57452392578125, + "fcm_dpo/q_t": 0.42611053586006165, + "grad_norm": 27.229778289794922, + "learning_rate": 4.812146767012779e-07, + "logits/chosen": -0.386644184589386, + "logits/rejected": -0.35976487398147583, + "logps/chosen": -154.61410522460938, + "logps/ref_chosen": -66.00045013427734, + "logps/ref_rejected": -81.70278930664062, + "logps/rejected": -216.89096069335938, + "loss": 1.1846, + "margin_dpo/margin_mean": 46.57452392578125, + "margin_dpo/margin_std": 90.82237243652344, + "step": 146 + }, + { + "KL/chosen_KL_mean": -65.0084457397461, + "KL/mean": -96.55009460449219, + "KL/rejected_KL_mean": -128.09173583984375, + "KL/std": 76.3470687866211, + "epoch": 0.21585903083700442, + "fcm_dpo/beta": 0.006981690879911184, + "fcm_dpo/delta": -0.04241678863763809, + "fcm_dpo/margin": 63.08330535888672, + "fcm_dpo/q_t": 0.4026370644569397, + "grad_norm": 20.30803108215332, + "learning_rate": 4.807235679840536e-07, + "logits/chosen": -0.42511412501335144, + "logits/rejected": -0.4061092436313629, + "logps/chosen": -118.41392517089844, + "logps/ref_chosen": -53.405487060546875, + "logps/ref_rejected": -71.39060974121094, + "logps/rejected": -199.48236083984375, + "loss": 1.0927, + "margin_dpo/margin_mean": 63.083309173583984, + "margin_dpo/margin_std": 94.804443359375, + "step": 147 + }, + { + "KL/chosen_KL_mean": -63.701988220214844, + "KL/mean": -90.98545837402344, + "KL/rejected_KL_mean": -118.2689208984375, + "KL/std": 76.31551361083984, + "epoch": 0.2173274596182085, + "fcm_dpo/beta": 0.0069200447760522366, + "fcm_dpo/delta": -0.08388624340295792, + "fcm_dpo/margin": 54.566932678222656, + "fcm_dpo/q_t": 0.41639888286590576, + "grad_norm": 18.78589630126953, + "learning_rate": 4.802263794862384e-07, + "logits/chosen": -0.4691488444805145, + "logits/rejected": -0.46223020553588867, + "logps/chosen": -128.63906860351562, + "logps/ref_chosen": -64.93708038330078, + "logps/ref_rejected": -103.09384155273438, + "logps/rejected": -221.36276245117188, + "loss": 1.1243, + "margin_dpo/margin_mean": 54.566932678222656, + "margin_dpo/margin_std": 80.36856842041016, + "step": 148 + }, + { + "KL/chosen_KL_mean": -60.93719482421875, + "KL/mean": -94.6129150390625, + "KL/rejected_KL_mean": -128.2886199951172, + "KL/std": 68.51680755615234, + "epoch": 0.21879588839941264, + "fcm_dpo/beta": 0.006747937761247158, + "fcm_dpo/delta": -0.05886346101760864, + "fcm_dpo/margin": 67.35143280029297, + "fcm_dpo/q_t": 0.3960561752319336, + "grad_norm": 18.21072769165039, + "learning_rate": 4.797231243092118e-07, + "logits/chosen": -0.4674052298069, + "logits/rejected": -0.45243215560913086, + "logps/chosen": -119.41095733642578, + "logps/ref_chosen": -58.47376251220703, + "logps/ref_rejected": -99.31474304199219, + "logps/rejected": -227.60336303710938, + "loss": 1.057, + "margin_dpo/margin_mean": 67.3514404296875, + "margin_dpo/margin_std": 81.17979431152344, + "step": 149 + }, + { + "KL/chosen_KL_mean": -53.51652526855469, + "KL/mean": -86.48956298828125, + "KL/rejected_KL_mean": -119.46260833740234, + "KL/std": 79.87619018554688, + "epoch": 0.22026431718061673, + "fcm_dpo/beta": 0.006690857000648975, + "fcm_dpo/delta": -0.04441402480006218, + "fcm_dpo/margin": 65.94608306884766, + "fcm_dpo/q_t": 0.404508501291275, + "grad_norm": 17.715530395507812, + "learning_rate": 4.792138157142157e-07, + "logits/chosen": -0.46521174907684326, + "logits/rejected": -0.46958300471305847, + "logps/chosen": -99.22233581542969, + "logps/ref_chosen": -45.705810546875, + "logps/ref_rejected": -83.34759521484375, + "logps/rejected": -202.81021118164062, + "loss": 1.081, + "margin_dpo/margin_mean": 65.94608306884766, + "margin_dpo/margin_std": 95.0557861328125, + "step": 150 + }, + { + "KL/chosen_KL_mean": -67.021728515625, + "KL/mean": -100.35306549072266, + "KL/rejected_KL_mean": -133.6844024658203, + "KL/std": 76.07426452636719, + "epoch": 0.22173274596182085, + "fcm_dpo/beta": 0.006673037074506283, + "fcm_dpo/delta": -0.046944983303546906, + "fcm_dpo/margin": 66.66267395019531, + "fcm_dpo/q_t": 0.39813345670700073, + "grad_norm": 19.35520362854004, + "learning_rate": 4.786984671220053e-07, + "logits/chosen": -0.5319645404815674, + "logits/rejected": -0.5065436363220215, + "logps/chosen": -137.59255981445312, + "logps/ref_chosen": -70.57083129882812, + "logps/ref_rejected": -100.46382141113281, + "logps/rejected": -234.14822387695312, + "loss": 1.0611, + "margin_dpo/margin_mean": 66.66267395019531, + "margin_dpo/margin_std": 83.35139465332031, + "step": 151 + }, + { + "KL/chosen_KL_mean": -58.77227020263672, + "KL/mean": -99.20213317871094, + "KL/rejected_KL_mean": -139.6320037841797, + "KL/std": 78.30158996582031, + "epoch": 0.22320117474302498, + "fcm_dpo/beta": 0.0065421732142567635, + "fcm_dpo/delta": -0.13599231839179993, + "fcm_dpo/margin": 80.85972595214844, + "fcm_dpo/q_t": 0.3823656737804413, + "grad_norm": 20.457353591918945, + "learning_rate": 4.78177092112495e-07, + "logits/chosen": -0.46430838108062744, + "logits/rejected": -0.46231526136398315, + "logps/chosen": -118.93666076660156, + "logps/ref_chosen": -60.16438674926758, + "logps/ref_rejected": -106.14045715332031, + "logps/rejected": -245.7724609375, + "loss": 1.0172, + "margin_dpo/margin_mean": 80.85972595214844, + "margin_dpo/margin_std": 96.03659057617188, + "step": 152 + }, + { + "KL/chosen_KL_mean": -60.03376007080078, + "KL/mean": -94.54715728759766, + "KL/rejected_KL_mean": -129.060546875, + "KL/std": 85.97286224365234, + "epoch": 0.22466960352422907, + "fcm_dpo/beta": 0.006446994375437498, + "fcm_dpo/delta": -0.04709509760141373, + "fcm_dpo/margin": 69.02679443359375, + "fcm_dpo/q_t": 0.4036220908164978, + "grad_norm": 15.283037185668945, + "learning_rate": 4.776497044244016e-07, + "logits/chosen": -0.4555599093437195, + "logits/rejected": -0.4504218101501465, + "logps/chosen": -116.34903717041016, + "logps/ref_chosen": -56.315277099609375, + "logps/ref_rejected": -85.65583801269531, + "logps/rejected": -214.7163848876953, + "loss": 1.091, + "margin_dpo/margin_mean": 69.02678680419922, + "margin_dpo/margin_std": 105.63395690917969, + "step": 153 + }, + { + "KL/chosen_KL_mean": -71.58514404296875, + "KL/mean": -104.48786926269531, + "KL/rejected_KL_mean": -137.3905792236328, + "KL/std": 85.70448303222656, + "epoch": 0.2261380323054332, + "fcm_dpo/beta": 0.006422577425837517, + "fcm_dpo/delta": -0.023874616250395775, + "fcm_dpo/margin": 65.80543518066406, + "fcm_dpo/q_t": 0.40717241168022156, + "grad_norm": 18.80577278137207, + "learning_rate": 4.771163179548808e-07, + "logits/chosen": -0.46241965889930725, + "logits/rejected": -0.4656856656074524, + "logps/chosen": -134.3277130126953, + "logps/ref_chosen": -62.74256896972656, + "logps/ref_rejected": -104.24420166015625, + "logps/rejected": -241.63478088378906, + "loss": 1.1225, + "margin_dpo/margin_mean": 65.8054428100586, + "margin_dpo/margin_std": 109.62528991699219, + "step": 154 + }, + { + "KL/chosen_KL_mean": -65.22631072998047, + "KL/mean": -98.56913757324219, + "KL/rejected_KL_mean": -131.91197204589844, + "KL/std": 79.4256591796875, + "epoch": 0.2276064610866373, + "fcm_dpo/beta": 0.006380689330399036, + "fcm_dpo/delta": -0.02670937031507492, + "fcm_dpo/margin": 66.6856689453125, + "fcm_dpo/q_t": 0.4044456481933594, + "grad_norm": 19.153099060058594, + "learning_rate": 4.7657694675916247e-07, + "logits/chosen": -0.4766504764556885, + "logits/rejected": -0.4583345055580139, + "logps/chosen": -125.8794937133789, + "logps/ref_chosen": -60.65318298339844, + "logps/ref_rejected": -77.49220275878906, + "logps/rejected": -209.4041748046875, + "loss": 1.0951, + "margin_dpo/margin_mean": 66.6856689453125, + "margin_dpo/margin_std": 98.61933135986328, + "step": 155 + }, + { + "KL/chosen_KL_mean": -91.69331359863281, + "KL/mean": -110.75820922851562, + "KL/rejected_KL_mean": -129.82310485839844, + "KL/std": 84.64684295654297, + "epoch": 0.2290748898678414, + "fcm_dpo/beta": 0.006421338301151991, + "fcm_dpo/delta": 0.05203431844711304, + "fcm_dpo/margin": 38.129791259765625, + "fcm_dpo/q_t": 0.4446510076522827, + "grad_norm": 28.152284622192383, + "learning_rate": 4.7603160505017893e-07, + "logits/chosen": -0.40159350633621216, + "logits/rejected": -0.3953508138656616, + "logps/chosen": -161.18519592285156, + "logps/ref_chosen": -69.49188232421875, + "logps/ref_rejected": -77.16929626464844, + "logps/rejected": -206.99240112304688, + "loss": 1.2762, + "margin_dpo/margin_mean": 38.129791259765625, + "margin_dpo/margin_std": 111.29301452636719, + "step": 156 + }, + { + "KL/chosen_KL_mean": -82.54095458984375, + "KL/mean": -125.1497573852539, + "KL/rejected_KL_mean": -167.75857543945312, + "KL/std": 92.44023895263672, + "epoch": 0.2305433186490455, + "fcm_dpo/beta": 0.006253876723349094, + "fcm_dpo/delta": -0.14177267253398895, + "fcm_dpo/margin": 85.21761322021484, + "fcm_dpo/q_t": 0.3786003589630127, + "grad_norm": 23.096948623657227, + "learning_rate": 4.7548030719819154e-07, + "logits/chosen": -0.3750728964805603, + "logits/rejected": -0.3818325996398926, + "logps/chosen": -143.90939331054688, + "logps/ref_chosen": -61.368438720703125, + "logps/ref_rejected": -107.64636993408203, + "logps/rejected": -275.4049377441406, + "loss": 1.0271, + "margin_dpo/margin_mean": 85.21761322021484, + "margin_dpo/margin_std": 105.14042663574219, + "step": 157 + }, + { + "KL/chosen_KL_mean": -83.61564636230469, + "KL/mean": -128.85971069335938, + "KL/rejected_KL_mean": -174.10379028320312, + "KL/std": 114.70477294921875, + "epoch": 0.23201174743024963, + "fcm_dpo/beta": 0.0060958778485655785, + "fcm_dpo/delta": -0.1604328155517578, + "fcm_dpo/margin": 90.4881591796875, + "fcm_dpo/q_t": 0.38691407442092896, + "grad_norm": 19.036361694335938, + "learning_rate": 4.7492306773041136e-07, + "logits/chosen": -0.37780940532684326, + "logits/rejected": -0.39571529626846313, + "logps/chosen": -141.2285614013672, + "logps/ref_chosen": -57.612918853759766, + "logps/ref_rejected": -113.6946792602539, + "logps/rejected": -287.7984619140625, + "loss": 1.0566, + "margin_dpo/margin_mean": 90.4881591796875, + "margin_dpo/margin_std": 138.78392028808594, + "step": 158 + }, + { + "KL/chosen_KL_mean": -93.97590637207031, + "KL/mean": -124.90428161621094, + "KL/rejected_KL_mean": -155.83265686035156, + "KL/std": 100.59730529785156, + "epoch": 0.23348017621145375, + "fcm_dpo/beta": 0.006093316245824099, + "fcm_dpo/delta": 0.02347235381603241, + "fcm_dpo/margin": 61.85674285888672, + "fcm_dpo/q_t": 0.4160599112510681, + "grad_norm": 25.451343536376953, + "learning_rate": 4.743599013306165e-07, + "logits/chosen": -0.39171531796455383, + "logits/rejected": -0.36167389154434204, + "logps/chosen": -175.5362548828125, + "logps/ref_chosen": -81.56034851074219, + "logps/ref_rejected": -88.89871215820312, + "logps/rejected": -244.73135375976562, + "loss": 1.1492, + "margin_dpo/margin_mean": 61.85674285888672, + "margin_dpo/margin_std": 109.38902282714844, + "step": 159 + }, + { + "KL/chosen_KL_mean": -96.04105377197266, + "KL/mean": -137.7037811279297, + "KL/rejected_KL_mean": -179.3665313720703, + "KL/std": 107.62297058105469, + "epoch": 0.23494860499265785, + "fcm_dpo/beta": 0.0059524280950427055, + "fcm_dpo/delta": -0.10199404507875443, + "fcm_dpo/margin": 83.32546997070312, + "fcm_dpo/q_t": 0.39600396156311035, + "grad_norm": 22.877182006835938, + "learning_rate": 4.737908228387656e-07, + "logits/chosen": -0.35929036140441895, + "logits/rejected": -0.35081833600997925, + "logps/chosen": -161.77194213867188, + "logps/ref_chosen": -65.73088073730469, + "logps/ref_rejected": -97.21781921386719, + "logps/rejected": -276.5843505859375, + "loss": 1.0904, + "margin_dpo/margin_mean": 83.32546997070312, + "margin_dpo/margin_std": 134.5235595703125, + "step": 160 + }, + { + "KL/chosen_KL_mean": -81.80270385742188, + "KL/mean": -118.25526428222656, + "KL/rejected_KL_mean": -154.70782470703125, + "KL/std": 84.72265625, + "epoch": 0.23641703377386197, + "fcm_dpo/beta": 0.005920952185988426, + "fcm_dpo/delta": -0.033098410815000534, + "fcm_dpo/margin": 72.9051284790039, + "fcm_dpo/q_t": 0.40439367294311523, + "grad_norm": 21.35667610168457, + "learning_rate": 4.7321584725060594e-07, + "logits/chosen": -0.38000649213790894, + "logits/rejected": -0.38120192289352417, + "logps/chosen": -134.23916625976562, + "logps/ref_chosen": -52.43647003173828, + "logps/ref_rejected": -83.43095397949219, + "logps/rejected": -238.13877868652344, + "loss": 1.0923, + "margin_dpo/margin_mean": 72.9051284790039, + "margin_dpo/margin_std": 107.10279846191406, + "step": 161 + }, + { + "KL/chosen_KL_mean": -79.02703094482422, + "KL/mean": -114.97813415527344, + "KL/rejected_KL_mean": -150.92922973632812, + "KL/std": 93.30059814453125, + "epoch": 0.23788546255506607, + "fcm_dpo/beta": 0.0058417608961462975, + "fcm_dpo/delta": -0.022580057382583618, + "fcm_dpo/margin": 71.90220642089844, + "fcm_dpo/q_t": 0.40738850831985474, + "grad_norm": 21.55710792541504, + "learning_rate": 4.7263498971727905e-07, + "logits/chosen": -0.4415048360824585, + "logits/rejected": -0.42762479186058044, + "logps/chosen": -141.6376190185547, + "logps/ref_chosen": -62.6105842590332, + "logps/ref_rejected": -89.39057922363281, + "logps/rejected": -240.31982421875, + "loss": 1.1092, + "margin_dpo/margin_mean": 71.90221405029297, + "margin_dpo/margin_std": 110.78158569335938, + "step": 162 + }, + { + "KL/chosen_KL_mean": -88.2254638671875, + "KL/mean": -123.40544128417969, + "KL/rejected_KL_mean": -158.58541870117188, + "KL/std": 93.95071411132812, + "epoch": 0.2393538913362702, + "fcm_dpo/beta": 0.00587341096252203, + "fcm_dpo/delta": -0.013822587206959724, + "fcm_dpo/margin": 70.35995483398438, + "fcm_dpo/q_t": 0.4094018042087555, + "grad_norm": 19.775178909301758, + "learning_rate": 4.720482655449212e-07, + "logits/chosen": -0.3558083772659302, + "logits/rejected": -0.33801817893981934, + "logps/chosen": -143.24710083007812, + "logps/ref_chosen": -55.021629333496094, + "logps/ref_rejected": -75.418212890625, + "logps/rejected": -234.00363159179688, + "loss": 1.1137, + "margin_dpo/margin_mean": 70.35994720458984, + "margin_dpo/margin_std": 112.65255737304688, + "step": 163 + }, + { + "KL/chosen_KL_mean": -80.43484497070312, + "KL/mean": -123.2110366821289, + "KL/rejected_KL_mean": -165.98721313476562, + "KL/std": 91.2215576171875, + "epoch": 0.24082232011747431, + "fcm_dpo/beta": 0.005744011141359806, + "fcm_dpo/delta": -0.09778200834989548, + "fcm_dpo/margin": 85.5523681640625, + "fcm_dpo/q_t": 0.3890402913093567, + "grad_norm": 21.015703201293945, + "learning_rate": 4.714556901942599e-07, + "logits/chosen": -0.34407860040664673, + "logits/rejected": -0.3298642039299011, + "logps/chosen": -136.0755157470703, + "logps/ref_chosen": -55.64066696166992, + "logps/ref_rejected": -79.66463470458984, + "logps/rejected": -245.65184020996094, + "loss": 1.0381, + "margin_dpo/margin_mean": 85.5523681640625, + "margin_dpo/margin_std": 105.12376403808594, + "step": 164 + }, + { + "KL/chosen_KL_mean": -87.14698028564453, + "KL/mean": -114.9844970703125, + "KL/rejected_KL_mean": -142.822021484375, + "KL/std": 77.58013916015625, + "epoch": 0.2422907488986784, + "fcm_dpo/beta": 0.0058072819374501705, + "fcm_dpo/delta": 0.07930518686771393, + "fcm_dpo/margin": 55.67503356933594, + "fcm_dpo/q_t": 0.42748406529426575, + "grad_norm": 21.678955078125, + "learning_rate": 4.708572792802069e-07, + "logits/chosen": -0.3774159252643585, + "logits/rejected": -0.351327121257782, + "logps/chosen": -148.45767211914062, + "logps/ref_chosen": -61.310691833496094, + "logps/ref_rejected": -73.67060852050781, + "logps/rejected": -216.49261474609375, + "loss": 1.1741, + "margin_dpo/margin_mean": 55.67503356933594, + "margin_dpo/margin_std": 103.26383972167969, + "step": 165 + }, + { + "KL/chosen_KL_mean": -77.0203857421875, + "KL/mean": -128.22457885742188, + "KL/rejected_KL_mean": -179.4287872314453, + "KL/std": 112.74888610839844, + "epoch": 0.24375917767988253, + "fcm_dpo/beta": 0.00565761886537075, + "fcm_dpo/delta": -0.19167430698871613, + "fcm_dpo/margin": 102.40840148925781, + "fcm_dpo/q_t": 0.3809058368206024, + "grad_norm": 17.48912239074707, + "learning_rate": 4.702530485714461e-07, + "logits/chosen": -0.30309057235717773, + "logits/rejected": -0.31303203105926514, + "logps/chosen": -128.0039825439453, + "logps/ref_chosen": -50.98360061645508, + "logps/ref_rejected": -98.09512329101562, + "logps/rejected": -277.52392578125, + "loss": 1.019, + "margin_dpo/margin_mean": 102.40840148925781, + "margin_dpo/margin_std": 141.89622497558594, + "step": 166 + }, + { + "KL/chosen_KL_mean": -78.29979705810547, + "KL/mean": -132.09503173828125, + "KL/rejected_KL_mean": -185.89027404785156, + "KL/std": 102.53227233886719, + "epoch": 0.24522760646108663, + "fcm_dpo/beta": 0.005461276508867741, + "fcm_dpo/delta": -0.19935590028762817, + "fcm_dpo/margin": 107.59046173095703, + "fcm_dpo/q_t": 0.368857204914093, + "grad_norm": 18.757923126220703, + "learning_rate": 4.6964301399001877e-07, + "logits/chosen": -0.34970927238464355, + "logits/rejected": -0.35314348340034485, + "logps/chosen": -128.72389221191406, + "logps/ref_chosen": -50.424095153808594, + "logps/ref_rejected": -96.03042602539062, + "logps/rejected": -281.92071533203125, + "loss": 0.9757, + "margin_dpo/margin_mean": 107.59046936035156, + "margin_dpo/margin_std": 118.7750015258789, + "step": 167 + }, + { + "KL/chosen_KL_mean": -84.44252014160156, + "KL/mean": -124.02970886230469, + "KL/rejected_KL_mean": -163.61688232421875, + "KL/std": 95.80686950683594, + "epoch": 0.24669603524229075, + "fcm_dpo/beta": 0.005374173633754253, + "fcm_dpo/delta": -0.02677498757839203, + "fcm_dpo/margin": 79.17437744140625, + "fcm_dpo/q_t": 0.4044000506401062, + "grad_norm": 20.719741821289062, + "learning_rate": 4.690271916109034e-07, + "logits/chosen": -0.32083988189697266, + "logits/rejected": -0.3103061020374298, + "logps/chosen": -133.9053497314453, + "logps/ref_chosen": -49.462825775146484, + "logps/ref_rejected": -75.30855560302734, + "logps/rejected": -238.92544555664062, + "loss": 1.0804, + "margin_dpo/margin_mean": 79.17437744140625, + "margin_dpo/margin_std": 107.18096923828125, + "step": 168 + }, + { + "KL/chosen_KL_mean": -86.89261627197266, + "KL/mean": -121.52689361572266, + "KL/rejected_KL_mean": -156.1611785888672, + "KL/std": 94.73361206054688, + "epoch": 0.24816446402349487, + "fcm_dpo/beta": 0.005301401484757662, + "fcm_dpo/delta": -0.07176721096038818, + "fcm_dpo/margin": 69.26856231689453, + "fcm_dpo/q_t": 0.4205383062362671, + "grad_norm": 21.35027503967285, + "learning_rate": 4.6840559766159235e-07, + "logits/chosen": -0.3565632700920105, + "logits/rejected": -0.34026655554771423, + "logps/chosen": -146.69606018066406, + "logps/ref_chosen": -59.803443908691406, + "logps/ref_rejected": -83.34574890136719, + "logps/rejected": -239.50692749023438, + "loss": 1.1631, + "margin_dpo/margin_mean": 69.26856994628906, + "margin_dpo/margin_std": 130.9052734375, + "step": 169 + }, + { + "KL/chosen_KL_mean": -77.08535766601562, + "KL/mean": -117.763671875, + "KL/rejected_KL_mean": -158.44198608398438, + "KL/std": 90.00228881835938, + "epoch": 0.24963289280469897, + "fcm_dpo/beta": 0.0052484553307294846, + "fcm_dpo/delta": -0.029036525636911392, + "fcm_dpo/margin": 81.35664367675781, + "fcm_dpo/q_t": 0.401960551738739, + "grad_norm": 17.703929901123047, + "learning_rate": 4.6777824852166437e-07, + "logits/chosen": -0.33288633823394775, + "logits/rejected": -0.3227166533470154, + "logps/chosen": -126.55712890625, + "logps/ref_chosen": -49.471771240234375, + "logps/ref_rejected": -75.91734313964844, + "logps/rejected": -234.3593292236328, + "loss": 1.0812, + "margin_dpo/margin_mean": 81.35664367675781, + "margin_dpo/margin_std": 107.8404541015625, + "step": 170 + }, + { + "KL/chosen_KL_mean": -111.29542541503906, + "KL/mean": -144.80523681640625, + "KL/rejected_KL_mean": -178.31504821777344, + "KL/std": 104.81484985351562, + "epoch": 0.2511013215859031, + "fcm_dpo/beta": 0.005305338650941849, + "fcm_dpo/delta": 0.046089254319667816, + "fcm_dpo/margin": 67.01963806152344, + "fcm_dpo/q_t": 0.42465952038764954, + "grad_norm": 29.754013061523438, + "learning_rate": 4.6714516072235273e-07, + "logits/chosen": -0.35111328959465027, + "logits/rejected": -0.3345106542110443, + "logps/chosen": -195.79473876953125, + "logps/ref_chosen": -84.49931335449219, + "logps/ref_rejected": -109.38209533691406, + "logps/rejected": -287.6971435546875, + "loss": 1.1718, + "margin_dpo/margin_mean": 67.01963806152344, + "margin_dpo/margin_std": 134.08868408203125, + "step": 171 + }, + { + "KL/chosen_KL_mean": -98.73640441894531, + "KL/mean": -133.205810546875, + "KL/rejected_KL_mean": -167.67523193359375, + "KL/std": 101.24346160888672, + "epoch": 0.2525697503671072, + "fcm_dpo/beta": 0.005340388976037502, + "fcm_dpo/delta": 0.033069491386413574, + "fcm_dpo/margin": 68.93881225585938, + "fcm_dpo/q_t": 0.4173203706741333, + "grad_norm": 22.75365447998047, + "learning_rate": 4.6650635094610966e-07, + "logits/chosen": -0.38472980260849, + "logits/rejected": -0.3684314489364624, + "logps/chosen": -167.39031982421875, + "logps/ref_chosen": -68.65391540527344, + "logps/ref_rejected": -85.43667602539062, + "logps/rejected": -253.1118927001953, + "loss": 1.1366, + "margin_dpo/margin_mean": 68.9388198852539, + "margin_dpo/margin_std": 114.0185775756836, + "step": 172 + }, + { + "KL/chosen_KL_mean": -89.91429901123047, + "KL/mean": -124.62344360351562, + "KL/rejected_KL_mean": -159.3325958251953, + "KL/std": 94.92538452148438, + "epoch": 0.2540381791483113, + "fcm_dpo/beta": 0.0053945546969771385, + "fcm_dpo/delta": 0.026172153651714325, + "fcm_dpo/margin": 69.41828918457031, + "fcm_dpo/q_t": 0.41492652893066406, + "grad_norm": 21.469446182250977, + "learning_rate": 4.6586183602616687e-07, + "logits/chosen": -0.40895795822143555, + "logits/rejected": -0.3820996880531311, + "logps/chosen": -152.96517944335938, + "logps/ref_chosen": -63.050880432128906, + "logps/ref_rejected": -78.68392181396484, + "logps/rejected": -238.01651000976562, + "loss": 1.1124, + "margin_dpo/margin_mean": 69.41828918457031, + "margin_dpo/margin_std": 98.99739074707031, + "step": 173 + }, + { + "KL/chosen_KL_mean": -85.3913345336914, + "KL/mean": -126.82902526855469, + "KL/rejected_KL_mean": -168.26670837402344, + "KL/std": 99.21536254882812, + "epoch": 0.2555066079295154, + "fcm_dpo/beta": 0.005375551991164684, + "fcm_dpo/delta": -0.04802219197154045, + "fcm_dpo/margin": 82.8753662109375, + "fcm_dpo/q_t": 0.40161585807800293, + "grad_norm": 21.169090270996094, + "learning_rate": 4.652116329460919e-07, + "logits/chosen": -0.326399028301239, + "logits/rejected": -0.3440948724746704, + "logps/chosen": -138.75430297851562, + "logps/ref_chosen": -53.36296844482422, + "logps/ref_rejected": -101.91120910644531, + "logps/rejected": -270.17791748046875, + "loss": 1.0841, + "margin_dpo/margin_mean": 82.8753662109375, + "margin_dpo/margin_std": 118.40190887451172, + "step": 174 + }, + { + "KL/chosen_KL_mean": -78.79037475585938, + "KL/mean": -135.27874755859375, + "KL/rejected_KL_mean": -191.76712036132812, + "KL/std": 107.09109497070312, + "epoch": 0.25697503671071953, + "fcm_dpo/beta": 0.005171348340809345, + "fcm_dpo/delta": -0.195995032787323, + "fcm_dpo/margin": 112.97673797607422, + "fcm_dpo/q_t": 0.3668813109397888, + "grad_norm": 27.521982192993164, + "learning_rate": 4.645557588393406e-07, + "logits/chosen": -0.3237273693084717, + "logits/rejected": -0.31100332736968994, + "logps/chosen": -124.20813751220703, + "logps/ref_chosen": -45.417762756347656, + "logps/ref_rejected": -89.50579833984375, + "logps/rejected": -281.2729187011719, + "loss": 0.9595, + "margin_dpo/margin_mean": 112.97673797607422, + "margin_dpo/margin_std": 112.159912109375, + "step": 175 + }, + { + "KL/chosen_KL_mean": -85.66651916503906, + "KL/mean": -133.4976348876953, + "KL/rejected_KL_mean": -181.3287353515625, + "KL/std": 107.46882629394531, + "epoch": 0.25844346549192365, + "fcm_dpo/beta": 0.005067367106676102, + "fcm_dpo/delta": -0.08898322284221649, + "fcm_dpo/margin": 95.66221618652344, + "fcm_dpo/q_t": 0.3938441872596741, + "grad_norm": 18.892227172851562, + "learning_rate": 4.638942309888058e-07, + "logits/chosen": -0.3252803385257721, + "logits/rejected": -0.34228193759918213, + "logps/chosen": -136.11935424804688, + "logps/ref_chosen": -50.452842712402344, + "logps/ref_rejected": -95.5589599609375, + "logps/rejected": -276.8876953125, + "loss": 1.0478, + "margin_dpo/margin_mean": 95.66221618652344, + "margin_dpo/margin_std": 125.021484375, + "step": 176 + }, + { + "KL/chosen_KL_mean": -99.08952331542969, + "KL/mean": -146.7650146484375, + "KL/rejected_KL_mean": -194.4405059814453, + "KL/std": 116.030517578125, + "epoch": 0.2599118942731278, + "fcm_dpo/beta": 0.004990983754396439, + "fcm_dpo/delta": -0.07962613552808762, + "fcm_dpo/margin": 95.35098266601562, + "fcm_dpo/q_t": 0.3944365382194519, + "grad_norm": 26.08759117126465, + "learning_rate": 4.6322706682636137e-07, + "logits/chosen": -0.36417263746261597, + "logits/rejected": -0.3568111062049866, + "logps/chosen": -160.30599975585938, + "logps/ref_chosen": -61.216468811035156, + "logps/ref_rejected": -95.89378356933594, + "logps/rejected": -290.33428955078125, + "loss": 1.049, + "margin_dpo/margin_mean": 95.35098266601562, + "margin_dpo/margin_std": 123.23958587646484, + "step": 177 + }, + { + "KL/chosen_KL_mean": -109.24971008300781, + "KL/mean": -168.47171020507812, + "KL/rejected_KL_mean": -227.69369506835938, + "KL/std": 135.19757080078125, + "epoch": 0.26138032305433184, + "fcm_dpo/beta": 0.00480748200789094, + "fcm_dpo/delta": -0.18060356378555298, + "fcm_dpo/margin": 118.44398498535156, + "fcm_dpo/q_t": 0.37657660245895386, + "grad_norm": 27.715505599975586, + "learning_rate": 4.6255428393240354e-07, + "logits/chosen": -0.1987697333097458, + "logits/rejected": -0.18940778076648712, + "logps/chosen": -167.51449584960938, + "logps/ref_chosen": -58.26478958129883, + "logps/ref_rejected": -105.3653335571289, + "logps/rejected": -333.05902099609375, + "loss": 1.0039, + "margin_dpo/margin_mean": 118.4439926147461, + "margin_dpo/margin_std": 148.56935119628906, + "step": 178 + }, + { + "KL/chosen_KL_mean": -115.18132019042969, + "KL/mean": -156.70834350585938, + "KL/rejected_KL_mean": -198.23538208007812, + "KL/std": 117.613525390625, + "epoch": 0.26284875183553597, + "fcm_dpo/beta": 0.00476008839905262, + "fcm_dpo/delta": 0.004362210631370544, + "fcm_dpo/margin": 83.05406188964844, + "fcm_dpo/q_t": 0.41322624683380127, + "grad_norm": 28.284343719482422, + "learning_rate": 4.6187590003538724e-07, + "logits/chosen": -0.30069026350975037, + "logits/rejected": -0.3113616406917572, + "logps/chosen": -176.23965454101562, + "logps/ref_chosen": -61.05832290649414, + "logps/ref_rejected": -90.52782440185547, + "logps/rejected": -288.7632141113281, + "loss": 1.1442, + "margin_dpo/margin_mean": 83.05406188964844, + "margin_dpo/margin_std": 147.2235107421875, + "step": 179 + }, + { + "KL/chosen_KL_mean": -99.40576171875, + "KL/mean": -153.18653869628906, + "KL/rejected_KL_mean": -206.9673309326172, + "KL/std": 105.46197509765625, + "epoch": 0.2643171806167401, + "fcm_dpo/beta": 0.004700476303696632, + "fcm_dpo/delta": -0.11138296127319336, + "fcm_dpo/margin": 107.56156158447266, + "fcm_dpo/q_t": 0.3854052722454071, + "grad_norm": 20.0328369140625, + "learning_rate": 4.611919330113591e-07, + "logits/chosen": -0.26214757561683655, + "logits/rejected": -0.25771957635879517, + "logps/chosen": -153.74847412109375, + "logps/ref_chosen": -54.34272003173828, + "logps/ref_rejected": -98.21183776855469, + "logps/rejected": -305.1791687011719, + "loss": 1.0301, + "margin_dpo/margin_mean": 107.56156158447266, + "margin_dpo/margin_std": 131.99119567871094, + "step": 180 + }, + { + "KL/chosen_KL_mean": -86.6176986694336, + "KL/mean": -119.9801254272461, + "KL/rejected_KL_mean": -153.34255981445312, + "KL/std": 96.91038513183594, + "epoch": 0.2657856093979442, + "fcm_dpo/beta": 0.004746724851429462, + "fcm_dpo/delta": 0.08582982420921326, + "fcm_dpo/margin": 66.72486877441406, + "fcm_dpo/q_t": 0.428183913230896, + "grad_norm": 23.883167266845703, + "learning_rate": 4.605024008834863e-07, + "logits/chosen": -0.3121437132358551, + "logits/rejected": -0.2885586619377136, + "logps/chosen": -141.6181640625, + "logps/ref_chosen": -55.000457763671875, + "logps/ref_rejected": -61.656166076660156, + "logps/rejected": -214.99871826171875, + "loss": 1.174, + "margin_dpo/margin_mean": 66.72486877441406, + "margin_dpo/margin_std": 122.89712524414062, + "step": 181 + }, + { + "KL/chosen_KL_mean": -82.56523895263672, + "KL/mean": -142.1549530029297, + "KL/rejected_KL_mean": -201.74465942382812, + "KL/std": 118.71504211425781, + "epoch": 0.26725403817914833, + "fcm_dpo/beta": 0.00462943222373724, + "fcm_dpo/delta": -0.16154250502586365, + "fcm_dpo/margin": 119.17942810058594, + "fcm_dpo/q_t": 0.37539470195770264, + "grad_norm": 18.672292709350586, + "learning_rate": 4.598073218215817e-07, + "logits/chosen": -0.2760277986526489, + "logits/rejected": -0.28654640913009644, + "logps/chosen": -123.673095703125, + "logps/ref_chosen": -41.107852935791016, + "logps/ref_rejected": -89.5215835571289, + "logps/rejected": -291.2662353515625, + "loss": 1.0089, + "margin_dpo/margin_mean": 119.17942810058594, + "margin_dpo/margin_std": 142.9564971923828, + "step": 182 + }, + { + "KL/chosen_KL_mean": -120.36442565917969, + "KL/mean": -152.0966339111328, + "KL/rejected_KL_mean": -183.828857421875, + "KL/std": 97.2325210571289, + "epoch": 0.2687224669603524, + "fcm_dpo/beta": 0.004568018019199371, + "fcm_dpo/delta": -0.04479080066084862, + "fcm_dpo/margin": 63.46442413330078, + "fcm_dpo/q_t": 0.43251746892929077, + "grad_norm": 21.404373168945312, + "learning_rate": 4.5910671414162484e-07, + "logits/chosen": -0.29337215423583984, + "logits/rejected": -0.28445976972579956, + "logps/chosen": -177.8889923095703, + "logps/ref_chosen": -57.52456283569336, + "logps/ref_rejected": -75.97572326660156, + "logps/rejected": -259.8045654296875, + "loss": 1.1828, + "margin_dpo/margin_mean": 63.46442413330078, + "margin_dpo/margin_std": 107.15849304199219, + "step": 183 + }, + { + "KL/chosen_KL_mean": -99.98910522460938, + "KL/mean": -133.18710327148438, + "KL/rejected_KL_mean": -166.38510131835938, + "KL/std": 91.612060546875, + "epoch": 0.2701908957415565, + "fcm_dpo/beta": 0.0045661963522434235, + "fcm_dpo/delta": -0.003989125601947308, + "fcm_dpo/margin": 66.39598083496094, + "fcm_dpo/q_t": 0.4311205744743347, + "grad_norm": 19.19173240661621, + "learning_rate": 4.5840059630527985e-07, + "logits/chosen": -0.33634817600250244, + "logits/rejected": -0.32674121856689453, + "logps/chosen": -158.5340576171875, + "logps/ref_chosen": -58.544952392578125, + "logps/ref_rejected": -76.63406372070312, + "logps/rejected": -243.0191650390625, + "loss": 1.1735, + "margin_dpo/margin_mean": 66.39598083496094, + "margin_dpo/margin_std": 115.35989379882812, + "step": 184 + }, + { + "KL/chosen_KL_mean": -108.26544189453125, + "KL/mean": -134.82513427734375, + "KL/rejected_KL_mean": -161.38482666015625, + "KL/std": 105.25448608398438, + "epoch": 0.27165932452276065, + "fcm_dpo/beta": 0.004670283757150173, + "fcm_dpo/delta": 0.15583746135234833, + "fcm_dpo/margin": 53.11936950683594, + "fcm_dpo/q_t": 0.4466909170150757, + "grad_norm": 20.08793067932129, + "learning_rate": 4.5768898691940836e-07, + "logits/chosen": -0.28927063941955566, + "logits/rejected": -0.26542210578918457, + "logps/chosen": -170.29129028320312, + "logps/ref_chosen": -62.025848388671875, + "logps/ref_rejected": -73.7625961303711, + "logps/rejected": -235.1474151611328, + "loss": 1.2331, + "margin_dpo/margin_mean": 53.1193733215332, + "margin_dpo/margin_std": 126.44635772705078, + "step": 185 + }, + { + "KL/chosen_KL_mean": -99.16331481933594, + "KL/mean": -149.17770385742188, + "KL/rejected_KL_mean": -199.19210815429688, + "KL/std": 105.57518768310547, + "epoch": 0.27312775330396477, + "fcm_dpo/beta": 0.004654415883123875, + "fcm_dpo/delta": -0.06891189515590668, + "fcm_dpo/margin": 100.02880096435547, + "fcm_dpo/q_t": 0.39436179399490356, + "grad_norm": 22.96204948425293, + "learning_rate": 4.5697190473557947e-07, + "logits/chosen": -0.36928582191467285, + "logits/rejected": -0.3490529954433441, + "logps/chosen": -168.51678466796875, + "logps/ref_chosen": -69.35346984863281, + "logps/ref_rejected": -88.07244873046875, + "logps/rejected": -287.2645568847656, + "loss": 1.0462, + "margin_dpo/margin_mean": 100.02880096435547, + "margin_dpo/margin_std": 121.30867767333984, + "step": 186 + }, + { + "KL/chosen_KL_mean": -92.08950805664062, + "KL/mean": -133.75186157226562, + "KL/rejected_KL_mean": -175.4142303466797, + "KL/std": 100.45945739746094, + "epoch": 0.2745961820851689, + "fcm_dpo/beta": 0.004670889116823673, + "fcm_dpo/delta": 0.01085655763745308, + "fcm_dpo/margin": 83.32472229003906, + "fcm_dpo/q_t": 0.4105120003223419, + "grad_norm": 21.904727935791016, + "learning_rate": 4.5624936864957555e-07, + "logits/chosen": -0.3443525433540344, + "logits/rejected": -0.3386707901954651, + "logps/chosen": -144.84596252441406, + "logps/ref_chosen": -52.7564582824707, + "logps/ref_rejected": -81.96910095214844, + "logps/rejected": -257.3833312988281, + "loss": 1.096, + "margin_dpo/margin_mean": 83.32472229003906, + "margin_dpo/margin_std": 108.60208129882812, + "step": 187 + }, + { + "KL/chosen_KL_mean": -86.056640625, + "KL/mean": -135.95150756835938, + "KL/rejected_KL_mean": -185.84640502929688, + "KL/std": 111.05039978027344, + "epoch": 0.27606461086637296, + "fcm_dpo/beta": 0.004614308476448059, + "fcm_dpo/delta": -0.06334332376718521, + "fcm_dpo/margin": 99.78976440429688, + "fcm_dpo/q_t": 0.3958631753921509, + "grad_norm": 25.85726547241211, + "learning_rate": 4.5552139770089454e-07, + "logits/chosen": -0.3261992633342743, + "logits/rejected": -0.33161741495132446, + "logps/chosen": -135.4721221923828, + "logps/ref_chosen": -49.415489196777344, + "logps/ref_rejected": -89.54043579101562, + "logps/rejected": -275.3868408203125, + "loss": 1.0498, + "margin_dpo/margin_mean": 99.7897720336914, + "margin_dpo/margin_std": 121.95850372314453, + "step": 188 + }, + { + "KL/chosen_KL_mean": -99.45054626464844, + "KL/mean": -141.6304931640625, + "KL/rejected_KL_mean": -183.81045532226562, + "KL/std": 111.99412536621094, + "epoch": 0.2775330396475771, + "fcm_dpo/beta": 0.004608414135873318, + "fcm_dpo/delta": 0.011658096686005592, + "fcm_dpo/margin": 84.35992431640625, + "fcm_dpo/q_t": 0.4146321415901184, + "grad_norm": 20.663795471191406, + "learning_rate": 4.5478801107224794e-07, + "logits/chosen": -0.3318672776222229, + "logits/rejected": -0.31224292516708374, + "logps/chosen": -151.84950256347656, + "logps/ref_chosen": -52.39896011352539, + "logps/ref_rejected": -72.16735076904297, + "logps/rejected": -255.97781372070312, + "loss": 1.1224, + "margin_dpo/margin_mean": 84.35991668701172, + "margin_dpo/margin_std": 137.48220825195312, + "step": 189 + }, + { + "KL/chosen_KL_mean": -106.70040893554688, + "KL/mean": -155.38369750976562, + "KL/rejected_KL_mean": -204.06698608398438, + "KL/std": 119.2169418334961, + "epoch": 0.2790014684287812, + "fcm_dpo/beta": 0.004610296338796616, + "fcm_dpo/delta": -0.05222197249531746, + "fcm_dpo/margin": 97.3665771484375, + "fcm_dpo/q_t": 0.4002448320388794, + "grad_norm": 17.597808837890625, + "learning_rate": 4.5404922808905543e-07, + "logits/chosen": -0.367323637008667, + "logits/rejected": -0.35587793588638306, + "logps/chosen": -171.3834686279297, + "logps/ref_chosen": -64.68305969238281, + "logps/ref_rejected": -102.55052185058594, + "logps/rejected": -306.61749267578125, + "loss": 1.085, + "margin_dpo/margin_mean": 97.3665771484375, + "margin_dpo/margin_std": 137.46681213378906, + "step": 190 + }, + { + "KL/chosen_KL_mean": -99.138916015625, + "KL/mean": -169.95510864257812, + "KL/rejected_KL_mean": -240.77130126953125, + "KL/std": 138.72772216796875, + "epoch": 0.28046989720998533, + "fcm_dpo/beta": 0.0043829334899783134, + "fcm_dpo/delta": -0.23674961924552917, + "fcm_dpo/margin": 141.63238525390625, + "fcm_dpo/q_t": 0.3627857565879822, + "grad_norm": 18.488035202026367, + "learning_rate": 4.5330506821893565e-07, + "logits/chosen": -0.3747413754463196, + "logits/rejected": -0.35563361644744873, + "logps/chosen": -167.79779052734375, + "logps/ref_chosen": -68.65887451171875, + "logps/ref_rejected": -110.1396713256836, + "logps/rejected": -350.9109802246094, + "loss": 0.9539, + "margin_dpo/margin_mean": 141.63238525390625, + "margin_dpo/margin_std": 152.71173095703125, + "step": 191 + }, + { + "KL/chosen_KL_mean": -128.03395080566406, + "KL/mean": -174.38095092773438, + "KL/rejected_KL_mean": -220.72796630859375, + "KL/std": 119.869873046875, + "epoch": 0.28193832599118945, + "fcm_dpo/beta": 0.0043565696105360985, + "fcm_dpo/delta": -0.004114950075745583, + "fcm_dpo/margin": 92.69398498535156, + "fcm_dpo/q_t": 0.4108489751815796, + "grad_norm": 22.34331703186035, + "learning_rate": 4.5255555107119336e-07, + "logits/chosen": -0.3097224831581116, + "logits/rejected": -0.3084886074066162, + "logps/chosen": -197.7608642578125, + "logps/ref_chosen": -69.72691345214844, + "logps/ref_rejected": -103.32135009765625, + "logps/rejected": -324.04931640625, + "loss": 1.1169, + "margin_dpo/margin_mean": 92.69398498535156, + "margin_dpo/margin_std": 148.20654296875, + "step": 192 + }, + { + "KL/chosen_KL_mean": -128.42974853515625, + "KL/mean": -156.01699829101562, + "KL/rejected_KL_mean": -183.60421752929688, + "KL/std": 111.40166473388672, + "epoch": 0.2834067547723935, + "fcm_dpo/beta": 0.004361086059361696, + "fcm_dpo/delta": 0.038300659507513046, + "fcm_dpo/margin": 55.174468994140625, + "fcm_dpo/q_t": 0.44291800260543823, + "grad_norm": 26.714811325073242, + "learning_rate": 4.5180069639630236e-07, + "logits/chosen": -0.31071868538856506, + "logits/rejected": -0.2987961769104004, + "logps/chosen": -188.6202392578125, + "logps/ref_chosen": -60.19049835205078, + "logps/ref_rejected": -76.40755462646484, + "logps/rejected": -260.01177978515625, + "loss": 1.2515, + "margin_dpo/margin_mean": 55.17446517944336, + "margin_dpo/margin_std": 142.05120849609375, + "step": 193 + }, + { + "KL/chosen_KL_mean": -80.51405334472656, + "KL/mean": -124.77084350585938, + "KL/rejected_KL_mean": -169.02764892578125, + "KL/std": 93.25508117675781, + "epoch": 0.28487518355359764, + "fcm_dpo/beta": 0.004372420255094767, + "fcm_dpo/delta": 0.013335110619664192, + "fcm_dpo/margin": 88.51360321044922, + "fcm_dpo/q_t": 0.40928915143013, + "grad_norm": 18.569744110107422, + "learning_rate": 4.510405240853854e-07, + "logits/chosen": -0.22196577489376068, + "logits/rejected": -0.20398879051208496, + "logps/chosen": -118.35442352294922, + "logps/ref_chosen": -37.84037399291992, + "logps/ref_rejected": -60.684783935546875, + "logps/rejected": -229.71243286132812, + "loss": 1.0846, + "margin_dpo/margin_mean": 88.51359558105469, + "margin_dpo/margin_std": 102.0927963256836, + "step": 194 + }, + { + "KL/chosen_KL_mean": -125.90398406982422, + "KL/mean": -173.78524780273438, + "KL/rejected_KL_mean": -221.66653442382812, + "KL/std": 114.66876220703125, + "epoch": 0.28634361233480177, + "fcm_dpo/beta": 0.004376476630568504, + "fcm_dpo/delta": -0.019938159734010696, + "fcm_dpo/margin": 95.7625503540039, + "fcm_dpo/q_t": 0.4035083055496216, + "grad_norm": 21.247806549072266, + "learning_rate": 4.5027505416968985e-07, + "logits/chosen": -0.28381267189979553, + "logits/rejected": -0.3019316792488098, + "logps/chosen": -180.79556274414062, + "logps/ref_chosen": -54.891571044921875, + "logps/ref_rejected": -96.77095794677734, + "logps/rejected": -318.4375, + "loss": 1.0719, + "margin_dpo/margin_mean": 95.76254272460938, + "margin_dpo/margin_std": 117.87249755859375, + "step": 195 + }, + { + "KL/chosen_KL_mean": -98.61161041259766, + "KL/mean": -152.5880584716797, + "KL/rejected_KL_mean": -206.56451416015625, + "KL/std": 116.51144409179688, + "epoch": 0.2878120411160059, + "fcm_dpo/beta": 0.004304712638258934, + "fcm_dpo/delta": -0.06879311800003052, + "fcm_dpo/margin": 107.95289611816406, + "fcm_dpo/q_t": 0.3957948684692383, + "grad_norm": 16.711626052856445, + "learning_rate": 4.495043068200599e-07, + "logits/chosen": -0.31423407793045044, + "logits/rejected": -0.29917240142822266, + "logps/chosen": -151.8568572998047, + "logps/ref_chosen": -53.245243072509766, + "logps/ref_rejected": -76.05294799804688, + "logps/rejected": -282.6174621582031, + "loss": 1.0614, + "margin_dpo/margin_mean": 107.95289611816406, + "margin_dpo/margin_std": 139.44515991210938, + "step": 196 + }, + { + "KL/chosen_KL_mean": -100.81398010253906, + "KL/mean": -142.37448120117188, + "KL/rejected_KL_mean": -183.9349822998047, + "KL/std": 100.89237976074219, + "epoch": 0.28928046989721, + "fcm_dpo/beta": 0.004351750016212463, + "fcm_dpo/delta": 0.03937269002199173, + "fcm_dpo/margin": 83.12100982666016, + "fcm_dpo/q_t": 0.4173157811164856, + "grad_norm": 18.018680572509766, + "learning_rate": 4.4872830234640493e-07, + "logits/chosen": -0.303945392370224, + "logits/rejected": -0.2977169454097748, + "logps/chosen": -161.23431396484375, + "logps/ref_chosen": -60.42033386230469, + "logps/ref_rejected": -77.20890808105469, + "logps/rejected": -261.1438903808594, + "loss": 1.116, + "margin_dpo/margin_mean": 83.12100982666016, + "margin_dpo/margin_std": 115.0757827758789, + "step": 197 + }, + { + "KL/chosen_KL_mean": -112.49896240234375, + "KL/mean": -166.41671752929688, + "KL/rejected_KL_mean": -220.33447265625, + "KL/std": 126.63810729980469, + "epoch": 0.2907488986784141, + "fcm_dpo/beta": 0.0043054306879639626, + "fcm_dpo/delta": -0.06737668812274933, + "fcm_dpo/margin": 107.83549499511719, + "fcm_dpo/q_t": 0.3973381817340851, + "grad_norm": 19.930042266845703, + "learning_rate": 4.479470611971645e-07, + "logits/chosen": -0.3347511887550354, + "logits/rejected": -0.3346249759197235, + "logps/chosen": -167.53515625, + "logps/ref_chosen": -55.03618621826172, + "logps/ref_rejected": -97.24325561523438, + "logps/rejected": -317.5777282714844, + "loss": 1.0595, + "margin_dpo/margin_mean": 107.83550262451172, + "margin_dpo/margin_std": 144.60723876953125, + "step": 198 + }, + { + "KL/chosen_KL_mean": -108.42107391357422, + "KL/mean": -160.91925048828125, + "KL/rejected_KL_mean": -213.41744995117188, + "KL/std": 113.93467712402344, + "epoch": 0.2922173274596182, + "fcm_dpo/beta": 0.004234119318425655, + "fcm_dpo/delta": -0.047552645206451416, + "fcm_dpo/margin": 104.99636840820312, + "fcm_dpo/q_t": 0.39938676357269287, + "grad_norm": 21.286582946777344, + "learning_rate": 4.471606039587695e-07, + "logits/chosen": -0.3568047285079956, + "logits/rejected": -0.34048551321029663, + "logps/chosen": -165.24990844726562, + "logps/ref_chosen": -56.828826904296875, + "logps/ref_rejected": -84.64820861816406, + "logps/rejected": -298.065673828125, + "loss": 1.0729, + "margin_dpo/margin_mean": 104.99636840820312, + "margin_dpo/margin_std": 139.47503662109375, + "step": 199 + }, + { + "KL/chosen_KL_mean": -106.86558532714844, + "KL/mean": -160.4095916748047, + "KL/rejected_KL_mean": -213.95359802246094, + "KL/std": 124.127197265625, + "epoch": 0.2936857562408223, + "fcm_dpo/beta": 0.0041997479274868965, + "fcm_dpo/delta": -0.05252185836434364, + "fcm_dpo/margin": 107.0880126953125, + "fcm_dpo/q_t": 0.4007049798965454, + "grad_norm": 19.893062591552734, + "learning_rate": 4.4636895135509966e-07, + "logits/chosen": -0.26880979537963867, + "logits/rejected": -0.24893805384635925, + "logps/chosen": -159.93264770507812, + "logps/ref_chosen": -53.06706237792969, + "logps/ref_rejected": -80.60843658447266, + "logps/rejected": -294.5620422363281, + "loss": 1.0865, + "margin_dpo/margin_mean": 107.0880126953125, + "margin_dpo/margin_std": 158.81661987304688, + "step": 200 + }, + { + "KL/chosen_KL_mean": -111.46297454833984, + "KL/mean": -163.86080932617188, + "KL/rejected_KL_mean": -216.25863647460938, + "KL/std": 128.08802795410156, + "epoch": 0.29515418502202645, + "fcm_dpo/beta": 0.004188035614788532, + "fcm_dpo/delta": -0.04069505259394646, + "fcm_dpo/margin": 104.79566955566406, + "fcm_dpo/q_t": 0.4008665084838867, + "grad_norm": 19.711732864379883, + "learning_rate": 4.455721242469372e-07, + "logits/chosen": -0.3536655306816101, + "logits/rejected": -0.34921911358833313, + "logps/chosen": -186.86520385742188, + "logps/ref_chosen": -75.4022216796875, + "logps/ref_rejected": -114.80821990966797, + "logps/rejected": -331.06683349609375, + "loss": 1.0782, + "margin_dpo/margin_mean": 104.79566955566406, + "margin_dpo/margin_std": 143.77313232421875, + "step": 201 + }, + { + "KL/chosen_KL_mean": -115.6180191040039, + "KL/mean": -152.35928344726562, + "KL/rejected_KL_mean": -189.10052490234375, + "KL/std": 110.5206527709961, + "epoch": 0.2966226138032305, + "fcm_dpo/beta": 0.004229954443871975, + "fcm_dpo/delta": 0.09195201843976974, + "fcm_dpo/margin": 73.4825210571289, + "fcm_dpo/q_t": 0.43059661984443665, + "grad_norm": 19.91847038269043, + "learning_rate": 4.4477014363141755e-07, + "logits/chosen": -0.3016967177391052, + "logits/rejected": -0.3155549168586731, + "logps/chosen": -165.71932983398438, + "logps/ref_chosen": -50.101318359375, + "logps/ref_rejected": -86.98503112792969, + "logps/rejected": -276.0855712890625, + "loss": 1.1837, + "margin_dpo/margin_mean": 73.48252868652344, + "margin_dpo/margin_std": 142.18524169921875, + "step": 202 + }, + { + "KL/chosen_KL_mean": -113.98049926757812, + "KL/mean": -159.70396423339844, + "KL/rejected_KL_mean": -205.42742919921875, + "KL/std": 113.47210693359375, + "epoch": 0.29809104258443464, + "fcm_dpo/beta": 0.0042491694912314415, + "fcm_dpo/delta": 0.011875176802277565, + "fcm_dpo/margin": 91.44692993164062, + "fcm_dpo/q_t": 0.41100189089775085, + "grad_norm": 20.661174774169922, + "learning_rate": 4.439630306414758e-07, + "logits/chosen": -0.34466350078582764, + "logits/rejected": -0.33388030529022217, + "logps/chosen": -174.59019470214844, + "logps/ref_chosen": -60.60969543457031, + "logps/ref_rejected": -85.89596557617188, + "logps/rejected": -291.3233947753906, + "loss": 1.1, + "margin_dpo/margin_mean": 91.44693756103516, + "margin_dpo/margin_std": 124.70313262939453, + "step": 203 + }, + { + "KL/chosen_KL_mean": -128.3519287109375, + "KL/mean": -170.64601135253906, + "KL/rejected_KL_mean": -212.94009399414062, + "KL/std": 125.15516662597656, + "epoch": 0.29955947136563876, + "fcm_dpo/beta": 0.004280552733689547, + "fcm_dpo/delta": 0.03934932500123978, + "fcm_dpo/margin": 84.58815002441406, + "fcm_dpo/q_t": 0.42007431387901306, + "grad_norm": 22.547698974609375, + "learning_rate": 4.431508065452897e-07, + "logits/chosen": -0.42533358931541443, + "logits/rejected": -0.38667869567871094, + "logps/chosen": -208.5168914794922, + "logps/ref_chosen": -80.16496276855469, + "logps/ref_rejected": -87.69590759277344, + "logps/rejected": -300.635986328125, + "loss": 1.1461, + "margin_dpo/margin_mean": 84.58815002441406, + "margin_dpo/margin_std": 144.9606170654297, + "step": 204 + }, + { + "KL/chosen_KL_mean": -124.03070068359375, + "KL/mean": -180.54966735839844, + "KL/rejected_KL_mean": -237.0686492919922, + "KL/std": 127.14155578613281, + "epoch": 0.3010279001468429, + "fcm_dpo/beta": 0.004203906282782555, + "fcm_dpo/delta": -0.08103010058403015, + "fcm_dpo/margin": 113.03794860839844, + "fcm_dpo/q_t": 0.39093706011772156, + "grad_norm": 20.967945098876953, + "learning_rate": 4.4233349274571974e-07, + "logits/chosen": -0.31887465715408325, + "logits/rejected": -0.2883029878139496, + "logps/chosen": -183.41543579101562, + "logps/ref_chosen": -59.384735107421875, + "logps/ref_rejected": -85.12505340576172, + "logps/rejected": -322.1936950683594, + "loss": 1.0529, + "margin_dpo/margin_mean": 113.03794860839844, + "margin_dpo/margin_std": 139.7296142578125, + "step": 205 + }, + { + "KL/chosen_KL_mean": -115.17393493652344, + "KL/mean": -175.67129516601562, + "KL/rejected_KL_mean": -236.16864013671875, + "KL/std": 120.58004760742188, + "epoch": 0.302496328928047, + "fcm_dpo/beta": 0.004139425233006477, + "fcm_dpo/delta": -0.10684061050415039, + "fcm_dpo/margin": 120.99469757080078, + "fcm_dpo/q_t": 0.38356366753578186, + "grad_norm": 26.109760284423828, + "learning_rate": 4.415111107797445e-07, + "logits/chosen": -0.27412861585617065, + "logits/rejected": -0.2763686776161194, + "logps/chosen": -162.138427734375, + "logps/ref_chosen": -46.964500427246094, + "logps/ref_rejected": -98.9534912109375, + "logps/rejected": -335.12213134765625, + "loss": 1.0156, + "margin_dpo/margin_mean": 120.99468994140625, + "margin_dpo/margin_std": 130.6953125, + "step": 206 + }, + { + "KL/chosen_KL_mean": -105.1968002319336, + "KL/mean": -174.7778778076172, + "KL/rejected_KL_mean": -244.35894775390625, + "KL/std": 136.5259246826172, + "epoch": 0.3039647577092511, + "fcm_dpo/beta": 0.004033949691802263, + "fcm_dpo/delta": -0.17078402638435364, + "fcm_dpo/margin": 139.16213989257812, + "fcm_dpo/q_t": 0.37533849477767944, + "grad_norm": 25.737268447875977, + "learning_rate": 4.4068368231789365e-07, + "logits/chosen": -0.3557325005531311, + "logits/rejected": -0.3302071690559387, + "logps/chosen": -161.2530517578125, + "logps/ref_chosen": -56.05625915527344, + "logps/ref_rejected": -84.44779968261719, + "logps/rejected": -328.8067626953125, + "loss": 0.9897, + "margin_dpo/margin_mean": 139.16213989257812, + "margin_dpo/margin_std": 157.79168701171875, + "step": 207 + }, + { + "KL/chosen_KL_mean": -164.72898864746094, + "KL/mean": -220.01724243164062, + "KL/rejected_KL_mean": -275.3055114746094, + "KL/std": 134.671875, + "epoch": 0.3054331864904552, + "fcm_dpo/beta": 0.003962271846830845, + "fcm_dpo/delta": -0.04002426564693451, + "fcm_dpo/margin": 110.57653045654297, + "fcm_dpo/q_t": 0.40099895000457764, + "grad_norm": 24.69184112548828, + "learning_rate": 4.398512291636768e-07, + "logits/chosen": -0.3562470078468323, + "logits/rejected": -0.337843656539917, + "logps/chosen": -231.79660034179688, + "logps/ref_chosen": -67.06761169433594, + "logps/ref_rejected": -94.28689575195312, + "logps/rejected": -369.5924072265625, + "loss": 1.092, + "margin_dpo/margin_mean": 110.57653045654297, + "margin_dpo/margin_std": 163.5906982421875, + "step": 208 + }, + { + "KL/chosen_KL_mean": -138.68316650390625, + "KL/mean": -187.58811950683594, + "KL/rejected_KL_mean": -236.49307250976562, + "KL/std": 122.4765853881836, + "epoch": 0.3069016152716593, + "fcm_dpo/beta": 0.003970026038587093, + "fcm_dpo/delta": 0.012157567776739597, + "fcm_dpo/margin": 97.8099365234375, + "fcm_dpo/q_t": 0.4124048352241516, + "grad_norm": 29.169567108154297, + "learning_rate": 4.3901377325300857e-07, + "logits/chosen": -0.25604674220085144, + "logits/rejected": -0.24463605880737305, + "logps/chosen": -194.86485290527344, + "logps/ref_chosen": -56.18169403076172, + "logps/ref_rejected": -80.94152069091797, + "logps/rejected": -317.4346008300781, + "loss": 1.1271, + "margin_dpo/margin_mean": 97.8099365234375, + "margin_dpo/margin_std": 156.21786499023438, + "step": 209 + }, + { + "KL/chosen_KL_mean": -126.92047119140625, + "KL/mean": -183.3939971923828, + "KL/rejected_KL_mean": -239.86752319335938, + "KL/std": 125.841552734375, + "epoch": 0.30837004405286345, + "fcm_dpo/beta": 0.003958011977374554, + "fcm_dpo/delta": -0.0493808314204216, + "fcm_dpo/margin": 112.94706726074219, + "fcm_dpo/q_t": 0.39954549074172974, + "grad_norm": 24.660263061523438, + "learning_rate": 4.381713366536311e-07, + "logits/chosen": -0.2841013967990875, + "logits/rejected": -0.275867760181427, + "logps/chosen": -173.29229736328125, + "logps/ref_chosen": -46.371822357177734, + "logps/ref_rejected": -76.68162536621094, + "logps/rejected": -316.54913330078125, + "loss": 1.0728, + "margin_dpo/margin_mean": 112.94705200195312, + "margin_dpo/margin_std": 152.02679443359375, + "step": 210 + }, + { + "KL/chosen_KL_mean": -178.33975219726562, + "KL/mean": -226.99276733398438, + "KL/rejected_KL_mean": -275.6457824707031, + "KL/std": 142.97116088867188, + "epoch": 0.30983847283406757, + "fcm_dpo/beta": 0.003944946452975273, + "fcm_dpo/delta": 0.01675173081457615, + "fcm_dpo/margin": 97.3060302734375, + "fcm_dpo/q_t": 0.4174480438232422, + "grad_norm": 32.09341812133789, + "learning_rate": 4.373239415645323e-07, + "logits/chosen": -0.2926616668701172, + "logits/rejected": -0.25190287828445435, + "logps/chosen": -257.2720947265625, + "logps/ref_chosen": -78.93235778808594, + "logps/ref_rejected": -86.82098388671875, + "logps/rejected": -362.4667663574219, + "loss": 1.141, + "margin_dpo/margin_mean": 97.3060302734375, + "margin_dpo/margin_std": 168.17588806152344, + "step": 211 + }, + { + "KL/chosen_KL_mean": -147.63983154296875, + "KL/mean": -216.47125244140625, + "KL/rejected_KL_mean": -285.30267333984375, + "KL/std": 154.91241455078125, + "epoch": 0.31130690161527164, + "fcm_dpo/beta": 0.0038361717015504837, + "fcm_dpo/delta": -0.13747426867485046, + "fcm_dpo/margin": 137.662841796875, + "fcm_dpo/q_t": 0.3819977641105652, + "grad_norm": 25.37755584716797, + "learning_rate": 4.3647161031536086e-07, + "logits/chosen": -0.3288855254650116, + "logits/rejected": -0.32180070877075195, + "logps/chosen": -205.83685302734375, + "logps/ref_chosen": -58.19701385498047, + "logps/ref_rejected": -103.05785369873047, + "logps/rejected": -388.36053466796875, + "loss": 1.0332, + "margin_dpo/margin_mean": 137.66285705566406, + "margin_dpo/margin_std": 171.55409240722656, + "step": 212 + }, + { + "KL/chosen_KL_mean": -137.6666259765625, + "KL/mean": -203.332275390625, + "KL/rejected_KL_mean": -268.9979248046875, + "KL/std": 133.84786987304688, + "epoch": 0.31277533039647576, + "fcm_dpo/beta": 0.0037672575563192368, + "fcm_dpo/delta": -0.10035522282123566, + "fcm_dpo/margin": 131.33128356933594, + "fcm_dpo/q_t": 0.38752636313438416, + "grad_norm": 25.538270950317383, + "learning_rate": 4.3561436536583774e-07, + "logits/chosen": -0.3342677354812622, + "logits/rejected": -0.3116719126701355, + "logps/chosen": -205.17935180664062, + "logps/ref_chosen": -67.51271057128906, + "logps/ref_rejected": -93.91471862792969, + "logps/rejected": -362.9126281738281, + "loss": 1.036, + "margin_dpo/margin_mean": 131.33128356933594, + "margin_dpo/margin_std": 160.22622680664062, + "step": 213 + }, + { + "KL/chosen_KL_mean": -119.18568420410156, + "KL/mean": -178.6602020263672, + "KL/rejected_KL_mean": -238.13473510742188, + "KL/std": 132.51971435546875, + "epoch": 0.3142437591776799, + "fcm_dpo/beta": 0.0037363125011324883, + "fcm_dpo/delta": -0.04649418964982033, + "fcm_dpo/margin": 118.94905090332031, + "fcm_dpo/q_t": 0.39983218908309937, + "grad_norm": 20.97165870666504, + "learning_rate": 4.3475222930516473e-07, + "logits/chosen": -0.23450475931167603, + "logits/rejected": -0.23990775644779205, + "logps/chosen": -160.7905731201172, + "logps/ref_chosen": -41.604888916015625, + "logps/ref_rejected": -77.51741027832031, + "logps/rejected": -315.65216064453125, + "loss": 1.0677, + "margin_dpo/margin_mean": 118.94905090332031, + "margin_dpo/margin_std": 155.6826629638672, + "step": 214 + }, + { + "KL/chosen_KL_mean": -140.20767211914062, + "KL/mean": -203.81375122070312, + "KL/rejected_KL_mean": -267.41986083984375, + "KL/std": 139.04193115234375, + "epoch": 0.315712187958884, + "fcm_dpo/beta": 0.0036775285843759775, + "fcm_dpo/delta": -0.0715101957321167, + "fcm_dpo/margin": 127.212158203125, + "fcm_dpo/q_t": 0.39243778586387634, + "grad_norm": 25.126728057861328, + "learning_rate": 4.3388522485142885e-07, + "logits/chosen": -0.28624850511550903, + "logits/rejected": -0.2777059078216553, + "logps/chosen": -193.4869384765625, + "logps/ref_chosen": -53.279266357421875, + "logps/ref_rejected": -89.96464538574219, + "logps/rejected": -357.3844909667969, + "loss": 1.0375, + "margin_dpo/margin_mean": 127.212158203125, + "margin_dpo/margin_std": 143.94126892089844, + "step": 215 + }, + { + "KL/chosen_KL_mean": -144.26304626464844, + "KL/mean": -206.27752685546875, + "KL/rejected_KL_mean": -268.2920227050781, + "KL/std": 143.73822021484375, + "epoch": 0.31718061674008813, + "fcm_dpo/beta": 0.003656826913356781, + "fcm_dpo/delta": -0.05615860968828201, + "fcm_dpo/margin": 124.02898406982422, + "fcm_dpo/q_t": 0.39861971139907837, + "grad_norm": 26.616178512573242, + "learning_rate": 4.330133748510036e-07, + "logits/chosen": -0.26884669065475464, + "logits/rejected": -0.2517741322517395, + "logps/chosen": -193.15084838867188, + "logps/ref_chosen": -48.887794494628906, + "logps/ref_rejected": -77.19892883300781, + "logps/rejected": -345.490966796875, + "loss": 1.0802, + "margin_dpo/margin_mean": 124.02898406982422, + "margin_dpo/margin_std": 176.90469360351562, + "step": 216 + }, + { + "KL/chosen_KL_mean": -149.80250549316406, + "KL/mean": -223.11126708984375, + "KL/rejected_KL_mean": -296.4200439453125, + "KL/std": 148.53799438476562, + "epoch": 0.3186490455212922, + "fcm_dpo/beta": 0.0035566347651183605, + "fcm_dpo/delta": -0.12853044271469116, + "fcm_dpo/margin": 146.6175537109375, + "fcm_dpo/q_t": 0.3826901316642761, + "grad_norm": 20.384174346923828, + "learning_rate": 4.3213670227794757e-07, + "logits/chosen": -0.2615566849708557, + "logits/rejected": -0.25557541847229004, + "logps/chosen": -199.64779663085938, + "logps/ref_chosen": -49.845306396484375, + "logps/ref_rejected": -100.07832336425781, + "logps/rejected": -396.4983825683594, + "loss": 1.0109, + "margin_dpo/margin_mean": 146.6175537109375, + "margin_dpo/margin_std": 168.1234130859375, + "step": 217 + }, + { + "KL/chosen_KL_mean": -155.2289581298828, + "KL/mean": -211.18832397460938, + "KL/rejected_KL_mean": -267.147705078125, + "KL/std": 144.78065490722656, + "epoch": 0.3201174743024963, + "fcm_dpo/beta": 0.003537412267178297, + "fcm_dpo/delta": 0.0042178574949502945, + "fcm_dpo/margin": 111.91874694824219, + "fcm_dpo/q_t": 0.4114026129245758, + "grad_norm": 23.56877326965332, + "learning_rate": 4.3125523023339815e-07, + "logits/chosen": -0.2710033059120178, + "logits/rejected": -0.26507091522216797, + "logps/chosen": -213.80563354492188, + "logps/ref_chosen": -58.576683044433594, + "logps/ref_rejected": -87.84639739990234, + "logps/rejected": -354.9941101074219, + "loss": 1.1138, + "margin_dpo/margin_mean": 111.91874694824219, + "margin_dpo/margin_std": 170.3919677734375, + "step": 218 + }, + { + "KL/chosen_KL_mean": -167.38955688476562, + "KL/mean": -215.71990966796875, + "KL/rejected_KL_mean": -264.05023193359375, + "KL/std": 150.576904296875, + "epoch": 0.32158590308370044, + "fcm_dpo/beta": 0.003590244799852371, + "fcm_dpo/delta": 0.05411606281995773, + "fcm_dpo/margin": 96.66064453125, + "fcm_dpo/q_t": 0.4225817918777466, + "grad_norm": 27.783300399780273, + "learning_rate": 4.303689819449636e-07, + "logits/chosen": -0.3175503611564636, + "logits/rejected": -0.31175172328948975, + "logps/chosen": -228.47341918945312, + "logps/ref_chosen": -61.083858489990234, + "logps/ref_rejected": -85.83042907714844, + "logps/rejected": -349.88067626953125, + "loss": 1.1723, + "margin_dpo/margin_mean": 96.66064453125, + "margin_dpo/margin_std": 184.8462371826172, + "step": 219 + }, + { + "KL/chosen_KL_mean": -190.74407958984375, + "KL/mean": -230.7036590576172, + "KL/rejected_KL_mean": -270.6632080078125, + "KL/std": 130.32717895507812, + "epoch": 0.32305433186490456, + "fcm_dpo/beta": 0.0036365140695124865, + "fcm_dpo/delta": 0.11278827488422394, + "fcm_dpo/margin": 79.91914367675781, + "fcm_dpo/q_t": 0.4320225119590759, + "grad_norm": 28.48792266845703, + "learning_rate": 4.2947798076611047e-07, + "logits/chosen": -0.3003755211830139, + "logits/rejected": -0.2773016095161438, + "logps/chosen": -260.775390625, + "logps/ref_chosen": -70.03128051757812, + "logps/ref_rejected": -87.68551635742188, + "logps/rejected": -358.3487548828125, + "loss": 1.1734, + "margin_dpo/margin_mean": 79.91913604736328, + "margin_dpo/margin_std": 133.44189453125, + "step": 220 + }, + { + "KL/chosen_KL_mean": -163.2496337890625, + "KL/mean": -252.80859375, + "KL/rejected_KL_mean": -342.3675231933594, + "KL/std": 164.04974365234375, + "epoch": 0.3245227606461087, + "fcm_dpo/beta": 0.003541819052770734, + "fcm_dpo/delta": -0.24972575902938843, + "fcm_dpo/margin": 179.11788940429688, + "fcm_dpo/q_t": 0.3551170825958252, + "grad_norm": 28.492124557495117, + "learning_rate": 4.285822501755485e-07, + "logits/chosen": -0.28169721364974976, + "logits/rejected": -0.28886687755584717, + "logps/chosen": -215.40435791015625, + "logps/ref_chosen": -52.15470886230469, + "logps/ref_rejected": -106.46768188476562, + "logps/rejected": -448.835205078125, + "loss": 0.9307, + "margin_dpo/margin_mean": 179.11788940429688, + "margin_dpo/margin_std": 166.56512451171875, + "step": 221 + }, + { + "KL/chosen_KL_mean": -164.61285400390625, + "KL/mean": -231.60415649414062, + "KL/rejected_KL_mean": -298.595458984375, + "KL/std": 149.91842651367188, + "epoch": 0.32599118942731276, + "fcm_dpo/beta": 0.0034665679559111595, + "fcm_dpo/delta": -0.0677119642496109, + "fcm_dpo/margin": 133.98260498046875, + "fcm_dpo/q_t": 0.39466869831085205, + "grad_norm": 19.775772094726562, + "learning_rate": 4.276818137766118e-07, + "logits/chosen": -0.31213629245758057, + "logits/rejected": -0.3134229779243469, + "logps/chosen": -225.58395385742188, + "logps/ref_chosen": -60.971099853515625, + "logps/ref_rejected": -100.00115203857422, + "logps/rejected": -398.59661865234375, + "loss": 1.0546, + "margin_dpo/margin_mean": 133.98260498046875, + "margin_dpo/margin_std": 168.62554931640625, + "step": 222 + }, + { + "KL/chosen_KL_mean": -172.0750274658203, + "KL/mean": -228.94554138183594, + "KL/rejected_KL_mean": -285.8160705566406, + "KL/std": 145.8609161376953, + "epoch": 0.3274596182085169, + "fcm_dpo/beta": 0.0034393020905554295, + "fcm_dpo/delta": 0.009148719720542431, + "fcm_dpo/margin": 113.74103546142578, + "fcm_dpo/q_t": 0.4120517075061798, + "grad_norm": 23.93748664855957, + "learning_rate": 4.2677669529663686e-07, + "logits/chosen": -0.25658541917800903, + "logits/rejected": -0.2517361640930176, + "logps/chosen": -224.71560668945312, + "logps/ref_chosen": -52.64057540893555, + "logps/ref_rejected": -82.82502746582031, + "logps/rejected": -368.64111328125, + "loss": 1.1262, + "margin_dpo/margin_mean": 113.74102783203125, + "margin_dpo/margin_std": 185.43588256835938, + "step": 223 + }, + { + "KL/chosen_KL_mean": -147.19102478027344, + "KL/mean": -212.12939453125, + "KL/rejected_KL_mean": -277.0677490234375, + "KL/std": 160.6813507080078, + "epoch": 0.328928046989721, + "fcm_dpo/beta": 0.003397725522518158, + "fcm_dpo/delta": -0.04458841681480408, + "fcm_dpo/margin": 129.87673950195312, + "fcm_dpo/q_t": 0.4026610255241394, + "grad_norm": 25.208892822265625, + "learning_rate": 4.2586691858633747e-07, + "logits/chosen": -0.29166120290756226, + "logits/rejected": -0.2709968686103821, + "logps/chosen": -195.78643798828125, + "logps/ref_chosen": -48.59541320800781, + "logps/ref_rejected": -77.11648559570312, + "logps/rejected": -354.18426513671875, + "loss": 1.0858, + "margin_dpo/margin_mean": 129.87673950195312, + "margin_dpo/margin_std": 186.29876708984375, + "step": 224 + }, + { + "KL/chosen_KL_mean": -164.91543579101562, + "KL/mean": -239.89012145996094, + "KL/rejected_KL_mean": -314.86480712890625, + "KL/std": 153.07418823242188, + "epoch": 0.3303964757709251, + "fcm_dpo/beta": 0.0033569016959518194, + "fcm_dpo/delta": -0.10895158350467682, + "fcm_dpo/margin": 149.94940185546875, + "fcm_dpo/q_t": 0.3864118158817291, + "grad_norm": 20.780384063720703, + "learning_rate": 4.249525076191759e-07, + "logits/chosen": -0.32863831520080566, + "logits/rejected": -0.317804753780365, + "logps/chosen": -222.9158935546875, + "logps/ref_chosen": -58.000465393066406, + "logps/ref_rejected": -99.90291595458984, + "logps/rejected": -414.7677307128906, + "loss": 1.0318, + "margin_dpo/margin_mean": 149.94940185546875, + "margin_dpo/margin_std": 185.24148559570312, + "step": 225 + }, + { + "KL/chosen_KL_mean": -139.17755126953125, + "KL/mean": -198.74496459960938, + "KL/rejected_KL_mean": -258.3123779296875, + "KL/std": 148.14669799804688, + "epoch": 0.33186490455212925, + "fcm_dpo/beta": 0.0033233477734029293, + "fcm_dpo/delta": 0.00356471911072731, + "fcm_dpo/margin": 119.13485717773438, + "fcm_dpo/q_t": 0.4114220142364502, + "grad_norm": 24.846914291381836, + "learning_rate": 4.2403348649073167e-07, + "logits/chosen": -0.3634711503982544, + "logits/rejected": -0.3225502371788025, + "logps/chosen": -198.0763397216797, + "logps/ref_chosen": -58.898799896240234, + "logps/ref_rejected": -78.68775939941406, + "logps/rejected": -337.0001525878906, + "loss": 1.1056, + "margin_dpo/margin_mean": 119.13485717773438, + "margin_dpo/margin_std": 170.89193725585938, + "step": 226 + }, + { + "KL/chosen_KL_mean": -161.0607452392578, + "KL/mean": -235.2391357421875, + "KL/rejected_KL_mean": -309.41754150390625, + "KL/std": 167.58319091796875, + "epoch": 0.3333333333333333, + "fcm_dpo/beta": 0.0032854501623660326, + "fcm_dpo/delta": -0.09252498298883438, + "fcm_dpo/margin": 148.35679626464844, + "fcm_dpo/q_t": 0.38895145058631897, + "grad_norm": 21.650360107421875, + "learning_rate": 4.2310987941806615e-07, + "logits/chosen": -0.3720400631427765, + "logits/rejected": -0.35931509733200073, + "logps/chosen": -220.1329345703125, + "logps/ref_chosen": -59.072181701660156, + "logps/ref_rejected": -99.41236877441406, + "logps/rejected": -408.82989501953125, + "loss": 1.0331, + "margin_dpo/margin_mean": 148.35679626464844, + "margin_dpo/margin_std": 176.366943359375, + "step": 227 + }, + { + "KL/chosen_KL_mean": -169.7421875, + "KL/mean": -223.08303833007812, + "KL/rejected_KL_mean": -276.4239196777344, + "KL/std": 140.11859130859375, + "epoch": 0.33480176211453744, + "fcm_dpo/beta": 0.003310044761747122, + "fcm_dpo/delta": 0.04850192740559578, + "fcm_dpo/margin": 106.68174743652344, + "fcm_dpo/q_t": 0.41904619336128235, + "grad_norm": 23.304601669311523, + "learning_rate": 4.2218171073908463e-07, + "logits/chosen": -0.33180832862854004, + "logits/rejected": -0.31371521949768066, + "logps/chosen": -235.6334686279297, + "logps/ref_chosen": -65.89128875732422, + "logps/ref_rejected": -91.04875183105469, + "logps/rejected": -367.47265625, + "loss": 1.1403, + "margin_dpo/margin_mean": 106.6817398071289, + "margin_dpo/margin_std": 171.9312286376953, + "step": 228 + }, + { + "KL/chosen_KL_mean": -174.5965576171875, + "KL/mean": -231.82008361816406, + "KL/rejected_KL_mean": -289.04364013671875, + "KL/std": 162.17124938964844, + "epoch": 0.33627019089574156, + "fcm_dpo/beta": 0.0033262791112065315, + "fcm_dpo/delta": 0.02005820721387863, + "fcm_dpo/margin": 114.44705200195312, + "fcm_dpo/q_t": 0.41243118047714233, + "grad_norm": 36.011322021484375, + "learning_rate": 4.212490049118951e-07, + "logits/chosen": -0.4325563311576843, + "logits/rejected": -0.4033244848251343, + "logps/chosen": -245.3029327392578, + "logps/ref_chosen": -70.70637512207031, + "logps/ref_rejected": -84.52741241455078, + "logps/rejected": -373.571044921875, + "loss": 1.1159, + "margin_dpo/margin_mean": 114.4470443725586, + "margin_dpo/margin_std": 170.42984008789062, + "step": 229 + }, + { + "KL/chosen_KL_mean": -136.42132568359375, + "KL/mean": -223.09548950195312, + "KL/rejected_KL_mean": -309.7696533203125, + "KL/std": 155.21017456054688, + "epoch": 0.3377386196769457, + "fcm_dpo/beta": 0.003240791615098715, + "fcm_dpo/delta": -0.17158903181552887, + "fcm_dpo/margin": 173.3483123779297, + "fcm_dpo/q_t": 0.3700660765171051, + "grad_norm": 35.2843132019043, + "learning_rate": 4.203117865141635e-07, + "logits/chosen": -0.3165392279624939, + "logits/rejected": -0.3209174871444702, + "logps/chosen": -175.70333862304688, + "logps/ref_chosen": -39.282005310058594, + "logps/ref_rejected": -85.62191009521484, + "logps/rejected": -395.3915710449219, + "loss": 0.9711, + "margin_dpo/margin_mean": 173.34832763671875, + "margin_dpo/margin_std": 170.71090698242188, + "step": 230 + }, + { + "KL/chosen_KL_mean": -159.6990509033203, + "KL/mean": -219.6187744140625, + "KL/rejected_KL_mean": -279.5384826660156, + "KL/std": 139.4580078125, + "epoch": 0.3392070484581498, + "fcm_dpo/beta": 0.00322412746027112, + "fcm_dpo/delta": 0.014150663278996944, + "fcm_dpo/margin": 119.83944702148438, + "fcm_dpo/q_t": 0.4127604365348816, + "grad_norm": 24.092422485351562, + "learning_rate": 4.1937008024246625e-07, + "logits/chosen": -0.3582533597946167, + "logits/rejected": -0.3271549940109253, + "logps/chosen": -222.97549438476562, + "logps/ref_chosen": -63.27644348144531, + "logps/ref_rejected": -74.1239013671875, + "logps/rejected": -353.66241455078125, + "loss": 1.1, + "margin_dpo/margin_mean": 119.83944702148438, + "margin_dpo/margin_std": 162.927978515625, + "step": 231 + }, + { + "KL/chosen_KL_mean": -195.55023193359375, + "KL/mean": -243.71401977539062, + "KL/rejected_KL_mean": -291.87774658203125, + "KL/std": 161.44937133789062, + "epoch": 0.3406754772393539, + "fcm_dpo/beta": 0.003260795958340168, + "fcm_dpo/delta": 0.0887773260474205, + "fcm_dpo/margin": 96.32752227783203, + "fcm_dpo/q_t": 0.43005359172821045, + "grad_norm": 22.96601676940918, + "learning_rate": 4.1842391091163933e-07, + "logits/chosen": -0.38562819361686707, + "logits/rejected": -0.3633359670639038, + "logps/chosen": -266.29901123046875, + "logps/ref_chosen": -70.74876403808594, + "logps/ref_rejected": -83.97706604003906, + "logps/rejected": -375.8548278808594, + "loss": 1.164, + "margin_dpo/margin_mean": 96.3275146484375, + "margin_dpo/margin_std": 165.77828979492188, + "step": 232 + }, + { + "KL/chosen_KL_mean": -174.73155212402344, + "KL/mean": -252.10891723632812, + "KL/rejected_KL_mean": -329.48626708984375, + "KL/std": 172.6971435546875, + "epoch": 0.342143906020558, + "fcm_dpo/beta": 0.003234952688217163, + "fcm_dpo/delta": -0.10582563281059265, + "fcm_dpo/margin": 154.7547149658203, + "fcm_dpo/q_t": 0.3919374644756317, + "grad_norm": 22.466392517089844, + "learning_rate": 4.174733034541245e-07, + "logits/chosen": -0.3720843195915222, + "logits/rejected": -0.373913049697876, + "logps/chosen": -229.61448669433594, + "logps/ref_chosen": -54.8829345703125, + "logps/ref_rejected": -107.4800796508789, + "logps/rejected": -436.96630859375, + "loss": 1.064, + "margin_dpo/margin_mean": 154.7547149658203, + "margin_dpo/margin_std": 224.2666015625, + "step": 233 + }, + { + "KL/chosen_KL_mean": -169.48391723632812, + "KL/mean": -249.71534729003906, + "KL/rejected_KL_mean": -329.9467468261719, + "KL/std": 153.66720581054688, + "epoch": 0.3436123348017621, + "fcm_dpo/beta": 0.0031358040869235992, + "fcm_dpo/delta": -0.11081574857234955, + "fcm_dpo/margin": 160.4628448486328, + "fcm_dpo/q_t": 0.3850485682487488, + "grad_norm": 30.547178268432617, + "learning_rate": 4.165182829193126e-07, + "logits/chosen": -0.3440871834754944, + "logits/rejected": -0.37053510546684265, + "logps/chosen": -213.57843017578125, + "logps/ref_chosen": -44.094520568847656, + "logps/ref_rejected": -100.00663757324219, + "logps/rejected": -429.953369140625, + "loss": 1.0167, + "margin_dpo/margin_mean": 160.4628448486328, + "margin_dpo/margin_std": 173.27178955078125, + "step": 234 + }, + { + "KL/chosen_KL_mean": -202.32957458496094, + "KL/mean": -254.01788330078125, + "KL/rejected_KL_mean": -305.7062072753906, + "KL/std": 147.2139892578125, + "epoch": 0.34508076358296624, + "fcm_dpo/beta": 0.0031859464943408966, + "fcm_dpo/delta": 0.072694793343544, + "fcm_dpo/margin": 103.37664031982422, + "fcm_dpo/q_t": 0.4246324300765991, + "grad_norm": 26.673070907592773, + "learning_rate": 4.1555887447288255e-07, + "logits/chosen": -0.40312904119491577, + "logits/rejected": -0.38146403431892395, + "logps/chosen": -264.5675048828125, + "logps/ref_chosen": -62.237911224365234, + "logps/ref_rejected": -90.39506530761719, + "logps/rejected": -396.10125732421875, + "loss": 1.1606, + "margin_dpo/margin_mean": 103.37664031982422, + "margin_dpo/margin_std": 178.27023315429688, + "step": 235 + }, + { + "KL/chosen_KL_mean": -145.07064819335938, + "KL/mean": -228.562255859375, + "KL/rejected_KL_mean": -312.0538635253906, + "KL/std": 156.35775756835938, + "epoch": 0.3465491923641703, + "fcm_dpo/beta": 0.003130989382043481, + "fcm_dpo/delta": -0.12949799001216888, + "fcm_dpo/margin": 166.98321533203125, + "fcm_dpo/q_t": 0.37771958112716675, + "grad_norm": 37.26255416870117, + "learning_rate": 4.1459510339613946e-07, + "logits/chosen": -0.3943854570388794, + "logits/rejected": -0.3957618474960327, + "logps/chosen": -194.41201782226562, + "logps/ref_chosen": -49.34136199951172, + "logps/ref_rejected": -103.51162719726562, + "logps/rejected": -415.56549072265625, + "loss": 0.9815, + "margin_dpo/margin_mean": 166.98321533203125, + "margin_dpo/margin_std": 149.95436096191406, + "step": 236 + }, + { + "KL/chosen_KL_mean": -200.8836669921875, + "KL/mean": -262.3459777832031, + "KL/rejected_KL_mean": -323.80828857421875, + "KL/std": 158.80929565429688, + "epoch": 0.34801762114537443, + "fcm_dpo/beta": 0.0031184733379632235, + "fcm_dpo/delta": 0.01728936657309532, + "fcm_dpo/margin": 122.92462921142578, + "fcm_dpo/q_t": 0.41311001777648926, + "grad_norm": 30.76072120666504, + "learning_rate": 4.136269950853473e-07, + "logits/chosen": -0.36829280853271484, + "logits/rejected": -0.36081379652023315, + "logps/chosen": -255.05178833007812, + "logps/ref_chosen": -54.168121337890625, + "logps/ref_rejected": -94.78036499023438, + "logps/rejected": -418.5886535644531, + "loss": 1.1151, + "margin_dpo/margin_mean": 122.92462921142578, + "margin_dpo/margin_std": 184.13796997070312, + "step": 237 + }, + { + "KL/chosen_KL_mean": -175.17578125, + "KL/mean": -237.3186492919922, + "KL/rejected_KL_mean": -299.4615478515625, + "KL/std": 155.89273071289062, + "epoch": 0.34948604992657856, + "fcm_dpo/beta": 0.0031098374165594578, + "fcm_dpo/delta": 0.01364682987332344, + "fcm_dpo/margin": 124.28575134277344, + "fcm_dpo/q_t": 0.4137893319129944, + "grad_norm": 22.210086822509766, + "learning_rate": 4.126545750510605e-07, + "logits/chosen": -0.3522963523864746, + "logits/rejected": -0.3655349612236023, + "logps/chosen": -229.14889526367188, + "logps/ref_chosen": -53.973121643066406, + "logps/ref_rejected": -89.41795349121094, + "logps/rejected": -388.8794860839844, + "loss": 1.1056, + "margin_dpo/margin_mean": 124.28575134277344, + "margin_dpo/margin_std": 175.60623168945312, + "step": 238 + }, + { + "KL/chosen_KL_mean": -194.32131958007812, + "KL/mean": -264.3119201660156, + "KL/rejected_KL_mean": -334.3025207519531, + "KL/std": 149.57962036132812, + "epoch": 0.3509544787077827, + "fcm_dpo/beta": 0.0030899234116077423, + "fcm_dpo/delta": -0.0351216085255146, + "fcm_dpo/margin": 139.981201171875, + "fcm_dpo/q_t": 0.3998359739780426, + "grad_norm": 27.21063804626465, + "learning_rate": 4.116778689174514e-07, + "logits/chosen": -0.34761273860931396, + "logits/rejected": -0.3327868580818176, + "logps/chosen": -252.4191436767578, + "logps/ref_chosen": -58.09782409667969, + "logps/ref_rejected": -93.59294128417969, + "logps/rejected": -427.89544677734375, + "loss": 1.0671, + "margin_dpo/margin_mean": 139.981201171875, + "margin_dpo/margin_std": 168.2056121826172, + "step": 239 + }, + { + "KL/chosen_KL_mean": -205.28285217285156, + "KL/mean": -263.65673828125, + "KL/rejected_KL_mean": -322.0306396484375, + "KL/std": 164.07862854003906, + "epoch": 0.3524229074889868, + "fcm_dpo/beta": 0.0031123950611799955, + "fcm_dpo/delta": 0.03792831301689148, + "fcm_dpo/margin": 116.74779510498047, + "fcm_dpo/q_t": 0.4176512360572815, + "grad_norm": 35.285789489746094, + "learning_rate": 4.106969024216348e-07, + "logits/chosen": -0.4174574017524719, + "logits/rejected": -0.39536041021347046, + "logps/chosen": -265.8973388671875, + "logps/ref_chosen": -60.6144905090332, + "logps/ref_rejected": -74.1185302734375, + "logps/rejected": -396.149169921875, + "loss": 1.1445, + "margin_dpo/margin_mean": 116.74779510498047, + "margin_dpo/margin_std": 196.01150512695312, + "step": 240 + }, + { + "KL/chosen_KL_mean": -170.59561157226562, + "KL/mean": -261.97332763671875, + "KL/rejected_KL_mean": -353.35101318359375, + "KL/std": 178.49505615234375, + "epoch": 0.35389133627019087, + "fcm_dpo/beta": 0.003058013506233692, + "fcm_dpo/delta": -0.1680062711238861, + "fcm_dpo/margin": 182.75537109375, + "fcm_dpo/q_t": 0.37589675188064575, + "grad_norm": 23.488187789916992, + "learning_rate": 4.097117014129903e-07, + "logits/chosen": -0.4499303102493286, + "logits/rejected": -0.42419755458831787, + "logps/chosen": -236.68667602539062, + "logps/ref_chosen": -66.091064453125, + "logps/ref_rejected": -88.06088256835938, + "logps/rejected": -441.41192626953125, + "loss": 0.9946, + "margin_dpo/margin_mean": 182.75540161132812, + "margin_dpo/margin_std": 207.28306579589844, + "step": 241 + }, + { + "KL/chosen_KL_mean": -195.0386199951172, + "KL/mean": -261.244140625, + "KL/rejected_KL_mean": -327.4496154785156, + "KL/std": 152.1888427734375, + "epoch": 0.355359765051395, + "fcm_dpo/beta": 0.003036870388314128, + "fcm_dpo/delta": -0.0025676079094409943, + "fcm_dpo/margin": 132.41098022460938, + "fcm_dpo/q_t": 0.40894001722335815, + "grad_norm": 31.73065185546875, + "learning_rate": 4.087222918524807e-07, + "logits/chosen": -0.3427576422691345, + "logits/rejected": -0.31177082657814026, + "logps/chosen": -262.9025573730469, + "logps/ref_chosen": -67.86392974853516, + "logps/ref_rejected": -83.36033630371094, + "logps/rejected": -410.8099365234375, + "loss": 1.0916, + "margin_dpo/margin_mean": 132.41098022460938, + "margin_dpo/margin_std": 177.2808380126953, + "step": 242 + }, + { + "KL/chosen_KL_mean": -196.42782592773438, + "KL/mean": -277.6934814453125, + "KL/rejected_KL_mean": -358.9591064453125, + "KL/std": 171.84295654296875, + "epoch": 0.3568281938325991, + "fcm_dpo/beta": 0.002979143988341093, + "fcm_dpo/delta": -0.0886797159910202, + "fcm_dpo/margin": 162.53128051757812, + "fcm_dpo/q_t": 0.38965845108032227, + "grad_norm": 21.928974151611328, + "learning_rate": 4.07728699811968e-07, + "logits/chosen": -0.3825536072254181, + "logits/rejected": -0.349841833114624, + "logps/chosen": -259.5120849609375, + "logps/ref_chosen": -63.0842399597168, + "logps/ref_rejected": -76.33563232421875, + "logps/rejected": -435.29473876953125, + "loss": 1.0322, + "margin_dpo/margin_mean": 162.53128051757812, + "margin_dpo/margin_std": 191.0034637451172, + "step": 243 + }, + { + "KL/chosen_KL_mean": -171.73031616210938, + "KL/mean": -254.4647979736328, + "KL/rejected_KL_mean": -337.19927978515625, + "KL/std": 164.24757385253906, + "epoch": 0.35829662261380324, + "fcm_dpo/beta": 0.002924954518675804, + "fcm_dpo/delta": -0.08855760842561722, + "fcm_dpo/margin": 165.46893310546875, + "fcm_dpo/q_t": 0.38810837268829346, + "grad_norm": 32.71875762939453, + "learning_rate": 4.067309514735267e-07, + "logits/chosen": -0.43552297353744507, + "logits/rejected": -0.4287059009075165, + "logps/chosen": -232.87100219726562, + "logps/ref_chosen": -61.140689849853516, + "logps/ref_rejected": -94.89193725585938, + "logps/rejected": -432.0912170410156, + "loss": 1.0172, + "margin_dpo/margin_mean": 165.4689483642578, + "margin_dpo/margin_std": 169.16017150878906, + "step": 244 + }, + { + "KL/chosen_KL_mean": -196.77499389648438, + "KL/mean": -261.5411376953125, + "KL/rejected_KL_mean": -326.3072509765625, + "KL/std": 160.04470825195312, + "epoch": 0.35976505139500736, + "fcm_dpo/beta": 0.0029053720645606518, + "fcm_dpo/delta": 0.023488402366638184, + "fcm_dpo/margin": 129.53224182128906, + "fcm_dpo/q_t": 0.4132363200187683, + "grad_norm": 32.37158966064453, + "learning_rate": 4.057290731287531e-07, + "logits/chosen": -0.43005210161209106, + "logits/rejected": -0.40020519495010376, + "logps/chosen": -264.03729248046875, + "logps/ref_chosen": -67.26228332519531, + "logps/ref_rejected": -87.64010620117188, + "logps/rejected": -413.94732666015625, + "loss": 1.1129, + "margin_dpo/margin_mean": 129.53224182128906, + "margin_dpo/margin_std": 176.98114013671875, + "step": 245 + }, + { + "KL/chosen_KL_mean": -197.54446411132812, + "KL/mean": -263.78521728515625, + "KL/rejected_KL_mean": -330.02593994140625, + "KL/std": 181.79562377929688, + "epoch": 0.36123348017621143, + "fcm_dpo/beta": 0.002928508911281824, + "fcm_dpo/delta": 0.012364866212010384, + "fcm_dpo/margin": 132.4814910888672, + "fcm_dpo/q_t": 0.41279125213623047, + "grad_norm": 29.707550048828125, + "learning_rate": 4.047230911780736e-07, + "logits/chosen": -0.4249339699745178, + "logits/rejected": -0.3837849497795105, + "logps/chosen": -264.241455078125, + "logps/ref_chosen": -66.69696807861328, + "logps/ref_rejected": -84.34634399414062, + "logps/rejected": -414.372314453125, + "loss": 1.1073, + "margin_dpo/margin_mean": 132.4814910888672, + "margin_dpo/margin_std": 192.24024963378906, + "step": 246 + }, + { + "KL/chosen_KL_mean": -230.63812255859375, + "KL/mean": -323.56597900390625, + "KL/rejected_KL_mean": -416.493896484375, + "KL/std": 187.34368896484375, + "epoch": 0.36270190895741555, + "fcm_dpo/beta": 0.002874248195439577, + "fcm_dpo/delta": -0.1419781595468521, + "fcm_dpo/margin": 185.85574340820312, + "fcm_dpo/q_t": 0.3779839277267456, + "grad_norm": 32.81269836425781, + "learning_rate": 4.0371303213004814e-07, + "logits/chosen": -0.36910104751586914, + "logits/rejected": -0.3661007285118103, + "logps/chosen": -287.24346923828125, + "logps/ref_chosen": -56.6053466796875, + "logps/ref_rejected": -106.29326629638672, + "logps/rejected": -522.787109375, + "loss": 1.0, + "margin_dpo/margin_mean": 185.8557586669922, + "margin_dpo/margin_std": 202.15628051757812, + "step": 247 + }, + { + "KL/chosen_KL_mean": -196.98757934570312, + "KL/mean": -277.89544677734375, + "KL/rejected_KL_mean": -358.8033447265625, + "KL/std": 152.73196411132812, + "epoch": 0.3641703377386197, + "fcm_dpo/beta": 0.0028204985428601503, + "fcm_dpo/delta": -0.05965063348412514, + "fcm_dpo/margin": 161.81573486328125, + "fcm_dpo/q_t": 0.3918088674545288, + "grad_norm": 24.8028564453125, + "learning_rate": 4.0269892260067197e-07, + "logits/chosen": -0.3791336119174957, + "logits/rejected": -0.3977039158344269, + "logps/chosen": -241.03079223632812, + "logps/ref_chosen": -44.043216705322266, + "logps/ref_rejected": -91.85687255859375, + "logps/rejected": -450.66021728515625, + "loss": 1.0232, + "margin_dpo/margin_mean": 161.81573486328125, + "margin_dpo/margin_std": 147.67108154296875, + "step": 248 + }, + { + "KL/chosen_KL_mean": -255.34439086914062, + "KL/mean": -299.1787109375, + "KL/rejected_KL_mean": -343.01300048828125, + "KL/std": 171.1071014404297, + "epoch": 0.3656387665198238, + "fcm_dpo/beta": 0.0028861965984106064, + "fcm_dpo/delta": 0.15084651112556458, + "fcm_dpo/margin": 87.6685791015625, + "fcm_dpo/q_t": 0.44312575459480286, + "grad_norm": 36.00886154174805, + "learning_rate": 4.0168078931267426e-07, + "logits/chosen": -0.41152477264404297, + "logits/rejected": -0.38563063740730286, + "logps/chosen": -317.7867431640625, + "logps/ref_chosen": -62.442352294921875, + "logps/ref_rejected": -80.46806335449219, + "logps/rejected": -423.4810485839844, + "loss": 1.2297, + "margin_dpo/margin_mean": 87.6685791015625, + "margin_dpo/margin_std": 201.31149291992188, + "step": 249 + }, + { + "KL/chosen_KL_mean": -218.36785888671875, + "KL/mean": -301.58306884765625, + "KL/rejected_KL_mean": -384.79827880859375, + "KL/std": 167.09228515625, + "epoch": 0.3671071953010279, + "fcm_dpo/beta": 0.0028843069449067116, + "fcm_dpo/delta": -0.08404796570539474, + "fcm_dpo/margin": 166.430419921875, + "fcm_dpo/q_t": 0.3882160782814026, + "grad_norm": 38.20355987548828, + "learning_rate": 4.006586590948141e-07, + "logits/chosen": -0.3953922390937805, + "logits/rejected": -0.3377394676208496, + "logps/chosen": -284.0045471191406, + "logps/ref_chosen": -65.63668823242188, + "logps/ref_rejected": -73.87184143066406, + "logps/rejected": -458.67010498046875, + "loss": 1.0207, + "margin_dpo/margin_mean": 166.430419921875, + "margin_dpo/margin_std": 170.97409057617188, + "step": 250 + }, + { + "KL/chosen_KL_mean": -231.93658447265625, + "KL/mean": -286.5303649902344, + "KL/rejected_KL_mean": -341.1241455078125, + "KL/std": 173.23834228515625, + "epoch": 0.368575624082232, + "fcm_dpo/beta": 0.002891149837523699, + "fcm_dpo/delta": 0.08716142922639847, + "fcm_dpo/margin": 109.18754577636719, + "fcm_dpo/q_t": 0.42818212509155273, + "grad_norm": 36.455989837646484, + "learning_rate": 3.9963255888117325e-07, + "logits/chosen": -0.3735540509223938, + "logits/rejected": -0.34010833501815796, + "logps/chosen": -289.11932373046875, + "logps/ref_chosen": -57.182716369628906, + "logps/ref_rejected": -77.66343688964844, + "logps/rejected": -418.7875671386719, + "loss": 1.1682, + "margin_dpo/margin_mean": 109.18755340576172, + "margin_dpo/margin_std": 187.57749938964844, + "step": 251 + }, + { + "KL/chosen_KL_mean": -218.92428588867188, + "KL/mean": -294.3023986816406, + "KL/rejected_KL_mean": -369.68048095703125, + "KL/std": 150.38790893554688, + "epoch": 0.3700440528634361, + "fcm_dpo/beta": 0.002887298120185733, + "fcm_dpo/delta": -0.03713885694742203, + "fcm_dpo/margin": 150.75619506835938, + "fcm_dpo/q_t": 0.39595597982406616, + "grad_norm": 26.218870162963867, + "learning_rate": 3.9860251571044666e-07, + "logits/chosen": -0.45149749517440796, + "logits/rejected": -0.4118601083755493, + "logps/chosen": -290.60992431640625, + "logps/ref_chosen": -71.68563842773438, + "logps/ref_rejected": -84.75799560546875, + "logps/rejected": -454.4384765625, + "loss": 1.0427, + "margin_dpo/margin_mean": 150.75619506835938, + "margin_dpo/margin_std": 148.70468139648438, + "step": 252 + }, + { + "KL/chosen_KL_mean": -190.15496826171875, + "KL/mean": -262.7193298339844, + "KL/rejected_KL_mean": -335.2836608886719, + "KL/std": 159.39236450195312, + "epoch": 0.37151248164464024, + "fcm_dpo/beta": 0.0028968360275030136, + "fcm_dpo/delta": -0.021846629679203033, + "fcm_dpo/margin": 145.12869262695312, + "fcm_dpo/q_t": 0.40177327394485474, + "grad_norm": 25.34404182434082, + "learning_rate": 3.9756855672522986e-07, + "logits/chosen": -0.4528924822807312, + "logits/rejected": -0.444851279258728, + "logps/chosen": -259.2889099121094, + "logps/ref_chosen": -69.1339340209961, + "logps/ref_rejected": -98.70252990722656, + "logps/rejected": -433.9862060546875, + "loss": 1.0731, + "margin_dpo/margin_mean": 145.12869262695312, + "margin_dpo/margin_std": 174.61839294433594, + "step": 253 + }, + { + "KL/chosen_KL_mean": -183.0496368408203, + "KL/mean": -245.39013671875, + "KL/rejected_KL_mean": -307.73065185546875, + "KL/std": 165.1351318359375, + "epoch": 0.37298091042584436, + "fcm_dpo/beta": 0.0028911656700074673, + "fcm_dpo/delta": 0.04102412983775139, + "fcm_dpo/margin": 124.68098449707031, + "fcm_dpo/q_t": 0.41999658942222595, + "grad_norm": 24.079992294311523, + "learning_rate": 3.965307091713037e-07, + "logits/chosen": -0.40010130405426025, + "logits/rejected": -0.3824934661388397, + "logps/chosen": -237.2046356201172, + "logps/ref_chosen": -54.154998779296875, + "logps/ref_rejected": -90.30764770507812, + "logps/rejected": -398.03826904296875, + "loss": 1.139, + "margin_dpo/margin_mean": 124.68099975585938, + "margin_dpo/margin_std": 206.97445678710938, + "step": 254 + }, + { + "KL/chosen_KL_mean": -189.504150390625, + "KL/mean": -255.857666015625, + "KL/rejected_KL_mean": -322.211181640625, + "KL/std": 146.82505798339844, + "epoch": 0.3744493392070485, + "fcm_dpo/beta": 0.0028981873765587807, + "fcm_dpo/delta": 0.015838047489523888, + "fcm_dpo/margin": 132.70704650878906, + "fcm_dpo/q_t": 0.41084420680999756, + "grad_norm": 21.63443946838379, + "learning_rate": 3.954890003969163e-07, + "logits/chosen": -0.3540547490119934, + "logits/rejected": -0.34169769287109375, + "logps/chosen": -246.64581298828125, + "logps/ref_chosen": -57.14167022705078, + "logps/ref_rejected": -90.2085952758789, + "logps/rejected": -412.4197998046875, + "loss": 1.1089, + "margin_dpo/margin_mean": 132.70704650878906, + "margin_dpo/margin_std": 186.76455688476562, + "step": 255 + }, + { + "KL/chosen_KL_mean": -168.35980224609375, + "KL/mean": -241.50079345703125, + "KL/rejected_KL_mean": -314.6417541503906, + "KL/std": 160.8679656982422, + "epoch": 0.37591776798825255, + "fcm_dpo/beta": 0.002899360843002796, + "fcm_dpo/delta": -0.025190845131874084, + "fcm_dpo/margin": 146.28195190429688, + "fcm_dpo/q_t": 0.4021248519420624, + "grad_norm": 26.03676414489746, + "learning_rate": 3.944434578520628e-07, + "logits/chosen": -0.33700472116470337, + "logits/rejected": -0.34537577629089355, + "logps/chosen": -223.52330017089844, + "logps/ref_chosen": -55.163490295410156, + "logps/ref_rejected": -92.56291961669922, + "logps/rejected": -407.2046813964844, + "loss": 1.0717, + "margin_dpo/margin_mean": 146.28195190429688, + "margin_dpo/margin_std": 182.9776611328125, + "step": 256 + }, + { + "KL/chosen_KL_mean": -165.2210693359375, + "KL/mean": -241.1984100341797, + "KL/rejected_KL_mean": -317.1757507324219, + "KL/std": 167.52880859375, + "epoch": 0.37738619676945667, + "fcm_dpo/beta": 0.0028601905796676874, + "fcm_dpo/delta": -0.03735721856355667, + "fcm_dpo/margin": 151.95468139648438, + "fcm_dpo/q_t": 0.4005059599876404, + "grad_norm": 21.53156089782715, + "learning_rate": 3.933941090877615e-07, + "logits/chosen": -0.3417869806289673, + "logits/rejected": -0.3275744915008545, + "logps/chosen": -214.644775390625, + "logps/ref_chosen": -49.42369842529297, + "logps/ref_rejected": -79.53791809082031, + "logps/rejected": -396.71368408203125, + "loss": 1.0698, + "margin_dpo/margin_mean": 151.95468139648438, + "margin_dpo/margin_std": 187.14981079101562, + "step": 257 + }, + { + "KL/chosen_KL_mean": -218.06576538085938, + "KL/mean": -292.39105224609375, + "KL/rejected_KL_mean": -366.71636962890625, + "KL/std": 176.79669189453125, + "epoch": 0.3788546255506608, + "fcm_dpo/beta": 0.0028670839965343475, + "fcm_dpo/delta": -0.02737291157245636, + "fcm_dpo/margin": 148.650634765625, + "fcm_dpo/q_t": 0.40125665068626404, + "grad_norm": 25.97199249267578, + "learning_rate": 3.923409817553284e-07, + "logits/chosen": -0.32123446464538574, + "logits/rejected": -0.3179280161857605, + "logps/chosen": -277.44989013671875, + "logps/ref_chosen": -59.384124755859375, + "logps/ref_rejected": -95.99010467529297, + "logps/rejected": -462.70648193359375, + "loss": 1.0901, + "margin_dpo/margin_mean": 148.650634765625, + "margin_dpo/margin_std": 211.3903045654297, + "step": 258 + }, + { + "KL/chosen_KL_mean": -207.76504516601562, + "KL/mean": -270.2027893066406, + "KL/rejected_KL_mean": -332.64056396484375, + "KL/std": 169.08270263671875, + "epoch": 0.3803230543318649, + "fcm_dpo/beta": 0.002867575269192457, + "fcm_dpo/delta": 0.04339686781167984, + "fcm_dpo/margin": 124.87550354003906, + "fcm_dpo/q_t": 0.4185197353363037, + "grad_norm": 22.1605281829834, + "learning_rate": 3.9128410360564793e-07, + "logits/chosen": -0.3787084221839905, + "logits/rejected": -0.37561601400375366, + "logps/chosen": -260.5933837890625, + "logps/ref_chosen": -52.828346252441406, + "logps/ref_rejected": -89.191650390625, + "logps/rejected": -421.83221435546875, + "loss": 1.1309, + "margin_dpo/margin_mean": 124.8755111694336, + "margin_dpo/margin_std": 190.36257934570312, + "step": 259 + }, + { + "KL/chosen_KL_mean": -206.97616577148438, + "KL/mean": -292.4883728027344, + "KL/rejected_KL_mean": -378.00054931640625, + "KL/std": 176.58251953125, + "epoch": 0.38179148311306904, + "fcm_dpo/beta": 0.002854567486792803, + "fcm_dpo/delta": -0.09267206490039825, + "fcm_dpo/margin": 171.02438354492188, + "fcm_dpo/q_t": 0.38960734009742737, + "grad_norm": 27.828453063964844, + "learning_rate": 3.9022350248844246e-07, + "logits/chosen": -0.3447574973106384, + "logits/rejected": -0.36221379041671753, + "logps/chosen": -254.3938446044922, + "logps/ref_chosen": -47.41767501831055, + "logps/ref_rejected": -95.08978271484375, + "logps/rejected": -473.09033203125, + "loss": 1.0221, + "margin_dpo/margin_mean": 171.02438354492188, + "margin_dpo/margin_std": 191.23297119140625, + "step": 260 + }, + { + "KL/chosen_KL_mean": -216.47369384765625, + "KL/mean": -295.7045593261719, + "KL/rejected_KL_mean": -374.9354553222656, + "KL/std": 191.41683959960938, + "epoch": 0.3832599118942731, + "fcm_dpo/beta": 0.002804287476465106, + "fcm_dpo/delta": -0.04664212465286255, + "fcm_dpo/margin": 158.46176147460938, + "fcm_dpo/q_t": 0.3995262384414673, + "grad_norm": 20.731279373168945, + "learning_rate": 3.891592063515376e-07, + "logits/chosen": -0.3055582344532013, + "logits/rejected": -0.3050229549407959, + "logps/chosen": -269.50506591796875, + "logps/ref_chosen": -53.03137969970703, + "logps/ref_rejected": -88.51494598388672, + "logps/rejected": -463.4504089355469, + "loss": 1.0723, + "margin_dpo/margin_mean": 158.46176147460938, + "margin_dpo/margin_std": 213.8656005859375, + "step": 261 + }, + { + "KL/chosen_KL_mean": -248.84571838378906, + "KL/mean": -314.5561218261719, + "KL/rejected_KL_mean": -380.2665100097656, + "KL/std": 172.76287841796875, + "epoch": 0.38472834067547723, + "fcm_dpo/beta": 0.00282662408426404, + "fcm_dpo/delta": 0.029231306165456772, + "fcm_dpo/margin": 131.42083740234375, + "fcm_dpo/q_t": 0.4136051535606384, + "grad_norm": 23.95509147644043, + "learning_rate": 3.880912432401264e-07, + "logits/chosen": -0.31161201000213623, + "logits/rejected": -0.2819845676422119, + "logps/chosen": -308.4658508300781, + "logps/ref_chosen": -59.620140075683594, + "logps/ref_rejected": -86.41853332519531, + "logps/rejected": -466.68505859375, + "loss": 1.1038, + "margin_dpo/margin_mean": 131.42083740234375, + "margin_dpo/margin_std": 170.3394317626953, + "step": 262 + }, + { + "KL/chosen_KL_mean": -222.57464599609375, + "KL/mean": -316.29998779296875, + "KL/rejected_KL_mean": -410.025390625, + "KL/std": 204.56842041015625, + "epoch": 0.38619676945668135, + "fcm_dpo/beta": 0.0027571117971092463, + "fcm_dpo/delta": -0.1240774393081665, + "fcm_dpo/margin": 187.4507598876953, + "fcm_dpo/q_t": 0.38271111249923706, + "grad_norm": 21.146869659423828, + "learning_rate": 3.870196412960302e-07, + "logits/chosen": -0.3639793395996094, + "logits/rejected": -0.3423152267932892, + "logps/chosen": -281.99560546875, + "logps/ref_chosen": -59.42094421386719, + "logps/ref_rejected": -96.85720825195312, + "logps/rejected": -506.8825988769531, + "loss": 1.0183, + "margin_dpo/margin_mean": 187.45074462890625, + "margin_dpo/margin_std": 219.32289123535156, + "step": 263 + }, + { + "KL/chosen_KL_mean": -233.87161254882812, + "KL/mean": -311.49884033203125, + "KL/rejected_KL_mean": -389.1260681152344, + "KL/std": 185.69674682617188, + "epoch": 0.3876651982378855, + "fcm_dpo/beta": 0.002720474498346448, + "fcm_dpo/delta": -0.024707935750484467, + "fcm_dpo/margin": 155.25442504882812, + "fcm_dpo/q_t": 0.4044179320335388, + "grad_norm": 23.543067932128906, + "learning_rate": 3.8594442875695665e-07, + "logits/chosen": -0.40215420722961426, + "logits/rejected": -0.3942739963531494, + "logps/chosen": -296.59368896484375, + "logps/ref_chosen": -62.722084045410156, + "logps/ref_rejected": -93.85620880126953, + "logps/rejected": -482.9822998046875, + "loss": 1.0891, + "margin_dpo/margin_mean": 155.25442504882812, + "margin_dpo/margin_std": 211.25302124023438, + "step": 264 + }, + { + "KL/chosen_KL_mean": -253.85220336914062, + "KL/mean": -328.5089111328125, + "KL/rejected_KL_mean": -403.16558837890625, + "KL/std": 207.49630737304688, + "epoch": 0.3891336270190896, + "fcm_dpo/beta": 0.0027331099845469, + "fcm_dpo/delta": -0.008440444245934486, + "fcm_dpo/margin": 149.31336975097656, + "fcm_dpo/q_t": 0.40931302309036255, + "grad_norm": 23.92888832092285, + "learning_rate": 3.848656339557562e-07, + "logits/chosen": -0.29150500893592834, + "logits/rejected": -0.2723013758659363, + "logps/chosen": -315.82366943359375, + "logps/ref_chosen": -61.971466064453125, + "logps/ref_rejected": -88.02059936523438, + "logps/rejected": -491.1861877441406, + "loss": 1.1199, + "margin_dpo/margin_mean": 149.31336975097656, + "margin_dpo/margin_std": 244.71060180664062, + "step": 265 + }, + { + "KL/chosen_KL_mean": -257.31298828125, + "KL/mean": -317.381591796875, + "KL/rejected_KL_mean": -377.4501647949219, + "KL/std": 173.2138671875, + "epoch": 0.39060205580029367, + "fcm_dpo/beta": 0.002761277835816145, + "fcm_dpo/delta": 0.07062655687332153, + "fcm_dpo/margin": 120.13714599609375, + "fcm_dpo/q_t": 0.42465198040008545, + "grad_norm": 50.139766693115234, + "learning_rate": 3.8378328531967507e-07, + "logits/chosen": -0.34931957721710205, + "logits/rejected": -0.30927109718322754, + "logps/chosen": -324.41265869140625, + "logps/ref_chosen": -67.09967041015625, + "logps/ref_rejected": -67.97122192382812, + "logps/rejected": -445.42138671875, + "loss": 1.1531, + "margin_dpo/margin_mean": 120.13715362548828, + "margin_dpo/margin_std": 199.931640625, + "step": 266 + }, + { + "KL/chosen_KL_mean": -230.34881591796875, + "KL/mean": -303.38250732421875, + "KL/rejected_KL_mean": -376.41619873046875, + "KL/std": 187.39883422851562, + "epoch": 0.3920704845814978, + "fcm_dpo/beta": 0.0027605746872723103, + "fcm_dpo/delta": -0.0036356858909130096, + "fcm_dpo/margin": 146.06735229492188, + "fcm_dpo/q_t": 0.40831679105758667, + "grad_norm": 34.82039260864258, + "learning_rate": 3.8269741136960646e-07, + "logits/chosen": -0.3749678134918213, + "logits/rejected": -0.34407055377960205, + "logps/chosen": -299.319580078125, + "logps/ref_chosen": -68.97075653076172, + "logps/ref_rejected": -90.16844940185547, + "logps/rejected": -466.58465576171875, + "loss": 1.1023, + "margin_dpo/margin_mean": 146.06735229492188, + "margin_dpo/margin_std": 212.41896057128906, + "step": 267 + }, + { + "KL/chosen_KL_mean": -238.78897094726562, + "KL/mean": -314.13409423828125, + "KL/rejected_KL_mean": -389.47918701171875, + "KL/std": 180.19076538085938, + "epoch": 0.3935389133627019, + "fcm_dpo/beta": 0.002754632383584976, + "fcm_dpo/delta": -0.016000591218471527, + "fcm_dpo/margin": 150.69021606445312, + "fcm_dpo/q_t": 0.4059686064720154, + "grad_norm": 28.66946029663086, + "learning_rate": 3.8160804071933894e-07, + "logits/chosen": -0.35488247871398926, + "logits/rejected": -0.3602542281150818, + "logps/chosen": -294.68927001953125, + "logps/ref_chosen": -55.90031051635742, + "logps/ref_rejected": -101.64763641357422, + "logps/rejected": -491.1268310546875, + "loss": 1.0941, + "margin_dpo/margin_mean": 150.69021606445312, + "margin_dpo/margin_std": 216.28262329101562, + "step": 268 + }, + { + "KL/chosen_KL_mean": -251.7904052734375, + "KL/mean": -337.79144287109375, + "KL/rejected_KL_mean": -423.79254150390625, + "KL/std": 181.66836547851562, + "epoch": 0.39500734214390604, + "fcm_dpo/beta": 0.0027336953207850456, + "fcm_dpo/delta": -0.07361201196908951, + "fcm_dpo/margin": 172.00210571289062, + "fcm_dpo/q_t": 0.3932827413082123, + "grad_norm": 26.64388084411621, + "learning_rate": 3.8051520207480204e-07, + "logits/chosen": -0.4130655527114868, + "logits/rejected": -0.39535683393478394, + "logps/chosen": -321.8299560546875, + "logps/ref_chosen": -70.03955841064453, + "logps/ref_rejected": -107.34937286376953, + "logps/rejected": -531.1419067382812, + "loss": 1.0618, + "margin_dpo/margin_mean": 172.00210571289062, + "margin_dpo/margin_std": 230.4850616455078, + "step": 269 + }, + { + "KL/chosen_KL_mean": -218.31060791015625, + "KL/mean": -282.62591552734375, + "KL/rejected_KL_mean": -346.9412841796875, + "KL/std": 164.40748596191406, + "epoch": 0.3964757709251101, + "fcm_dpo/beta": 0.0027484484016895294, + "fcm_dpo/delta": 0.04783637449145317, + "fcm_dpo/margin": 128.63064575195312, + "fcm_dpo/q_t": 0.41904473304748535, + "grad_norm": 33.281341552734375, + "learning_rate": 3.794189242333106e-07, + "logits/chosen": -0.43409767746925354, + "logits/rejected": -0.4289151430130005, + "logps/chosen": -287.8440856933594, + "logps/ref_chosen": -69.53347778320312, + "logps/ref_rejected": -109.92864990234375, + "logps/rejected": -456.8699035644531, + "loss": 1.1378, + "margin_dpo/margin_mean": 128.6306610107422, + "margin_dpo/margin_std": 204.2493896484375, + "step": 270 + }, + { + "KL/chosen_KL_mean": -209.23326110839844, + "KL/mean": -294.4723205566406, + "KL/rejected_KL_mean": -379.71136474609375, + "KL/std": 168.14682006835938, + "epoch": 0.39794419970631423, + "fcm_dpo/beta": 0.002710944041609764, + "fcm_dpo/delta": -0.06562402844429016, + "fcm_dpo/margin": 170.47811889648438, + "fcm_dpo/q_t": 0.39391976594924927, + "grad_norm": 23.12546730041504, + "learning_rate": 3.7831923608280514e-07, + "logits/chosen": -0.3764873743057251, + "logits/rejected": -0.36014989018440247, + "logps/chosen": -265.9978332519531, + "logps/ref_chosen": -56.76456832885742, + "logps/ref_rejected": -92.51383972167969, + "logps/rejected": -472.2252197265625, + "loss": 1.0393, + "margin_dpo/margin_mean": 170.47811889648438, + "margin_dpo/margin_std": 191.98031616210938, + "step": 271 + }, + { + "KL/chosen_KL_mean": -200.45635986328125, + "KL/mean": -302.6602783203125, + "KL/rejected_KL_mean": -404.86419677734375, + "KL/std": 187.67584228515625, + "epoch": 0.39941262848751835, + "fcm_dpo/beta": 0.0026611106004565954, + "fcm_dpo/delta": -0.15195293724536896, + "fcm_dpo/margin": 204.4078369140625, + "fcm_dpo/q_t": 0.3735220432281494, + "grad_norm": 33.842586517333984, + "learning_rate": 3.772161666010912e-07, + "logits/chosen": -0.2986787259578705, + "logits/rejected": -0.31125104427337646, + "logps/chosen": -249.95352172851562, + "logps/ref_chosen": -49.497154235839844, + "logps/ref_rejected": -105.54279327392578, + "logps/rejected": -510.406982421875, + "loss": 0.9774, + "margin_dpo/margin_mean": 204.4078369140625, + "margin_dpo/margin_std": 192.80450439453125, + "step": 272 + }, + { + "KL/chosen_KL_mean": -231.20156860351562, + "KL/mean": -327.5953369140625, + "KL/rejected_KL_mean": -423.98907470703125, + "KL/std": 184.35150146484375, + "epoch": 0.4008810572687225, + "fcm_dpo/beta": 0.0025815139524638653, + "fcm_dpo/delta": -0.1033368706703186, + "fcm_dpo/margin": 192.7875213623047, + "fcm_dpo/q_t": 0.38513267040252686, + "grad_norm": 30.54670524597168, + "learning_rate": 3.761097448550755e-07, + "logits/chosen": -0.2978020906448364, + "logits/rejected": -0.2806628346443176, + "logps/chosen": -294.1769714355469, + "logps/ref_chosen": -62.97539520263672, + "logps/ref_rejected": -92.49858093261719, + "logps/rejected": -516.4876708984375, + "loss": 1.0174, + "margin_dpo/margin_mean": 192.7875213623047, + "margin_dpo/margin_std": 209.65719604492188, + "step": 273 + }, + { + "KL/chosen_KL_mean": -276.0918884277344, + "KL/mean": -348.8900146484375, + "KL/rejected_KL_mean": -421.68817138671875, + "KL/std": 173.11854553222656, + "epoch": 0.4023494860499266, + "fcm_dpo/beta": 0.002592704724520445, + "fcm_dpo/delta": 0.023143114522099495, + "fcm_dpo/margin": 145.5963134765625, + "fcm_dpo/q_t": 0.41287532448768616, + "grad_norm": 26.89436149597168, + "learning_rate": 3.75e-07, + "logits/chosen": -0.2569617033004761, + "logits/rejected": -0.24136140942573547, + "logps/chosen": -331.75958251953125, + "logps/ref_chosen": -55.66770935058594, + "logps/ref_rejected": -77.33308410644531, + "logps/rejected": -499.0212707519531, + "loss": 1.1059, + "margin_dpo/margin_mean": 145.5963134765625, + "margin_dpo/margin_std": 196.85382080078125, + "step": 274 + }, + { + "KL/chosen_KL_mean": -210.4385986328125, + "KL/mean": -290.48822021484375, + "KL/rejected_KL_mean": -370.5378112792969, + "KL/std": 176.77706909179688, + "epoch": 0.40381791483113066, + "fcm_dpo/beta": 0.0025886246003210545, + "fcm_dpo/delta": -0.015149945393204689, + "fcm_dpo/margin": 160.09922790527344, + "fcm_dpo/q_t": 0.403909832239151, + "grad_norm": 26.170629501342773, + "learning_rate": 3.738869612786737e-07, + "logits/chosen": -0.31770947575569153, + "logits/rejected": -0.32402610778808594, + "logps/chosen": -259.0332946777344, + "logps/ref_chosen": -48.594703674316406, + "logps/ref_rejected": -93.30369567871094, + "logps/rejected": -463.84149169921875, + "loss": 1.0756, + "margin_dpo/margin_mean": 160.09922790527344, + "margin_dpo/margin_std": 196.3067169189453, + "step": 275 + }, + { + "KL/chosen_KL_mean": -227.3229217529297, + "KL/mean": -307.79864501953125, + "KL/rejected_KL_mean": -388.2743835449219, + "KL/std": 183.3173065185547, + "epoch": 0.4052863436123348, + "fcm_dpo/beta": 0.0025693178176879883, + "fcm_dpo/delta": -0.014239070937037468, + "fcm_dpo/margin": 160.95144653320312, + "fcm_dpo/q_t": 0.4061383008956909, + "grad_norm": 33.1538200378418, + "learning_rate": 3.7277065802070204e-07, + "logits/chosen": -0.2713956832885742, + "logits/rejected": -0.2456541657447815, + "logps/chosen": -283.90032958984375, + "logps/ref_chosen": -56.57740783691406, + "logps/ref_rejected": -70.36566925048828, + "logps/rejected": -458.64007568359375, + "loss": 1.0897, + "margin_dpo/margin_mean": 160.95144653320312, + "margin_dpo/margin_std": 220.95907592773438, + "step": 276 + }, + { + "KL/chosen_KL_mean": -251.29473876953125, + "KL/mean": -333.46185302734375, + "KL/rejected_KL_mean": -415.6289978027344, + "KL/std": 182.88909912109375, + "epoch": 0.4067547723935389, + "fcm_dpo/beta": 0.002568996511399746, + "fcm_dpo/delta": -0.023184221237897873, + "fcm_dpo/margin": 164.33425903320312, + "fcm_dpo/q_t": 0.4032415747642517, + "grad_norm": 23.64580726623535, + "learning_rate": 3.71651119641714e-07, + "logits/chosen": -0.2841571569442749, + "logits/rejected": -0.26456978917121887, + "logps/chosen": -307.5662841796875, + "logps/ref_chosen": -56.27156066894531, + "logps/ref_rejected": -92.88127136230469, + "logps/rejected": -508.51025390625, + "loss": 1.082, + "margin_dpo/margin_mean": 164.33425903320312, + "margin_dpo/margin_std": 218.15975952148438, + "step": 277 + }, + { + "KL/chosen_KL_mean": -219.04319763183594, + "KL/mean": -315.867919921875, + "KL/rejected_KL_mean": -412.692626953125, + "KL/std": 195.05999755859375, + "epoch": 0.40822320117474303, + "fcm_dpo/beta": 0.0025190459564328194, + "fcm_dpo/delta": -0.09271565079689026, + "fcm_dpo/margin": 193.64944458007812, + "fcm_dpo/q_t": 0.38852113485336304, + "grad_norm": 24.158504486083984, + "learning_rate": 3.705283756425872e-07, + "logits/chosen": -0.27769631147384644, + "logits/rejected": -0.2823639214038849, + "logps/chosen": -271.9851379394531, + "logps/ref_chosen": -52.94194030761719, + "logps/ref_rejected": -91.25357818603516, + "logps/rejected": -503.94622802734375, + "loss": 1.026, + "margin_dpo/margin_mean": 193.6494598388672, + "margin_dpo/margin_std": 218.25051879882812, + "step": 278 + }, + { + "KL/chosen_KL_mean": -250.65829467773438, + "KL/mean": -346.52783203125, + "KL/rejected_KL_mean": -442.3973388671875, + "KL/std": 203.59503173828125, + "epoch": 0.40969162995594716, + "fcm_dpo/beta": 0.0024712784215807915, + "fcm_dpo/delta": -0.07853814959526062, + "fcm_dpo/margin": 191.73904418945312, + "fcm_dpo/q_t": 0.39302849769592285, + "grad_norm": 27.312673568725586, + "learning_rate": 3.6940245560867e-07, + "logits/chosen": -0.24171388149261475, + "logits/rejected": -0.2399359941482544, + "logps/chosen": -299.29962158203125, + "logps/ref_chosen": -48.641319274902344, + "logps/ref_rejected": -87.8514404296875, + "logps/rejected": -530.248779296875, + "loss": 1.0546, + "margin_dpo/margin_mean": 191.73904418945312, + "margin_dpo/margin_std": 247.916748046875, + "step": 279 + }, + { + "KL/chosen_KL_mean": -246.4661865234375, + "KL/mean": -343.66387939453125, + "KL/rejected_KL_mean": -440.861572265625, + "KL/std": 186.5137939453125, + "epoch": 0.4111600587371512, + "fcm_dpo/beta": 0.0024436868261545897, + "fcm_dpo/delta": -0.0788697600364685, + "fcm_dpo/margin": 194.39535522460938, + "fcm_dpo/q_t": 0.3888060450553894, + "grad_norm": 25.471012115478516, + "learning_rate": 3.6827338920900253e-07, + "logits/chosen": -0.2643176317214966, + "logits/rejected": -0.2642500698566437, + "logps/chosen": -305.2633056640625, + "logps/ref_chosen": -58.797122955322266, + "logps/ref_rejected": -98.61885070800781, + "logps/rejected": -539.4804077148438, + "loss": 1.0254, + "margin_dpo/margin_mean": 194.39535522460938, + "margin_dpo/margin_std": 205.177001953125, + "step": 280 + }, + { + "KL/chosen_KL_mean": -228.09291076660156, + "KL/mean": -314.8627624511719, + "KL/rejected_KL_mean": -401.6325988769531, + "KL/std": 182.05502319335938, + "epoch": 0.41262848751835535, + "fcm_dpo/beta": 0.0024358248338103294, + "fcm_dpo/delta": -0.02387945167720318, + "fcm_dpo/margin": 173.53970336914062, + "fcm_dpo/q_t": 0.3996858596801758, + "grad_norm": 20.681076049804688, + "learning_rate": 3.6714120619553435e-07, + "logits/chosen": -0.32415997982025146, + "logits/rejected": -0.2965441346168518, + "logps/chosen": -283.5814208984375, + "logps/ref_chosen": -55.488521575927734, + "logps/ref_rejected": -80.88258361816406, + "logps/rejected": -482.51519775390625, + "loss": 1.0639, + "margin_dpo/margin_mean": 173.53970336914062, + "margin_dpo/margin_std": 196.98211669921875, + "step": 281 + }, + { + "KL/chosen_KL_mean": -254.01113891601562, + "KL/mean": -322.19805908203125, + "KL/rejected_KL_mean": -390.38494873046875, + "KL/std": 191.5809326171875, + "epoch": 0.41409691629955947, + "fcm_dpo/beta": 0.0024580340832471848, + "fcm_dpo/delta": 0.06647245585918427, + "fcm_dpo/margin": 136.3738555908203, + "fcm_dpo/q_t": 0.42514273524284363, + "grad_norm": 22.19266128540039, + "learning_rate": 3.660059364023408e-07, + "logits/chosen": -0.3909962475299835, + "logits/rejected": -0.3682512640953064, + "logps/chosen": -327.081298828125, + "logps/ref_chosen": -73.07014465332031, + "logps/ref_rejected": -95.35098266601562, + "logps/rejected": -485.7359619140625, + "loss": 1.1443, + "margin_dpo/margin_mean": 136.37387084960938, + "margin_dpo/margin_std": 219.83755493164062, + "step": 282 + }, + { + "KL/chosen_KL_mean": -266.194091796875, + "KL/mean": -369.5931091308594, + "KL/rejected_KL_mean": -472.99212646484375, + "KL/std": 225.01544189453125, + "epoch": 0.4155653450807636, + "fcm_dpo/beta": 0.0024265965912491083, + "fcm_dpo/delta": -0.10707136243581772, + "fcm_dpo/margin": 206.79803466796875, + "fcm_dpo/q_t": 0.38304460048675537, + "grad_norm": 30.945764541625977, + "learning_rate": 3.6486760974483685e-07, + "logits/chosen": -0.31001657247543335, + "logits/rejected": -0.3113076388835907, + "logps/chosen": -328.092529296875, + "logps/ref_chosen": -61.89844512939453, + "logps/ref_rejected": -96.98655700683594, + "logps/rejected": -569.9786376953125, + "loss": 1.0088, + "margin_dpo/margin_mean": 206.79803466796875, + "margin_dpo/margin_std": 210.00332641601562, + "step": 283 + }, + { + "KL/chosen_KL_mean": -263.2822265625, + "KL/mean": -357.4686279296875, + "KL/rejected_KL_mean": -451.6550598144531, + "KL/std": 208.6199951171875, + "epoch": 0.4170337738619677, + "fcm_dpo/beta": 0.0023820092901587486, + "fcm_dpo/delta": -0.051164183765649796, + "fcm_dpo/margin": 188.37283325195312, + "fcm_dpo/q_t": 0.3964000344276428, + "grad_norm": 29.025760650634766, + "learning_rate": 3.6372625621898863e-07, + "logits/chosen": -0.3494116961956024, + "logits/rejected": -0.33238211274147034, + "logps/chosen": -321.7177734375, + "logps/ref_chosen": -58.4355354309082, + "logps/ref_rejected": -93.46926879882812, + "logps/rejected": -545.1243286132812, + "loss": 1.0441, + "margin_dpo/margin_mean": 188.3728485107422, + "margin_dpo/margin_std": 209.56866455078125, + "step": 284 + }, + { + "KL/chosen_KL_mean": -302.94207763671875, + "KL/mean": -389.82421875, + "KL/rejected_KL_mean": -476.7063903808594, + "KL/std": 186.600830078125, + "epoch": 0.4185022026431718, + "fcm_dpo/beta": 0.0023880950175225735, + "fcm_dpo/delta": -0.016177460551261902, + "fcm_dpo/margin": 173.7642822265625, + "fcm_dpo/q_t": 0.401868611574173, + "grad_norm": 24.644567489624023, + "learning_rate": 3.625819059005228e-07, + "logits/chosen": -0.3161693811416626, + "logits/rejected": -0.29976439476013184, + "logps/chosen": -369.1742858886719, + "logps/ref_chosen": -66.23219299316406, + "logps/ref_rejected": -99.1268310546875, + "logps/rejected": -575.833251953125, + "loss": 1.0727, + "margin_dpo/margin_mean": 173.7642822265625, + "margin_dpo/margin_std": 203.36099243164062, + "step": 285 + }, + { + "KL/chosen_KL_mean": -321.2906494140625, + "KL/mean": -415.0553283691406, + "KL/rejected_KL_mean": -508.82000732421875, + "KL/std": 211.68765258789062, + "epoch": 0.4199706314243759, + "fcm_dpo/beta": 0.0023618116974830627, + "fcm_dpo/delta": -0.044894296675920486, + "fcm_dpo/margin": 187.52935791015625, + "fcm_dpo/q_t": 0.3975561857223511, + "grad_norm": 32.87556457519531, + "learning_rate": 3.614345889441346e-07, + "logits/chosen": -0.33208775520324707, + "logits/rejected": -0.31639528274536133, + "logps/chosen": -394.24163818359375, + "logps/ref_chosen": -72.95100402832031, + "logps/ref_rejected": -88.58845520019531, + "logps/rejected": -597.408447265625, + "loss": 1.0606, + "margin_dpo/margin_mean": 187.52935791015625, + "margin_dpo/margin_std": 228.80307006835938, + "step": 286 + }, + { + "KL/chosen_KL_mean": -296.5609130859375, + "KL/mean": -373.24261474609375, + "KL/rejected_KL_mean": -449.92431640625, + "KL/std": 189.50225830078125, + "epoch": 0.42143906020558003, + "fcm_dpo/beta": 0.0023672073148190975, + "fcm_dpo/delta": 0.038290925323963165, + "fcm_dpo/margin": 153.36337280273438, + "fcm_dpo/q_t": 0.41586506366729736, + "grad_norm": 30.704463958740234, + "learning_rate": 3.6028433558269275e-07, + "logits/chosen": -0.2822296619415283, + "logits/rejected": -0.2562822699546814, + "logps/chosen": -358.1020812988281, + "logps/ref_chosen": -61.54115295410156, + "logps/ref_rejected": -77.69607543945312, + "logps/rejected": -527.620361328125, + "loss": 1.1102, + "margin_dpo/margin_mean": 153.36337280273438, + "margin_dpo/margin_std": 203.52426147460938, + "step": 287 + }, + { + "KL/chosen_KL_mean": -283.6439514160156, + "KL/mean": -377.3069763183594, + "KL/rejected_KL_mean": -470.969970703125, + "KL/std": 187.588134765625, + "epoch": 0.42290748898678415, + "fcm_dpo/beta": 0.0023379437625408173, + "fcm_dpo/delta": -0.04108269885182381, + "fcm_dpo/margin": 187.32608032226562, + "fcm_dpo/q_t": 0.3972797393798828, + "grad_norm": 23.47823715209961, + "learning_rate": 3.5913117612644327e-07, + "logits/chosen": -0.32454603910446167, + "logits/rejected": -0.30999091267585754, + "logps/chosen": -340.30517578125, + "logps/ref_chosen": -56.661224365234375, + "logps/ref_rejected": -87.33570098876953, + "logps/rejected": -558.3056640625, + "loss": 1.0495, + "margin_dpo/margin_mean": 187.32608032226562, + "margin_dpo/margin_std": 196.14199829101562, + "step": 288 + }, + { + "KL/chosen_KL_mean": -265.7540588378906, + "KL/mean": -373.11285400390625, + "KL/rejected_KL_mean": -480.47161865234375, + "KL/std": 206.18136596679688, + "epoch": 0.4243759177679883, + "fcm_dpo/beta": 0.0023207864724099636, + "fcm_dpo/delta": -0.10334105789661407, + "fcm_dpo/margin": 214.71755981445312, + "fcm_dpo/q_t": 0.3854060769081116, + "grad_norm": 26.55727195739746, + "learning_rate": 3.5797514096221024e-07, + "logits/chosen": -0.26507318019866943, + "logits/rejected": -0.2650468349456787, + "logps/chosen": -310.98443603515625, + "logps/ref_chosen": -45.23039245605469, + "logps/ref_rejected": -87.64266967773438, + "logps/rejected": -568.1142578125, + "loss": 1.0143, + "margin_dpo/margin_mean": 214.71755981445312, + "margin_dpo/margin_std": 228.02871704101562, + "step": 289 + }, + { + "KL/chosen_KL_mean": -276.853759765625, + "KL/mean": -383.96502685546875, + "KL/rejected_KL_mean": -491.07623291015625, + "KL/std": 217.60955810546875, + "epoch": 0.42584434654919234, + "fcm_dpo/beta": 0.0022673578932881355, + "fcm_dpo/delta": -0.09031336009502411, + "fcm_dpo/margin": 214.22250366210938, + "fcm_dpo/q_t": 0.3898102939128876, + "grad_norm": 20.6153621673584, + "learning_rate": 3.568162605525952e-07, + "logits/chosen": -0.3317207098007202, + "logits/rejected": -0.3523035943508148, + "logps/chosen": -332.32525634765625, + "logps/ref_chosen": -55.47149658203125, + "logps/ref_rejected": -116.70857238769531, + "logps/rejected": -607.7847900390625, + "loss": 1.0356, + "margin_dpo/margin_mean": 214.22250366210938, + "margin_dpo/margin_std": 258.4100341796875, + "step": 290 + }, + { + "KL/chosen_KL_mean": -230.03976440429688, + "KL/mean": -328.71478271484375, + "KL/rejected_KL_mean": -427.3897705078125, + "KL/std": 188.07647705078125, + "epoch": 0.42731277533039647, + "fcm_dpo/beta": 0.0022549815475940704, + "fcm_dpo/delta": -0.047311414033174515, + "fcm_dpo/margin": 197.35003662109375, + "fcm_dpo/q_t": 0.39613714814186096, + "grad_norm": 26.71583366394043, + "learning_rate": 3.5565456543517485e-07, + "logits/chosen": -0.34385907649993896, + "logits/rejected": -0.3322584629058838, + "logps/chosen": -293.30010986328125, + "logps/ref_chosen": -63.26036834716797, + "logps/ref_rejected": -89.29708862304688, + "logps/rejected": -516.6868896484375, + "loss": 1.052, + "margin_dpo/margin_mean": 197.35003662109375, + "margin_dpo/margin_std": 223.89544677734375, + "step": 291 + }, + { + "KL/chosen_KL_mean": -248.18878173828125, + "KL/mean": -349.1991882324219, + "KL/rejected_KL_mean": -450.2095947265625, + "KL/std": 218.77377319335938, + "epoch": 0.4287812041116006, + "fcm_dpo/beta": 0.0022292518988251686, + "fcm_dpo/delta": -0.05277468264102936, + "fcm_dpo/margin": 202.0208282470703, + "fcm_dpo/q_t": 0.3962145447731018, + "grad_norm": 23.873905181884766, + "learning_rate": 3.5449008622169583e-07, + "logits/chosen": -0.31092000007629395, + "logits/rejected": -0.2947191596031189, + "logps/chosen": -302.1072998046875, + "logps/ref_chosen": -53.91852951049805, + "logps/ref_rejected": -89.96138000488281, + "logps/rejected": -540.1710205078125, + "loss": 1.0614, + "margin_dpo/margin_mean": 202.02084350585938, + "margin_dpo/margin_std": 255.2861328125, + "step": 292 + }, + { + "KL/chosen_KL_mean": -252.22222900390625, + "KL/mean": -329.9274597167969, + "KL/rejected_KL_mean": -407.6326904296875, + "KL/std": 214.19297790527344, + "epoch": 0.4302496328928047, + "fcm_dpo/beta": 0.0022371455561369658, + "fcm_dpo/delta": 0.05408930033445358, + "fcm_dpo/margin": 155.41049194335938, + "fcm_dpo/q_t": 0.42154398560523987, + "grad_norm": 44.14265060424805, + "learning_rate": 3.5332285359726846e-07, + "logits/chosen": -0.32412204146385193, + "logits/rejected": -0.3133804202079773, + "logps/chosen": -312.5982666015625, + "logps/ref_chosen": -60.376033782958984, + "logps/ref_rejected": -77.85244750976562, + "logps/rejected": -485.4851379394531, + "loss": 1.1388, + "margin_dpo/margin_mean": 155.41049194335938, + "margin_dpo/margin_std": 247.1007080078125, + "step": 293 + }, + { + "KL/chosen_KL_mean": -224.80003356933594, + "KL/mean": -312.6026916503906, + "KL/rejected_KL_mean": -400.40533447265625, + "KL/std": 189.26589965820312, + "epoch": 0.43171806167400884, + "fcm_dpo/beta": 0.0022378209978342056, + "fcm_dpo/delta": 0.0072397105395793915, + "fcm_dpo/margin": 175.60528564453125, + "fcm_dpo/q_t": 0.4101282060146332, + "grad_norm": 27.088520050048828, + "learning_rate": 3.5215289831955786e-07, + "logits/chosen": -0.30610185861587524, + "logits/rejected": -0.31053173542022705, + "logps/chosen": -272.8875732421875, + "logps/ref_chosen": -48.0875358581543, + "logps/ref_rejected": -81.89698791503906, + "logps/rejected": -482.30230712890625, + "loss": 1.0958, + "margin_dpo/margin_mean": 175.60528564453125, + "margin_dpo/margin_std": 234.76486206054688, + "step": 294 + }, + { + "KL/chosen_KL_mean": -276.4921875, + "KL/mean": -373.5928955078125, + "KL/rejected_KL_mean": -470.69366455078125, + "KL/std": 215.0086669921875, + "epoch": 0.4331864904552129, + "fcm_dpo/beta": 0.002234848216176033, + "fcm_dpo/delta": -0.03554647043347359, + "fcm_dpo/margin": 194.2014617919922, + "fcm_dpo/q_t": 0.4011088013648987, + "grad_norm": 29.52766990661621, + "learning_rate": 3.509802512179737e-07, + "logits/chosen": -0.34492525458335876, + "logits/rejected": -0.34738168120384216, + "logps/chosen": -326.4168701171875, + "logps/ref_chosen": -49.92467498779297, + "logps/ref_rejected": -87.45632934570312, + "logps/rejected": -558.1499633789062, + "loss": 1.0748, + "margin_dpo/margin_mean": 194.2014617919922, + "margin_dpo/margin_std": 255.34695434570312, + "step": 295 + }, + { + "KL/chosen_KL_mean": -349.96075439453125, + "KL/mean": -420.53668212890625, + "KL/rejected_KL_mean": -491.1127014160156, + "KL/std": 214.16937255859375, + "epoch": 0.434654919236417, + "fcm_dpo/beta": 0.0022216294892132282, + "fcm_dpo/delta": -0.01307538989931345, + "fcm_dpo/margin": 141.15194702148438, + "fcm_dpo/q_t": 0.4277653992176056, + "grad_norm": 30.11356544494629, + "learning_rate": 3.498049431928577e-07, + "logits/chosen": -0.35794180631637573, + "logits/rejected": -0.3423447906970978, + "logps/chosen": -415.45196533203125, + "logps/ref_chosen": -65.49124145507812, + "logps/ref_rejected": -93.08908081054688, + "logps/rejected": -584.2017822265625, + "loss": 1.1858, + "margin_dpo/margin_mean": 141.15194702148438, + "margin_dpo/margin_std": 273.21856689453125, + "step": 296 + }, + { + "KL/chosen_KL_mean": -306.0766906738281, + "KL/mean": -389.6324768066406, + "KL/rejected_KL_mean": -473.188232421875, + "KL/std": 202.3120574951172, + "epoch": 0.43612334801762115, + "fcm_dpo/beta": 0.0022283056750893593, + "fcm_dpo/delta": 0.028696084395051003, + "fcm_dpo/margin": 167.111572265625, + "fcm_dpo/q_t": 0.41309747099876404, + "grad_norm": 26.392620086669922, + "learning_rate": 3.486270052146694e-07, + "logits/chosen": -0.3816351294517517, + "logits/rejected": -0.38721585273742676, + "logps/chosen": -362.55364990234375, + "logps/ref_chosen": -56.476951599121094, + "logps/ref_rejected": -95.1385498046875, + "logps/rejected": -568.3267822265625, + "loss": 1.0975, + "margin_dpo/margin_mean": 167.111572265625, + "margin_dpo/margin_std": 204.6174774169922, + "step": 297 + }, + { + "KL/chosen_KL_mean": -336.15673828125, + "KL/mean": -440.7702941894531, + "KL/rejected_KL_mean": -545.3837890625, + "KL/std": 268.1351318359375, + "epoch": 0.43759177679882527, + "fcm_dpo/beta": 0.002221038332208991, + "fcm_dpo/delta": -0.06791778653860092, + "fcm_dpo/margin": 209.22708129882812, + "fcm_dpo/q_t": 0.39830517768859863, + "grad_norm": 23.501754760742188, + "learning_rate": 3.474464683231698e-07, + "logits/chosen": -0.3646508455276489, + "logits/rejected": -0.382364422082901, + "logps/chosen": -403.48193359375, + "logps/ref_chosen": -67.32516479492188, + "logps/ref_rejected": -116.66217041015625, + "logps/rejected": -662.0460205078125, + "loss": 1.0784, + "margin_dpo/margin_mean": 209.2270965576172, + "margin_dpo/margin_std": 309.3797607421875, + "step": 298 + }, + { + "KL/chosen_KL_mean": -277.727783203125, + "KL/mean": -368.62017822265625, + "KL/rejected_KL_mean": -459.5125732421875, + "KL/std": 201.8565673828125, + "epoch": 0.4390602055800294, + "fcm_dpo/beta": 0.0022161747328937054, + "fcm_dpo/delta": -0.0036800652742385864, + "fcm_dpo/margin": 181.78482055664062, + "fcm_dpo/q_t": 0.4088858366012573, + "grad_norm": 30.56585121154785, + "learning_rate": 3.462633636266041e-07, + "logits/chosen": -0.38511306047439575, + "logits/rejected": -0.39016664028167725, + "logps/chosen": -326.68988037109375, + "logps/ref_chosen": -48.96209716796875, + "logps/ref_rejected": -84.32823944091797, + "logps/rejected": -543.8408203125, + "loss": 1.0997, + "margin_dpo/margin_mean": 181.78482055664062, + "margin_dpo/margin_std": 254.47584533691406, + "step": 299 + }, + { + "KL/chosen_KL_mean": -359.6534423828125, + "KL/mean": -468.8331604003906, + "KL/rejected_KL_mean": -578.0128784179688, + "KL/std": 245.97247314453125, + "epoch": 0.44052863436123346, + "fcm_dpo/beta": 0.0021824706345796585, + "fcm_dpo/delta": -0.08032761514186859, + "fcm_dpo/margin": 218.35943603515625, + "fcm_dpo/q_t": 0.3919123411178589, + "grad_norm": 32.0359992980957, + "learning_rate": 3.4507772230088147e-07, + "logits/chosen": -0.3638511300086975, + "logits/rejected": -0.36910757422447205, + "logps/chosen": -418.7271728515625, + "logps/ref_chosen": -59.07371139526367, + "logps/ref_rejected": -95.9664535522461, + "logps/rejected": -673.9793701171875, + "loss": 1.0673, + "margin_dpo/margin_mean": 218.3594512939453, + "margin_dpo/margin_std": 301.63983154296875, + "step": 300 + }, + { + "KL/chosen_KL_mean": -299.93310546875, + "KL/mean": -405.40899658203125, + "KL/rejected_KL_mean": -510.8848571777344, + "KL/std": 220.83499145507812, + "epoch": 0.4419970631424376, + "fcm_dpo/beta": 0.0021431921049952507, + "fcm_dpo/delta": -0.05494837090373039, + "fcm_dpo/margin": 210.9517822265625, + "fcm_dpo/q_t": 0.397805392742157, + "grad_norm": 23.908777236938477, + "learning_rate": 3.4388957558875316e-07, + "logits/chosen": -0.3962569832801819, + "logits/rejected": -0.39823591709136963, + "logps/chosen": -357.1824951171875, + "logps/ref_chosen": -57.249366760253906, + "logps/ref_rejected": -92.35354614257812, + "logps/rejected": -603.2384033203125, + "loss": 1.0574, + "margin_dpo/margin_mean": 210.9517822265625, + "margin_dpo/margin_std": 264.4083557128906, + "step": 301 + }, + { + "KL/chosen_KL_mean": -250.50064086914062, + "KL/mean": -340.6607971191406, + "KL/rejected_KL_mean": -430.82098388671875, + "KL/std": 200.06076049804688, + "epoch": 0.4434654919236417, + "fcm_dpo/beta": 0.0021400072146207094, + "fcm_dpo/delta": 0.014417506754398346, + "fcm_dpo/margin": 180.3203125, + "fcm_dpo/q_t": 0.4114646017551422, + "grad_norm": 25.441043853759766, + "learning_rate": 3.426989547989902e-07, + "logits/chosen": -0.37367284297943115, + "logits/rejected": -0.3793327212333679, + "logps/chosen": -301.6986083984375, + "logps/ref_chosen": -51.197994232177734, + "logps/ref_rejected": -97.22636413574219, + "logps/rejected": -528.04736328125, + "loss": 1.0961, + "margin_dpo/margin_mean": 180.3203125, + "margin_dpo/margin_std": 233.26841735839844, + "step": 302 + }, + { + "KL/chosen_KL_mean": -252.14584350585938, + "KL/mean": -333.20892333984375, + "KL/rejected_KL_mean": -414.27197265625, + "KL/std": 197.92535400390625, + "epoch": 0.44493392070484583, + "fcm_dpo/beta": 0.00216277944855392, + "fcm_dpo/delta": 0.05117795616388321, + "fcm_dpo/margin": 162.12612915039062, + "fcm_dpo/q_t": 0.41918832063674927, + "grad_norm": 25.745454788208008, + "learning_rate": 3.4150589130555773e-07, + "logits/chosen": -0.3918335437774658, + "logits/rejected": -0.3803231716156006, + "logps/chosen": -318.85980224609375, + "logps/ref_chosen": -66.71394348144531, + "logps/ref_rejected": -86.94542694091797, + "logps/rejected": -501.2174072265625, + "loss": 1.1285, + "margin_dpo/margin_mean": 162.12612915039062, + "margin_dpo/margin_std": 235.95956420898438, + "step": 303 + }, + { + "KL/chosen_KL_mean": -221.95501708984375, + "KL/mean": -313.56036376953125, + "KL/rejected_KL_mean": -405.16571044921875, + "KL/std": 181.8985137939453, + "epoch": 0.44640234948604995, + "fcm_dpo/beta": 0.002179923001676798, + "fcm_dpo/delta": 0.00038868188858032227, + "fcm_dpo/margin": 183.21072387695312, + "fcm_dpo/q_t": 0.40427181124687195, + "grad_norm": 44.084224700927734, + "learning_rate": 3.403104165467883e-07, + "logits/chosen": -0.4207175672054291, + "logits/rejected": -0.41341572999954224, + "logps/chosen": -293.90570068359375, + "logps/ref_chosen": -71.95069885253906, + "logps/ref_rejected": -90.47203063964844, + "logps/rejected": -495.63775634765625, + "loss": 1.0561, + "margin_dpo/margin_mean": 183.21072387695312, + "margin_dpo/margin_std": 157.74085998535156, + "step": 304 + }, + { + "KL/chosen_KL_mean": -244.41680908203125, + "KL/mean": -326.9585876464844, + "KL/rejected_KL_mean": -409.5003662109375, + "KL/std": 211.88983154296875, + "epoch": 0.447870778267254, + "fcm_dpo/beta": 0.0021737192291766405, + "fcm_dpo/delta": 0.042266424745321274, + "fcm_dpo/margin": 165.08358764648438, + "fcm_dpo/q_t": 0.41834086179733276, + "grad_norm": 31.972244262695312, + "learning_rate": 3.391125620245535e-07, + "logits/chosen": -0.41458988189697266, + "logits/rejected": -0.39753109216690063, + "logps/chosen": -311.2120361328125, + "logps/ref_chosen": -66.79523468017578, + "logps/ref_rejected": -92.75459289550781, + "logps/rejected": -502.25494384765625, + "loss": 1.1229, + "margin_dpo/margin_mean": 165.08358764648438, + "margin_dpo/margin_std": 234.5511932373047, + "step": 305 + }, + { + "KL/chosen_KL_mean": -250.080322265625, + "KL/mean": -330.5018005371094, + "KL/rejected_KL_mean": -410.92327880859375, + "KL/std": 189.47010803222656, + "epoch": 0.44933920704845814, + "fcm_dpo/beta": 0.0022093781735748053, + "fcm_dpo/delta": 0.04615872725844383, + "fcm_dpo/margin": 160.84295654296875, + "fcm_dpo/q_t": 0.4171670079231262, + "grad_norm": 25.6021785736084, + "learning_rate": 3.3791235930343417e-07, + "logits/chosen": -0.386802077293396, + "logits/rejected": -0.36289817094802856, + "logps/chosen": -319.76422119140625, + "logps/ref_chosen": -69.68389892578125, + "logps/ref_rejected": -85.15919494628906, + "logps/rejected": -496.08245849609375, + "loss": 1.1083, + "margin_dpo/margin_mean": 160.84295654296875, + "margin_dpo/margin_std": 201.04806518554688, + "step": 306 + }, + { + "KL/chosen_KL_mean": -236.98658752441406, + "KL/mean": -322.44061279296875, + "KL/rejected_KL_mean": -407.89459228515625, + "KL/std": 182.07717895507812, + "epoch": 0.45080763582966227, + "fcm_dpo/beta": 0.0022153654135763645, + "fcm_dpo/delta": 0.022209253162145615, + "fcm_dpo/margin": 170.90798950195312, + "fcm_dpo/q_t": 0.4120749235153198, + "grad_norm": 25.33635139465332, + "learning_rate": 3.367098400098881e-07, + "logits/chosen": -0.35360145568847656, + "logits/rejected": -0.3347788155078888, + "logps/chosen": -307.1520080566406, + "logps/ref_chosen": -70.16542053222656, + "logps/ref_rejected": -86.97230529785156, + "logps/rejected": -494.8669128417969, + "loss": 1.1004, + "margin_dpo/margin_mean": 170.90798950195312, + "margin_dpo/margin_std": 223.1322021484375, + "step": 307 + }, + { + "KL/chosen_KL_mean": -244.5159149169922, + "KL/mean": -342.49346923828125, + "KL/rejected_KL_mean": -440.47100830078125, + "KL/std": 209.0433349609375, + "epoch": 0.4522760646108664, + "fcm_dpo/beta": 0.0022103004157543182, + "fcm_dpo/delta": -0.03460945934057236, + "fcm_dpo/margin": 195.95510864257812, + "fcm_dpo/q_t": 0.39782798290252686, + "grad_norm": 35.38976287841797, + "learning_rate": 3.355050358314172e-07, + "logits/chosen": -0.33471328020095825, + "logits/rejected": -0.32273852825164795, + "logps/chosen": -299.76092529296875, + "logps/ref_chosen": -55.2449951171875, + "logps/ref_rejected": -79.37226104736328, + "logps/rejected": -519.84326171875, + "loss": 1.0462, + "margin_dpo/margin_mean": 195.95510864257812, + "margin_dpo/margin_std": 199.99911499023438, + "step": 308 + }, + { + "KL/chosen_KL_mean": -252.39642333984375, + "KL/mean": -347.4449157714844, + "KL/rejected_KL_mean": -442.4934387207031, + "KL/std": 221.7578125, + "epoch": 0.45374449339207046, + "fcm_dpo/beta": 0.0022104280069470406, + "fcm_dpo/delta": -0.02155473083257675, + "fcm_dpo/margin": 190.0969696044922, + "fcm_dpo/q_t": 0.40153375267982483, + "grad_norm": 46.212398529052734, + "learning_rate": 3.3429797851573183e-07, + "logits/chosen": -0.3147510886192322, + "logits/rejected": -0.30644166469573975, + "logps/chosen": -301.35552978515625, + "logps/ref_chosen": -48.959083557128906, + "logps/ref_rejected": -82.34072875976562, + "logps/rejected": -524.8341674804688, + "loss": 1.0737, + "margin_dpo/margin_mean": 190.09698486328125, + "margin_dpo/margin_std": 229.8643798828125, + "step": 309 + }, + { + "KL/chosen_KL_mean": -293.9149169921875, + "KL/mean": -383.8049621582031, + "KL/rejected_KL_mean": -473.69500732421875, + "KL/std": 197.09872436523438, + "epoch": 0.4552129221732746, + "fcm_dpo/beta": 0.0022050600964576006, + "fcm_dpo/delta": 0.0034537650644779205, + "fcm_dpo/margin": 179.78012084960938, + "fcm_dpo/q_t": 0.40635180473327637, + "grad_norm": 33.55656051635742, + "learning_rate": 3.3308869986991487e-07, + "logits/chosen": -0.3364931046962738, + "logits/rejected": -0.3217809200286865, + "logps/chosen": -356.65667724609375, + "logps/ref_chosen": -62.74177932739258, + "logps/ref_rejected": -79.9302978515625, + "logps/rejected": -553.6253051757812, + "loss": 1.072, + "margin_dpo/margin_mean": 179.7801055908203, + "margin_dpo/margin_std": 189.23294067382812, + "step": 310 + }, + { + "KL/chosen_KL_mean": -307.0128173828125, + "KL/mean": -407.85986328125, + "KL/rejected_KL_mean": -508.7069091796875, + "KL/std": 248.04620361328125, + "epoch": 0.4566813509544787, + "fcm_dpo/beta": 0.0021827276796102524, + "fcm_dpo/delta": -0.04212527349591255, + "fcm_dpo/margin": 201.69406127929688, + "fcm_dpo/q_t": 0.4002191424369812, + "grad_norm": 23.36595344543457, + "learning_rate": 3.3187723175958346e-07, + "logits/chosen": -0.3425959348678589, + "logits/rejected": -0.31884661316871643, + "logps/chosen": -360.04083251953125, + "logps/ref_chosen": -53.02798080444336, + "logps/ref_rejected": -77.43820190429688, + "logps/rejected": -586.1451416015625, + "loss": 1.0687, + "margin_dpo/margin_mean": 201.69406127929688, + "margin_dpo/margin_std": 261.9150085449219, + "step": 311 + }, + { + "KL/chosen_KL_mean": -291.6320495605469, + "KL/mean": -385.4327697753906, + "KL/rejected_KL_mean": -479.23345947265625, + "KL/std": 222.2432861328125, + "epoch": 0.4581497797356828, + "fcm_dpo/beta": 0.0021842336282134056, + "fcm_dpo/delta": -0.010445069521665573, + "fcm_dpo/margin": 187.60142517089844, + "fcm_dpo/q_t": 0.4080832004547119, + "grad_norm": 25.498783111572266, + "learning_rate": 3.306636061080487e-07, + "logits/chosen": -0.2573780417442322, + "logits/rejected": -0.2468489408493042, + "logps/chosen": -341.0242614746094, + "logps/ref_chosen": -49.39221954345703, + "logps/ref_rejected": -75.79280853271484, + "logps/rejected": -555.0262451171875, + "loss": 1.0923, + "margin_dpo/margin_mean": 187.6014404296875, + "margin_dpo/margin_std": 262.27001953125, + "step": 312 + }, + { + "KL/chosen_KL_mean": -274.81085205078125, + "KL/mean": -373.3189392089844, + "KL/rejected_KL_mean": -471.82708740234375, + "KL/std": 231.67160034179688, + "epoch": 0.45961820851688695, + "fcm_dpo/beta": 0.0021794200874865055, + "fcm_dpo/delta": -0.03143874555826187, + "fcm_dpo/margin": 197.0161895751953, + "fcm_dpo/q_t": 0.40177449584007263, + "grad_norm": 24.87454605102539, + "learning_rate": 3.2944785489547537e-07, + "logits/chosen": -0.33512693643569946, + "logits/rejected": -0.3323609530925751, + "logps/chosen": -324.9635925292969, + "logps/ref_chosen": -50.152740478515625, + "logps/ref_rejected": -86.40620422363281, + "logps/rejected": -558.2332763671875, + "loss": 1.0768, + "margin_dpo/margin_mean": 197.0161895751953, + "margin_dpo/margin_std": 253.28038024902344, + "step": 313 + }, + { + "KL/chosen_KL_mean": -298.7161560058594, + "KL/mean": -388.66937255859375, + "KL/rejected_KL_mean": -478.62261962890625, + "KL/std": 222.7989501953125, + "epoch": 0.461086637298091, + "fcm_dpo/beta": 0.0021564702037721872, + "fcm_dpo/delta": 0.012253139168024063, + "fcm_dpo/margin": 179.90646362304688, + "fcm_dpo/q_t": 0.4130541682243347, + "grad_norm": 22.955171585083008, + "learning_rate": 3.2823001015803857e-07, + "logits/chosen": -0.3842218816280365, + "logits/rejected": -0.3846893906593323, + "logps/chosen": -355.9537353515625, + "logps/ref_chosen": -57.237579345703125, + "logps/ref_rejected": -97.5965347290039, + "logps/rejected": -576.2191772460938, + "loss": 1.1194, + "margin_dpo/margin_mean": 179.90646362304688, + "margin_dpo/margin_std": 277.59881591796875, + "step": 314 + }, + { + "KL/chosen_KL_mean": -269.84991455078125, + "KL/mean": -350.29864501953125, + "KL/rejected_KL_mean": -430.747314453125, + "KL/std": 197.81268310546875, + "epoch": 0.46255506607929514, + "fcm_dpo/beta": 0.002180763054639101, + "fcm_dpo/delta": 0.050937261432409286, + "fcm_dpo/margin": 160.8974151611328, + "fcm_dpo/q_t": 0.41917771100997925, + "grad_norm": 24.254276275634766, + "learning_rate": 3.270101039870797e-07, + "logits/chosen": -0.30912429094314575, + "logits/rejected": -0.31343331933021545, + "logps/chosen": -318.91949462890625, + "logps/ref_chosen": -49.06958770751953, + "logps/ref_rejected": -85.68087768554688, + "logps/rejected": -516.42822265625, + "loss": 1.1207, + "margin_dpo/margin_mean": 160.8974151611328, + "margin_dpo/margin_std": 221.48760986328125, + "step": 315 + }, + { + "KL/chosen_KL_mean": -255.51815795898438, + "KL/mean": -372.13592529296875, + "KL/rejected_KL_mean": -488.75372314453125, + "KL/std": 227.74038696289062, + "epoch": 0.46402349486049926, + "fcm_dpo/beta": 0.00214382354170084, + "fcm_dpo/delta": -0.10627135634422302, + "fcm_dpo/margin": 233.23556518554688, + "fcm_dpo/q_t": 0.38564345240592957, + "grad_norm": 31.184669494628906, + "learning_rate": 3.2578816852826086e-07, + "logits/chosen": -0.3521896004676819, + "logits/rejected": -0.35530799627304077, + "logps/chosen": -309.7789306640625, + "logps/ref_chosen": -54.26074981689453, + "logps/ref_rejected": -101.2814712524414, + "logps/rejected": -590.03515625, + "loss": 1.0134, + "margin_dpo/margin_mean": 233.23556518554688, + "margin_dpo/margin_std": 248.93862915039062, + "step": 316 + }, + { + "KL/chosen_KL_mean": -252.7716064453125, + "KL/mean": -380.02117919921875, + "KL/rejected_KL_mean": -507.27069091796875, + "KL/std": 206.77581787109375, + "epoch": 0.4654919236417034, + "fcm_dpo/beta": 0.002105377148836851, + "fcm_dpo/delta": -0.14325766265392303, + "fcm_dpo/margin": 254.4990997314453, + "fcm_dpo/q_t": 0.3748946189880371, + "grad_norm": 26.17375946044922, + "learning_rate": 3.2456423598071783e-07, + "logits/chosen": -0.390718936920166, + "logits/rejected": -0.3826904892921448, + "logps/chosen": -308.8658447265625, + "logps/ref_chosen": -56.094207763671875, + "logps/ref_rejected": -100.69905090332031, + "logps/rejected": -607.9697265625, + "loss": 0.978, + "margin_dpo/margin_mean": 254.4990997314453, + "margin_dpo/margin_std": 230.30081176757812, + "step": 317 + }, + { + "KL/chosen_KL_mean": -269.4692687988281, + "KL/mean": -365.70343017578125, + "KL/rejected_KL_mean": -461.93756103515625, + "KL/std": 210.68841552734375, + "epoch": 0.4669603524229075, + "fcm_dpo/beta": 0.0020867723505944014, + "fcm_dpo/delta": -0.001836409792304039, + "fcm_dpo/margin": 192.4683380126953, + "fcm_dpo/q_t": 0.4067472815513611, + "grad_norm": 23.02154541015625, + "learning_rate": 3.233383385962115e-07, + "logits/chosen": -0.4208700656890869, + "logits/rejected": -0.3912370800971985, + "logps/chosen": -334.1149597167969, + "logps/ref_chosen": -64.64569854736328, + "logps/ref_rejected": -82.76425170898438, + "logps/rejected": -544.7018432617188, + "loss": 1.0773, + "margin_dpo/margin_mean": 192.4683380126953, + "margin_dpo/margin_std": 227.6715850830078, + "step": 318 + }, + { + "KL/chosen_KL_mean": -239.6466064453125, + "KL/mean": -350.7529296875, + "KL/rejected_KL_mean": -461.8592529296875, + "KL/std": 224.13626098632812, + "epoch": 0.4684287812041116, + "fcm_dpo/beta": 0.00206323666498065, + "fcm_dpo/delta": -0.0612642765045166, + "fcm_dpo/margin": 222.212646484375, + "fcm_dpo/q_t": 0.3929300308227539, + "grad_norm": 28.023488998413086, + "learning_rate": 3.2211050867827805e-07, + "logits/chosen": -0.38181760907173157, + "logits/rejected": -0.3934275507926941, + "logps/chosen": -289.0303649902344, + "logps/ref_chosen": -49.383758544921875, + "logps/ref_rejected": -113.90650939941406, + "logps/rejected": -575.7657470703125, + "loss": 1.0361, + "margin_dpo/margin_mean": 222.212646484375, + "margin_dpo/margin_std": 239.76544189453125, + "step": 319 + }, + { + "KL/chosen_KL_mean": -251.48712158203125, + "KL/mean": -370.67779541015625, + "KL/rejected_KL_mean": -489.8685302734375, + "KL/std": 234.5609130859375, + "epoch": 0.4698972099853157, + "fcm_dpo/beta": 0.0020233364775776863, + "fcm_dpo/delta": -0.08694636821746826, + "fcm_dpo/margin": 238.38136291503906, + "fcm_dpo/q_t": 0.3885904550552368, + "grad_norm": 31.34676742553711, + "learning_rate": 3.208807785813777e-07, + "logits/chosen": -0.3809563219547272, + "logits/rejected": -0.3856205940246582, + "logps/chosen": -310.99200439453125, + "logps/ref_chosen": -59.50489044189453, + "logps/ref_rejected": -97.66717529296875, + "logps/rejected": -587.53564453125, + "loss": 1.0213, + "margin_dpo/margin_mean": 238.38136291503906, + "margin_dpo/margin_std": 253.5137481689453, + "step": 320 + }, + { + "KL/chosen_KL_mean": -314.7099304199219, + "KL/mean": -417.4952392578125, + "KL/rejected_KL_mean": -520.2805786132812, + "KL/std": 242.76919555664062, + "epoch": 0.4713656387665198, + "fcm_dpo/beta": 0.0020058308728039265, + "fcm_dpo/delta": -0.013504378497600555, + "fcm_dpo/margin": 205.5706787109375, + "fcm_dpo/q_t": 0.4050200581550598, + "grad_norm": 22.613510131835938, + "learning_rate": 3.1964918071004217e-07, + "logits/chosen": -0.3279907703399658, + "logits/rejected": -0.31290388107299805, + "logps/chosen": -376.25860595703125, + "logps/ref_chosen": -61.548683166503906, + "logps/ref_rejected": -91.64103698730469, + "logps/rejected": -611.921630859375, + "loss": 1.0825, + "margin_dpo/margin_mean": 205.5706787109375, + "margin_dpo/margin_std": 260.4750061035156, + "step": 321 + }, + { + "KL/chosen_KL_mean": -277.9368896484375, + "KL/mean": -393.11395263671875, + "KL/rejected_KL_mean": -508.2909851074219, + "KL/std": 222.57351684570312, + "epoch": 0.47283406754772395, + "fcm_dpo/beta": 0.0019898181781172752, + "fcm_dpo/delta": -0.06161149963736534, + "fcm_dpo/margin": 230.3541259765625, + "fcm_dpo/q_t": 0.3928527235984802, + "grad_norm": 27.584213256835938, + "learning_rate": 3.184157475180207e-07, + "logits/chosen": -0.3369908928871155, + "logits/rejected": -0.33506596088409424, + "logps/chosen": -335.2269287109375, + "logps/ref_chosen": -57.29003143310547, + "logps/ref_rejected": -95.74992370605469, + "logps/rejected": -604.0408935546875, + "loss": 1.0334, + "margin_dpo/margin_mean": 230.3541259765625, + "margin_dpo/margin_std": 238.74765014648438, + "step": 322 + }, + { + "KL/chosen_KL_mean": -308.34637451171875, + "KL/mean": -404.15570068359375, + "KL/rejected_KL_mean": -499.96502685546875, + "KL/std": 221.2327423095703, + "epoch": 0.47430249632892807, + "fcm_dpo/beta": 0.0019924892112612724, + "fcm_dpo/delta": 0.018896615132689476, + "fcm_dpo/margin": 191.61865234375, + "fcm_dpo/q_t": 0.4107508361339569, + "grad_norm": 37.98896789550781, + "learning_rate": 3.171805115074251e-07, + "logits/chosen": -0.3975059986114502, + "logits/rejected": -0.39840167760849, + "logps/chosen": -359.580322265625, + "logps/ref_chosen": -51.23395919799805, + "logps/ref_rejected": -75.06192016601562, + "logps/rejected": -575.0269775390625, + "loss": 1.0976, + "margin_dpo/margin_mean": 191.61865234375, + "margin_dpo/margin_std": 239.77487182617188, + "step": 323 + }, + { + "KL/chosen_KL_mean": -363.1470031738281, + "KL/mean": -459.2158203125, + "KL/rejected_KL_mean": -555.28466796875, + "KL/std": 245.32525634765625, + "epoch": 0.47577092511013214, + "fcm_dpo/beta": 0.0020171115174889565, + "fcm_dpo/delta": 0.011590391397476196, + "fcm_dpo/margin": 192.13768005371094, + "fcm_dpo/q_t": 0.4130977690219879, + "grad_norm": 45.15947723388672, + "learning_rate": 3.1594350522787295e-07, + "logits/chosen": -0.382703959941864, + "logits/rejected": -0.36886465549468994, + "logps/chosen": -428.28216552734375, + "logps/ref_chosen": -65.13516998291016, + "logps/ref_rejected": -86.47750854492188, + "logps/rejected": -641.76220703125, + "loss": 1.1188, + "margin_dpo/margin_mean": 192.13768005371094, + "margin_dpo/margin_std": 280.11346435546875, + "step": 324 + }, + { + "KL/chosen_KL_mean": -291.805419921875, + "KL/mean": -372.7662353515625, + "KL/rejected_KL_mean": -453.72705078125, + "KL/std": 219.71487426757812, + "epoch": 0.47723935389133626, + "fcm_dpo/beta": 0.002026339527219534, + "fcm_dpo/delta": 0.07426172494888306, + "fcm_dpo/margin": 161.92161560058594, + "fcm_dpo/q_t": 0.4235909581184387, + "grad_norm": 27.52566909790039, + "learning_rate": 3.147047612756302e-07, + "logits/chosen": -0.38832759857177734, + "logits/rejected": -0.36508649587631226, + "logps/chosen": -348.02099609375, + "logps/ref_chosen": -56.215599060058594, + "logps/ref_rejected": -70.08592987060547, + "logps/rejected": -523.81298828125, + "loss": 1.1353, + "margin_dpo/margin_mean": 161.921630859375, + "margin_dpo/margin_std": 220.10113525390625, + "step": 325 + }, + { + "KL/chosen_KL_mean": -334.75213623046875, + "KL/mean": -415.12261962890625, + "KL/rejected_KL_mean": -495.49310302734375, + "KL/std": 209.2508544921875, + "epoch": 0.4787077826725404, + "fcm_dpo/beta": 0.00205246196128428, + "fcm_dpo/delta": 0.07252933084964752, + "fcm_dpo/margin": 160.740966796875, + "fcm_dpo/q_t": 0.42195820808410645, + "grad_norm": 49.80532455444336, + "learning_rate": 3.134643122927519e-07, + "logits/chosen": -0.4096953272819519, + "logits/rejected": -0.3885076642036438, + "logps/chosen": -407.47711181640625, + "logps/ref_chosen": -72.72496032714844, + "logps/ref_rejected": -79.8467788696289, + "logps/rejected": -575.33984375, + "loss": 1.1258, + "margin_dpo/margin_mean": 160.74095153808594, + "margin_dpo/margin_std": 199.02053833007812, + "step": 326 + }, + { + "KL/chosen_KL_mean": -289.6980895996094, + "KL/mean": -412.4832763671875, + "KL/rejected_KL_mean": -535.2684936523438, + "KL/std": 226.18698120117188, + "epoch": 0.4801762114537445, + "fcm_dpo/beta": 0.0020246244966983795, + "fcm_dpo/delta": -0.10278213024139404, + "fcm_dpo/margin": 245.57040405273438, + "fcm_dpo/q_t": 0.38342660665512085, + "grad_norm": 34.839141845703125, + "learning_rate": 3.1222219096622264e-07, + "logits/chosen": -0.40583473443984985, + "logits/rejected": -0.3938768804073334, + "logps/chosen": -358.83251953125, + "logps/ref_chosen": -69.13441467285156, + "logps/ref_rejected": -111.93377685546875, + "logps/rejected": -647.2022705078125, + "loss": 1.0081, + "margin_dpo/margin_mean": 245.57040405273438, + "margin_dpo/margin_std": 245.6561279296875, + "step": 327 + }, + { + "KL/chosen_KL_mean": -313.56195068359375, + "KL/mean": -424.1883850097656, + "KL/rejected_KL_mean": -534.8148803710938, + "KL/std": 248.53616333007812, + "epoch": 0.48164464023494863, + "fcm_dpo/beta": 0.002008104231208563, + "fcm_dpo/delta": -0.046333495527505875, + "fcm_dpo/margin": 221.25289916992188, + "fcm_dpo/q_t": 0.3990749418735504, + "grad_norm": 31.601329803466797, + "learning_rate": 3.1097843002709427e-07, + "logits/chosen": -0.37076377868652344, + "logits/rejected": -0.3748210668563843, + "logps/chosen": -373.2491455078125, + "logps/ref_chosen": -59.68719482421875, + "logps/ref_rejected": -90.85499572753906, + "logps/rejected": -625.6698608398438, + "loss": 1.0624, + "margin_dpo/margin_mean": 221.25289916992188, + "margin_dpo/margin_std": 280.74591064453125, + "step": 328 + }, + { + "KL/chosen_KL_mean": -348.364013671875, + "KL/mean": -463.31585693359375, + "KL/rejected_KL_mean": -578.2677001953125, + "KL/std": 264.98114013671875, + "epoch": 0.4831130690161527, + "fcm_dpo/beta": 0.001973442966118455, + "fcm_dpo/delta": -0.057223327457904816, + "fcm_dpo/margin": 229.90365600585938, + "fcm_dpo/q_t": 0.3950253129005432, + "grad_norm": 29.35655975341797, + "learning_rate": 3.0973306224962437e-07, + "logits/chosen": -0.38779300451278687, + "logits/rejected": -0.3826950788497925, + "logps/chosen": -413.6102294921875, + "logps/ref_chosen": -65.2461929321289, + "logps/ref_rejected": -100.69770812988281, + "logps/rejected": -678.9653930664062, + "loss": 1.0626, + "margin_dpo/margin_mean": 229.90365600585938, + "margin_dpo/margin_std": 285.9508972167969, + "step": 329 + }, + { + "KL/chosen_KL_mean": -297.8939208984375, + "KL/mean": -418.2544250488281, + "KL/rejected_KL_mean": -538.614990234375, + "KL/std": 251.08877563476562, + "epoch": 0.4845814977973568, + "fcm_dpo/beta": 0.00195663096383214, + "fcm_dpo/delta": -0.07452473044395447, + "fcm_dpo/margin": 240.72103881835938, + "fcm_dpo/q_t": 0.3914690613746643, + "grad_norm": 24.276758193969727, + "learning_rate": 3.084861204504122e-07, + "logits/chosen": -0.37196603417396545, + "logits/rejected": -0.37232887744903564, + "logps/chosen": -344.89227294921875, + "logps/ref_chosen": -46.998348236083984, + "logps/ref_rejected": -86.87684631347656, + "logps/rejected": -625.4918212890625, + "loss": 1.0349, + "margin_dpo/margin_mean": 240.72105407714844, + "margin_dpo/margin_std": 271.8794860839844, + "step": 330 + }, + { + "KL/chosen_KL_mean": -324.826904296875, + "KL/mean": -441.84423828125, + "KL/rejected_KL_mean": -558.861572265625, + "KL/std": 205.87355041503906, + "epoch": 0.48604992657856094, + "fcm_dpo/beta": 0.0019327991176396608, + "fcm_dpo/delta": -0.054849639534950256, + "fcm_dpo/margin": 234.03463745117188, + "fcm_dpo/q_t": 0.3927758038043976, + "grad_norm": 30.4269962310791, + "learning_rate": 3.072376374875335e-07, + "logits/chosen": -0.4110090136528015, + "logits/rejected": -0.4085603356361389, + "logps/chosen": -375.3511657714844, + "logps/ref_chosen": -50.52424621582031, + "logps/ref_rejected": -89.01544189453125, + "logps/rejected": -647.876953125, + "loss": 1.02, + "margin_dpo/margin_mean": 234.03463745117188, + "margin_dpo/margin_std": 203.57736206054688, + "step": 331 + }, + { + "KL/chosen_KL_mean": -331.472412109375, + "KL/mean": -412.5047302246094, + "KL/rejected_KL_mean": -493.5369873046875, + "KL/std": 217.41600036621094, + "epoch": 0.48751835535976507, + "fcm_dpo/beta": 0.0019560197833925486, + "fcm_dpo/delta": 0.08553433418273926, + "fcm_dpo/margin": 162.0646209716797, + "fcm_dpo/q_t": 0.42724329233169556, + "grad_norm": 26.13042449951172, + "learning_rate": 3.059876462596758e-07, + "logits/chosen": -0.43391871452331543, + "logits/rejected": -0.4177435040473938, + "logps/chosen": -380.6527099609375, + "logps/ref_chosen": -49.18028259277344, + "logps/ref_rejected": -76.48515319824219, + "logps/rejected": -570.0221557617188, + "loss": 1.1466, + "margin_dpo/margin_mean": 162.0646209716797, + "margin_dpo/margin_std": 242.46429443359375, + "step": 332 + }, + { + "KL/chosen_KL_mean": -346.9090270996094, + "KL/mean": -456.2929382324219, + "KL/rejected_KL_mean": -565.6768798828125, + "KL/std": 254.33526611328125, + "epoch": 0.4889867841409692, + "fcm_dpo/beta": 0.0019417135044932365, + "fcm_dpo/delta": -0.02682226523756981, + "fcm_dpo/margin": 218.76780700683594, + "fcm_dpo/q_t": 0.40386736392974854, + "grad_norm": 26.93458366394043, + "learning_rate": 3.0473617970527015e-07, + "logits/chosen": -0.43507999181747437, + "logits/rejected": -0.43221938610076904, + "logps/chosen": -410.664794921875, + "logps/ref_chosen": -63.75574493408203, + "logps/ref_rejected": -95.04411315917969, + "logps/rejected": -660.720947265625, + "loss": 1.0917, + "margin_dpo/margin_mean": 218.76780700683594, + "margin_dpo/margin_std": 306.374267578125, + "step": 333 + }, + { + "KL/chosen_KL_mean": -324.4681396484375, + "KL/mean": -426.024169921875, + "KL/rejected_KL_mean": -527.5802001953125, + "KL/std": 273.755859375, + "epoch": 0.49045521292217326, + "fcm_dpo/beta": 0.0019499869085848331, + "fcm_dpo/delta": 0.004075163975358009, + "fcm_dpo/margin": 203.11204528808594, + "fcm_dpo/q_t": 0.41010695695877075, + "grad_norm": 25.05001449584961, + "learning_rate": 3.034832708016243e-07, + "logits/chosen": -0.42128774523735046, + "logits/rejected": -0.4194262623786926, + "logps/chosen": -391.4478759765625, + "logps/ref_chosen": -66.97975158691406, + "logps/ref_rejected": -95.31692504882812, + "logps/rejected": -622.8970947265625, + "loss": 1.1117, + "margin_dpo/margin_mean": 203.112060546875, + "margin_dpo/margin_std": 303.74609375, + "step": 334 + }, + { + "KL/chosen_KL_mean": -345.0743408203125, + "KL/mean": -423.9620361328125, + "KL/rejected_KL_mean": -502.8497314453125, + "KL/std": 249.84854125976562, + "epoch": 0.4919236417033774, + "fcm_dpo/beta": 0.0019520404748618603, + "fcm_dpo/delta": -0.0027985575143247843, + "fcm_dpo/margin": 157.77536010742188, + "fcm_dpo/q_t": 0.42951834201812744, + "grad_norm": 27.41286277770996, + "learning_rate": 3.022289525640531e-07, + "logits/chosen": -0.45320796966552734, + "logits/rejected": -0.43336862325668335, + "logps/chosen": -407.6168212890625, + "logps/ref_chosen": -62.54248046875, + "logps/ref_rejected": -87.61770629882812, + "logps/rejected": -590.4674072265625, + "loss": 1.1714, + "margin_dpo/margin_mean": 157.77536010742188, + "margin_dpo/margin_std": 273.3466491699219, + "step": 335 + }, + { + "KL/chosen_KL_mean": -327.80242919921875, + "KL/mean": -452.0769348144531, + "KL/rejected_KL_mean": -576.3514404296875, + "KL/std": 280.5369873046875, + "epoch": 0.4933920704845815, + "fcm_dpo/beta": 0.0019327957415953279, + "fcm_dpo/delta": -0.08437924087047577, + "fcm_dpo/margin": 248.549072265625, + "fcm_dpo/q_t": 0.3922100067138672, + "grad_norm": 27.173513412475586, + "learning_rate": 3.009732580450086e-07, + "logits/chosen": -0.41087979078292847, + "logits/rejected": -0.41120561957359314, + "logps/chosen": -382.33355712890625, + "logps/ref_chosen": -54.53115463256836, + "logps/ref_rejected": -104.40424346923828, + "logps/rejected": -680.7556762695312, + "loss": 1.0543, + "margin_dpo/margin_mean": 248.549072265625, + "margin_dpo/margin_std": 324.4247131347656, + "step": 336 + }, + { + "KL/chosen_KL_mean": -295.12164306640625, + "KL/mean": -416.0631103515625, + "KL/rejected_KL_mean": -537.0045166015625, + "KL/std": 226.8846893310547, + "epoch": 0.4948604992657856, + "fcm_dpo/beta": 0.0018971418030560017, + "fcm_dpo/delta": -0.061946481466293335, + "fcm_dpo/margin": 241.8828887939453, + "fcm_dpo/q_t": 0.39354777336120605, + "grad_norm": 32.10745620727539, + "learning_rate": 2.9971622033320914e-07, + "logits/chosen": -0.4443337321281433, + "logits/rejected": -0.4344029426574707, + "logps/chosen": -360.2503356933594, + "logps/ref_chosen": -65.12869262695312, + "logps/ref_rejected": -101.72701263427734, + "logps/rejected": -638.7315673828125, + "loss": 1.0376, + "margin_dpo/margin_mean": 241.88287353515625, + "margin_dpo/margin_std": 264.9962463378906, + "step": 337 + }, + { + "KL/chosen_KL_mean": -272.0726318359375, + "KL/mean": -388.24578857421875, + "KL/rejected_KL_mean": -504.41900634765625, + "KL/std": 222.21337890625, + "epoch": 0.49632892804698975, + "fcm_dpo/beta": 0.0018818873213604093, + "fcm_dpo/delta": -0.03903310373425484, + "fcm_dpo/margin": 232.34637451171875, + "fcm_dpo/q_t": 0.3970540463924408, + "grad_norm": 31.5482234954834, + "learning_rate": 2.984578725527675e-07, + "logits/chosen": -0.4323264956474304, + "logits/rejected": -0.4272562265396118, + "logps/chosen": -330.4953308105469, + "logps/ref_chosen": -58.422706604003906, + "logps/ref_rejected": -89.06854248046875, + "logps/rejected": -593.487548828125, + "loss": 1.0405, + "margin_dpo/margin_mean": 232.34637451171875, + "margin_dpo/margin_std": 230.95556640625, + "step": 338 + }, + { + "KL/chosen_KL_mean": -287.5809326171875, + "KL/mean": -402.3648986816406, + "KL/rejected_KL_mean": -517.1488647460938, + "KL/std": 230.27059936523438, + "epoch": 0.4977973568281938, + "fcm_dpo/beta": 0.0018855368252843618, + "fcm_dpo/delta": -0.03534376621246338, + "fcm_dpo/margin": 229.56790161132812, + "fcm_dpo/q_t": 0.3984706401824951, + "grad_norm": 28.834077835083008, + "learning_rate": 2.9719824786231796e-07, + "logits/chosen": -0.473450243473053, + "logits/rejected": -0.46168074011802673, + "logps/chosen": -347.5762634277344, + "logps/ref_chosen": -59.99531555175781, + "logps/ref_rejected": -103.9109115600586, + "logps/rejected": -621.0597534179688, + "loss": 1.0496, + "margin_dpo/margin_mean": 229.56790161132812, + "margin_dpo/margin_std": 228.10092163085938, + "step": 339 + }, + { + "KL/chosen_KL_mean": -303.6623840332031, + "KL/mean": -404.23040771484375, + "KL/rejected_KL_mean": -504.79840087890625, + "KL/std": 221.79913330078125, + "epoch": 0.49926578560939794, + "fcm_dpo/beta": 0.001867425860837102, + "fcm_dpo/delta": 0.025164764374494553, + "fcm_dpo/margin": 201.13601684570312, + "fcm_dpo/q_t": 0.412939190864563, + "grad_norm": 31.226028442382812, + "learning_rate": 2.959373794541426e-07, + "logits/chosen": -0.3601798415184021, + "logits/rejected": -0.338324636220932, + "logps/chosen": -356.49261474609375, + "logps/ref_chosen": -52.83022689819336, + "logps/ref_rejected": -73.10723114013672, + "logps/rejected": -577.9056396484375, + "loss": 1.1047, + "margin_dpo/margin_mean": 201.13601684570312, + "margin_dpo/margin_std": 265.9939270019531, + "step": 340 + }, + { + "KL/chosen_KL_mean": -305.0142517089844, + "KL/mean": -421.94378662109375, + "KL/rejected_KL_mean": -538.8733520507812, + "KL/std": 246.80142211914062, + "epoch": 0.5007342143906021, + "fcm_dpo/beta": 0.001860608346760273, + "fcm_dpo/delta": -0.03702467307448387, + "fcm_dpo/margin": 233.8590850830078, + "fcm_dpo/q_t": 0.3988453149795532, + "grad_norm": 25.454540252685547, + "learning_rate": 2.946753005532965e-07, + "logits/chosen": -0.38325321674346924, + "logits/rejected": -0.38400086760520935, + "logps/chosen": -352.9140625, + "logps/ref_chosen": -47.899803161621094, + "logps/ref_rejected": -101.80987548828125, + "logps/rejected": -640.6832275390625, + "loss": 1.0527, + "margin_dpo/margin_mean": 233.8590850830078, + "margin_dpo/margin_std": 255.57666015625, + "step": 341 + }, + { + "KL/chosen_KL_mean": -308.7816162109375, + "KL/mean": -410.565673828125, + "KL/rejected_KL_mean": -512.3497314453125, + "KL/std": 233.906982421875, + "epoch": 0.5022026431718062, + "fcm_dpo/beta": 0.001875395653769374, + "fcm_dpo/delta": 0.018558282405138016, + "fcm_dpo/margin": 203.568115234375, + "fcm_dpo/q_t": 0.41176915168762207, + "grad_norm": 25.053524017333984, + "learning_rate": 2.934120444167326e-07, + "logits/chosen": -0.42473480105400085, + "logits/rejected": -0.40138232707977295, + "logps/chosen": -380.77825927734375, + "logps/ref_chosen": -71.99664306640625, + "logps/ref_rejected": -92.58959197998047, + "logps/rejected": -604.9393310546875, + "loss": 1.1045, + "margin_dpo/margin_mean": 203.568115234375, + "margin_dpo/margin_std": 277.70416259765625, + "step": 342 + }, + { + "KL/chosen_KL_mean": -299.65240478515625, + "KL/mean": -421.80462646484375, + "KL/rejected_KL_mean": -543.956787109375, + "KL/std": 237.9898223876953, + "epoch": 0.5036710719530103, + "fcm_dpo/beta": 0.0018535295967012644, + "fcm_dpo/delta": -0.05536113679409027, + "fcm_dpo/margin": 244.30438232421875, + "fcm_dpo/q_t": 0.39244067668914795, + "grad_norm": 22.644685745239258, + "learning_rate": 2.9214764433242476e-07, + "logits/chosen": -0.44045504927635193, + "logits/rejected": -0.44200748205184937, + "logps/chosen": -354.05804443359375, + "logps/ref_chosen": -54.405616760253906, + "logps/ref_rejected": -111.04142761230469, + "logps/rejected": -654.9982299804688, + "loss": 1.0218, + "margin_dpo/margin_mean": 244.30438232421875, + "margin_dpo/margin_std": 211.76568603515625, + "step": 343 + }, + { + "KL/chosen_KL_mean": -298.38482666015625, + "KL/mean": -410.74530029296875, + "KL/rejected_KL_mean": -523.105712890625, + "KL/std": 269.9559326171875, + "epoch": 0.5051395007342144, + "fcm_dpo/beta": 0.0018617368768900633, + "fcm_dpo/delta": -0.020948857069015503, + "fcm_dpo/margin": 224.7209014892578, + "fcm_dpo/q_t": 0.4059738516807556, + "grad_norm": 23.597854614257812, + "learning_rate": 2.9088213361849126e-07, + "logits/chosen": -0.3812987804412842, + "logits/rejected": -0.3833147883415222, + "logps/chosen": -352.3494873046875, + "logps/ref_chosen": -53.96466827392578, + "logps/ref_rejected": -90.62336730957031, + "logps/rejected": -613.7291259765625, + "loss": 1.0795, + "margin_dpo/margin_mean": 224.7209014892578, + "margin_dpo/margin_std": 275.2275390625, + "step": 344 + }, + { + "KL/chosen_KL_mean": -355.3021240234375, + "KL/mean": -474.9087219238281, + "KL/rejected_KL_mean": -594.5153198242188, + "KL/std": 251.06130981445312, + "epoch": 0.5066079295154186, + "fcm_dpo/beta": 0.0018341855611652136, + "fcm_dpo/delta": -0.04054499790072441, + "fcm_dpo/margin": 239.21319580078125, + "fcm_dpo/q_t": 0.39786964654922485, + "grad_norm": 20.200607299804688, + "learning_rate": 2.896155456223163e-07, + "logits/chosen": -0.4079459309577942, + "logits/rejected": -0.4065578877925873, + "logps/chosen": -416.98779296875, + "logps/ref_chosen": -61.685699462890625, + "logps/ref_rejected": -99.49041748046875, + "logps/rejected": -694.0057373046875, + "loss": 1.0538, + "margin_dpo/margin_mean": 239.21319580078125, + "margin_dpo/margin_std": 273.10797119140625, + "step": 345 + }, + { + "KL/chosen_KL_mean": -354.8230895996094, + "KL/mean": -467.76507568359375, + "KL/rejected_KL_mean": -580.70703125, + "KL/std": 250.11328125, + "epoch": 0.5080763582966226, + "fcm_dpo/beta": 0.001822044956497848, + "fcm_dpo/delta": -0.012084376066923141, + "fcm_dpo/margin": 225.88394165039062, + "fcm_dpo/q_t": 0.4034339189529419, + "grad_norm": 21.91631317138672, + "learning_rate": 2.883479137196714e-07, + "logits/chosen": -0.36091554164886475, + "logits/rejected": -0.35045647621154785, + "logps/chosen": -410.079345703125, + "logps/ref_chosen": -55.256263732910156, + "logps/ref_rejected": -77.41532135009766, + "logps/rejected": -658.122314453125, + "loss": 1.0737, + "margin_dpo/margin_mean": 225.88394165039062, + "margin_dpo/margin_std": 270.33984375, + "step": 346 + }, + { + "KL/chosen_KL_mean": -356.4461364746094, + "KL/mean": -470.38409423828125, + "KL/rejected_KL_mean": -584.322021484375, + "KL/std": 266.44903564453125, + "epoch": 0.5095447870778267, + "fcm_dpo/beta": 0.0018181647174060345, + "fcm_dpo/delta": -0.014934061095118523, + "fcm_dpo/margin": 227.87588500976562, + "fcm_dpo/q_t": 0.40489083528518677, + "grad_norm": 24.682762145996094, + "learning_rate": 2.8707927131383614e-07, + "logits/chosen": -0.38032758235931396, + "logits/rejected": -0.37665826082229614, + "logps/chosen": -414.01239013671875, + "logps/ref_chosen": -57.56623840332031, + "logps/ref_rejected": -92.35509490966797, + "logps/rejected": -676.6771240234375, + "loss": 1.083, + "margin_dpo/margin_mean": 227.87588500976562, + "margin_dpo/margin_std": 299.0618896484375, + "step": 347 + }, + { + "KL/chosen_KL_mean": -315.14227294921875, + "KL/mean": -411.5518493652344, + "KL/rejected_KL_mean": -507.9614562988281, + "KL/std": 229.25570678710938, + "epoch": 0.5110132158590308, + "fcm_dpo/beta": 0.001820417819544673, + "fcm_dpo/delta": 0.05043090134859085, + "fcm_dpo/margin": 192.81918334960938, + "fcm_dpo/q_t": 0.4191325306892395, + "grad_norm": 23.302974700927734, + "learning_rate": 2.858096518347179e-07, + "logits/chosen": -0.4306999444961548, + "logits/rejected": -0.4324670433998108, + "logps/chosen": -371.4599609375, + "logps/ref_chosen": -56.31770324707031, + "logps/ref_rejected": -89.13836669921875, + "logps/rejected": -597.099853515625, + "loss": 1.1225, + "margin_dpo/margin_mean": 192.81918334960938, + "margin_dpo/margin_std": 263.36297607421875, + "step": 348 + }, + { + "KL/chosen_KL_mean": -301.320556640625, + "KL/mean": -410.7008056640625, + "KL/rejected_KL_mean": -520.0811157226562, + "KL/std": 251.19491577148438, + "epoch": 0.5124816446402349, + "fcm_dpo/beta": 0.0018413200741633773, + "fcm_dpo/delta": -0.003216017037630081, + "fcm_dpo/margin": 218.76055908203125, + "fcm_dpo/q_t": 0.4088849425315857, + "grad_norm": 18.977298736572266, + "learning_rate": 2.845390887379706e-07, + "logits/chosen": -0.38458961248397827, + "logits/rejected": -0.38436293601989746, + "logps/chosen": -359.3460693359375, + "logps/ref_chosen": -58.025516510009766, + "logps/ref_rejected": -97.50515747070312, + "logps/rejected": -617.5862426757812, + "loss": 1.1007, + "margin_dpo/margin_mean": 218.7605438232422, + "margin_dpo/margin_std": 311.55657958984375, + "step": 349 + }, + { + "KL/chosen_KL_mean": -319.11181640625, + "KL/mean": -425.6887512207031, + "KL/rejected_KL_mean": -532.2657470703125, + "KL/std": 239.318603515625, + "epoch": 0.5139500734214391, + "fcm_dpo/beta": 0.0018291505984961987, + "fcm_dpo/delta": 0.010211531072854996, + "fcm_dpo/margin": 213.15390014648438, + "fcm_dpo/q_t": 0.4092941880226135, + "grad_norm": 30.917598724365234, + "learning_rate": 2.8326761550411346e-07, + "logits/chosen": -0.4026058614253998, + "logits/rejected": -0.40751904249191284, + "logps/chosen": -383.44232177734375, + "logps/ref_chosen": -64.33049011230469, + "logps/ref_rejected": -89.87164306640625, + "logps/rejected": -622.1373291015625, + "loss": 1.1063, + "margin_dpo/margin_mean": 213.15390014648438, + "margin_dpo/margin_std": 299.6205749511719, + "step": 350 + }, + { + "KL/chosen_KL_mean": -282.3033447265625, + "KL/mean": -412.9404602050781, + "KL/rejected_KL_mean": -543.5775146484375, + "KL/std": 274.8599853515625, + "epoch": 0.5154185022026432, + "fcm_dpo/beta": 0.001811300404369831, + "fcm_dpo/delta": -0.07729293406009674, + "fcm_dpo/margin": 261.2742004394531, + "fcm_dpo/q_t": 0.3929722309112549, + "grad_norm": 29.79630470275879, + "learning_rate": 2.819952656376487e-07, + "logits/chosen": -0.41858357191085815, + "logits/rejected": -0.4184862971305847, + "logps/chosen": -342.9754943847656, + "logps/ref_chosen": -60.6721305847168, + "logps/ref_rejected": -101.5654296875, + "logps/rejected": -645.1429443359375, + "loss": 1.0441, + "margin_dpo/margin_mean": 261.2742004394531, + "margin_dpo/margin_std": 318.12158203125, + "step": 351 + }, + { + "KL/chosen_KL_mean": -331.2454528808594, + "KL/mean": -410.2402648925781, + "KL/rejected_KL_mean": -489.23504638671875, + "KL/std": 247.09182739257812, + "epoch": 0.5168869309838473, + "fcm_dpo/beta": 0.0018348516896367073, + "fcm_dpo/delta": 0.11355704069137573, + "fcm_dpo/margin": 157.98960876464844, + "fcm_dpo/q_t": 0.4335269033908844, + "grad_norm": 34.465721130371094, + "learning_rate": 2.8072207266617854e-07, + "logits/chosen": -0.37872931361198425, + "logits/rejected": -0.3453086316585541, + "logps/chosen": -402.18890380859375, + "logps/ref_chosen": -70.9434585571289, + "logps/ref_rejected": -76.6419677734375, + "logps/rejected": -565.8770751953125, + "loss": 1.1791, + "margin_dpo/margin_mean": 157.9896240234375, + "margin_dpo/margin_std": 274.4894104003906, + "step": 352 + }, + { + "KL/chosen_KL_mean": -305.1307373046875, + "KL/mean": -407.434814453125, + "KL/rejected_KL_mean": -509.7388916015625, + "KL/std": 240.89715576171875, + "epoch": 0.5183553597650514, + "fcm_dpo/beta": 0.0018470755312591791, + "fcm_dpo/delta": 0.022442463785409927, + "fcm_dpo/margin": 204.60816955566406, + "fcm_dpo/q_t": 0.4131419360637665, + "grad_norm": 25.36398696899414, + "learning_rate": 2.794480701395219e-07, + "logits/chosen": -0.420898973941803, + "logits/rejected": -0.40882444381713867, + "logps/chosen": -363.52606201171875, + "logps/ref_chosen": -58.39533996582031, + "logps/ref_rejected": -80.33553314208984, + "logps/rejected": -590.074462890625, + "loss": 1.1061, + "margin_dpo/margin_mean": 204.60816955566406, + "margin_dpo/margin_std": 271.8081970214844, + "step": 353 + }, + { + "KL/chosen_KL_mean": -247.3609619140625, + "KL/mean": -360.4647216796875, + "KL/rejected_KL_mean": -473.56854248046875, + "KL/std": 221.80062866210938, + "epoch": 0.5198237885462555, + "fcm_dpo/beta": 0.0018517575226724148, + "fcm_dpo/delta": -0.019758004695177078, + "fcm_dpo/margin": 226.20755004882812, + "fcm_dpo/q_t": 0.4013108015060425, + "grad_norm": 31.546432495117188, + "learning_rate": 2.781732916288303e-07, + "logits/chosen": -0.3748503029346466, + "logits/rejected": -0.3650524616241455, + "logps/chosen": -307.1639404296875, + "logps/ref_chosen": -59.80299377441406, + "logps/ref_rejected": -88.75750732421875, + "logps/rejected": -562.3260498046875, + "loss": 1.0535, + "margin_dpo/margin_mean": 226.20755004882812, + "margin_dpo/margin_std": 229.88156127929688, + "step": 354 + }, + { + "KL/chosen_KL_mean": -252.09201049804688, + "KL/mean": -361.91143798828125, + "KL/rejected_KL_mean": -471.73089599609375, + "KL/std": 225.0244140625, + "epoch": 0.5212922173274597, + "fcm_dpo/beta": 0.0018464226741343737, + "fcm_dpo/delta": -0.005889484658837318, + "fcm_dpo/margin": 219.6388702392578, + "fcm_dpo/q_t": 0.4039592742919922, + "grad_norm": 40.25979232788086, + "learning_rate": 2.7689777072570284e-07, + "logits/chosen": -0.4974886476993561, + "logits/rejected": -0.4918820858001709, + "logps/chosen": -306.22052001953125, + "logps/ref_chosen": -54.12849807739258, + "logps/ref_rejected": -82.40606689453125, + "logps/rejected": -554.136962890625, + "loss": 1.0617, + "margin_dpo/margin_mean": 219.6388702392578, + "margin_dpo/margin_std": 220.4207763671875, + "step": 355 + }, + { + "KL/chosen_KL_mean": -312.7377624511719, + "KL/mean": -375.03485107421875, + "KL/rejected_KL_mean": -437.3319091796875, + "KL/std": 239.71302795410156, + "epoch": 0.5227606461086637, + "fcm_dpo/beta": 0.001856822520494461, + "fcm_dpo/delta": 0.0374276302754879, + "fcm_dpo/margin": 124.59414672851562, + "fcm_dpo/q_t": 0.44722917675971985, + "grad_norm": 30.493345260620117, + "learning_rate": 2.7562154104130176e-07, + "logits/chosen": -0.4328617453575134, + "logits/rejected": -0.4156278967857361, + "logps/chosen": -377.41156005859375, + "logps/ref_chosen": -64.6738052368164, + "logps/ref_rejected": -75.89926147460938, + "logps/rejected": -513.231201171875, + "loss": 1.2406, + "margin_dpo/margin_mean": 124.59414672851562, + "margin_dpo/margin_std": 293.3489990234375, + "step": 356 + }, + { + "KL/chosen_KL_mean": -279.3336181640625, + "KL/mean": -384.30426025390625, + "KL/rejected_KL_mean": -489.27496337890625, + "KL/std": 236.43228149414062, + "epoch": 0.5242290748898678, + "fcm_dpo/beta": 0.0018584367353469133, + "fcm_dpo/delta": 0.009893104434013367, + "fcm_dpo/margin": 209.9413604736328, + "fcm_dpo/q_t": 0.40836572647094727, + "grad_norm": 28.883237838745117, + "learning_rate": 2.7434463620546594e-07, + "logits/chosen": -0.40963172912597656, + "logits/rejected": -0.3999664783477783, + "logps/chosen": -332.05938720703125, + "logps/ref_chosen": -52.725799560546875, + "logps/ref_rejected": -86.84115600585938, + "logps/rejected": -576.1160888671875, + "loss": 1.0839, + "margin_dpo/margin_mean": 209.9413604736328, + "margin_dpo/margin_std": 243.27175903320312, + "step": 357 + }, + { + "KL/chosen_KL_mean": -262.045166015625, + "KL/mean": -358.10491943359375, + "KL/rejected_KL_mean": -454.16461181640625, + "KL/std": 234.3748016357422, + "epoch": 0.5256975036710719, + "fcm_dpo/beta": 0.001879463205114007, + "fcm_dpo/delta": 0.04035775363445282, + "fcm_dpo/margin": 192.11947631835938, + "fcm_dpo/q_t": 0.4165921211242676, + "grad_norm": 24.633974075317383, + "learning_rate": 2.730670898658255e-07, + "logits/chosen": -0.46384721994400024, + "logits/rejected": -0.4513862133026123, + "logps/chosen": -325.2506103515625, + "logps/ref_chosen": -63.20543670654297, + "logps/ref_rejected": -88.373291015625, + "logps/rejected": -542.5379028320312, + "loss": 1.1107, + "margin_dpo/margin_mean": 192.11947631835938, + "margin_dpo/margin_std": 253.2946014404297, + "step": 358 + }, + { + "KL/chosen_KL_mean": -302.02459716796875, + "KL/mean": -414.83648681640625, + "KL/rejected_KL_mean": -527.6484375, + "KL/std": 234.064453125, + "epoch": 0.527165932452276, + "fcm_dpo/beta": 0.001871941378340125, + "fcm_dpo/delta": -0.02348851040005684, + "fcm_dpo/margin": 225.623779296875, + "fcm_dpo/q_t": 0.40334317088127136, + "grad_norm": 30.42852783203125, + "learning_rate": 2.717889356869146e-07, + "logits/chosen": -0.3995208442211151, + "logits/rejected": -0.39267587661743164, + "logps/chosen": -358.39483642578125, + "logps/ref_chosen": -56.370216369628906, + "logps/ref_rejected": -82.17375183105469, + "logps/rejected": -609.8221435546875, + "loss": 1.0718, + "margin_dpo/margin_mean": 225.62379455566406, + "margin_dpo/margin_std": 277.3382873535156, + "step": 359 + }, + { + "KL/chosen_KL_mean": -297.1678771972656, + "KL/mean": -389.17437744140625, + "KL/rejected_KL_mean": -481.1808776855469, + "KL/std": 202.0519561767578, + "epoch": 0.5286343612334802, + "fcm_dpo/beta": 0.0018919282592833042, + "fcm_dpo/delta": 0.05358727648854256, + "fcm_dpo/margin": 184.01300048828125, + "fcm_dpo/q_t": 0.4175671935081482, + "grad_norm": 30.946216583251953, + "learning_rate": 2.7051020734928443e-07, + "logits/chosen": -0.3726957440376282, + "logits/rejected": -0.3605055510997772, + "logps/chosen": -348.6282653808594, + "logps/ref_chosen": -51.460384368896484, + "logps/ref_rejected": -69.83892059326172, + "logps/rejected": -551.019775390625, + "loss": 1.1038, + "margin_dpo/margin_mean": 184.01300048828125, + "margin_dpo/margin_std": 202.7188262939453, + "step": 360 + }, + { + "KL/chosen_KL_mean": -327.22723388671875, + "KL/mean": -416.80938720703125, + "KL/rejected_KL_mean": -506.39154052734375, + "KL/std": 240.4334716796875, + "epoch": 0.5301027900146843, + "fcm_dpo/beta": 0.001915230881422758, + "fcm_dpo/delta": 0.05861767381429672, + "fcm_dpo/margin": 179.164306640625, + "fcm_dpo/q_t": 0.42051640152931213, + "grad_norm": 29.356969833374023, + "learning_rate": 2.6923093854861593e-07, + "logits/chosen": -0.39026200771331787, + "logits/rejected": -0.3884269595146179, + "logps/chosen": -381.0967712402344, + "logps/ref_chosen": -53.86951446533203, + "logps/ref_rejected": -90.7692642211914, + "logps/rejected": -597.1607666015625, + "loss": 1.1314, + "margin_dpo/margin_mean": 179.164306640625, + "margin_dpo/margin_std": 260.03955078125, + "step": 361 + }, + { + "KL/chosen_KL_mean": -290.72216796875, + "KL/mean": -430.7841796875, + "KL/rejected_KL_mean": -570.84619140625, + "KL/std": 250.39520263671875, + "epoch": 0.5315712187958884, + "fcm_dpo/beta": 0.001872203079983592, + "fcm_dpo/delta": -0.13209237158298492, + "fcm_dpo/margin": 280.12396240234375, + "fcm_dpo/q_t": 0.3787139654159546, + "grad_norm": 28.38374900817871, + "learning_rate": 2.679511629948319e-07, + "logits/chosen": -0.3565158247947693, + "logits/rejected": -0.3644503951072693, + "logps/chosen": -349.3612060546875, + "logps/ref_chosen": -58.639060974121094, + "logps/ref_rejected": -105.58195495605469, + "logps/rejected": -676.4281005859375, + "loss": 0.9864, + "margin_dpo/margin_mean": 280.12396240234375, + "margin_dpo/margin_std": 268.27490234375, + "step": 362 + }, + { + "KL/chosen_KL_mean": -277.21441650390625, + "KL/mean": -418.75067138671875, + "KL/rejected_KL_mean": -560.2869262695312, + "KL/std": 253.55523681640625, + "epoch": 0.5330396475770925, + "fcm_dpo/beta": 0.0018373643979430199, + "fcm_dpo/delta": -0.12650209665298462, + "fcm_dpo/margin": 283.072509765625, + "fcm_dpo/q_t": 0.37961655855178833, + "grad_norm": 22.140954971313477, + "learning_rate": 2.6667091441120816e-07, + "logits/chosen": -0.3845895528793335, + "logits/rejected": -0.3806605339050293, + "logps/chosen": -321.7727966308594, + "logps/ref_chosen": -44.558380126953125, + "logps/ref_rejected": -74.69496154785156, + "logps/rejected": -634.98193359375, + "loss": 0.9916, + "margin_dpo/margin_mean": 283.072509765625, + "margin_dpo/margin_std": 275.70379638671875, + "step": 363 + }, + { + "KL/chosen_KL_mean": -303.1225280761719, + "KL/mean": -410.65924072265625, + "KL/rejected_KL_mean": -518.1959228515625, + "KL/std": 250.283447265625, + "epoch": 0.5345080763582967, + "fcm_dpo/beta": 0.0018285869155079126, + "fcm_dpo/delta": 0.006663650274276733, + "fcm_dpo/margin": 215.0734100341797, + "fcm_dpo/q_t": 0.4090085029602051, + "grad_norm": 24.997861862182617, + "learning_rate": 2.6539022653348575e-07, + "logits/chosen": -0.4224643111228943, + "logits/rejected": -0.4338565468788147, + "logps/chosen": -352.01715087890625, + "logps/ref_chosen": -48.894622802734375, + "logps/ref_rejected": -91.395751953125, + "logps/rejected": -609.5916748046875, + "loss": 1.0981, + "margin_dpo/margin_mean": 215.0734100341797, + "margin_dpo/margin_std": 289.23095703125, + "step": 364 + }, + { + "KL/chosen_KL_mean": -294.5177001953125, + "KL/mean": -405.79449462890625, + "KL/rejected_KL_mean": -517.0712890625, + "KL/std": 263.12286376953125, + "epoch": 0.5359765051395007, + "fcm_dpo/beta": 0.0018205586820840836, + "fcm_dpo/delta": -0.005390607286244631, + "fcm_dpo/margin": 222.5535430908203, + "fcm_dpo/q_t": 0.4073421359062195, + "grad_norm": 24.999404907226562, + "learning_rate": 2.641091331089811e-07, + "logits/chosen": -0.4111742377281189, + "logits/rejected": -0.4230782985687256, + "logps/chosen": -346.01043701171875, + "logps/ref_chosen": -51.49274444580078, + "logps/ref_rejected": -92.70166778564453, + "logps/rejected": -609.77294921875, + "loss": 1.0785, + "margin_dpo/margin_mean": 222.5535430908203, + "margin_dpo/margin_std": 272.5834655761719, + "step": 365 + }, + { + "KL/chosen_KL_mean": -270.3475646972656, + "KL/mean": -384.409423828125, + "KL/rejected_KL_mean": -498.47125244140625, + "KL/std": 244.6814422607422, + "epoch": 0.5374449339207048, + "fcm_dpo/beta": 0.001808905741199851, + "fcm_dpo/delta": -0.013614185154438019, + "fcm_dpo/margin": 228.12367248535156, + "fcm_dpo/q_t": 0.4051057696342468, + "grad_norm": 21.499807357788086, + "learning_rate": 2.6282766789569736e-07, + "logits/chosen": -0.41397756338119507, + "logits/rejected": -0.4283139109611511, + "logps/chosen": -315.0681457519531, + "logps/ref_chosen": -44.7205696105957, + "logps/ref_rejected": -83.31040954589844, + "logps/rejected": -581.7816162109375, + "loss": 1.0815, + "margin_dpo/margin_mean": 228.12368774414062, + "margin_dpo/margin_std": 290.66448974609375, + "step": 366 + }, + { + "KL/chosen_KL_mean": -284.0663146972656, + "KL/mean": -375.30828857421875, + "KL/rejected_KL_mean": -466.55029296875, + "KL/std": 219.5314178466797, + "epoch": 0.5389133627019089, + "fcm_dpo/beta": 0.0018367799930274487, + "fcm_dpo/delta": 0.06687445938587189, + "fcm_dpo/margin": 182.48397827148438, + "fcm_dpo/q_t": 0.4210050106048584, + "grad_norm": 19.561819076538086, + "learning_rate": 2.615458646614349e-07, + "logits/chosen": -0.418517529964447, + "logits/rejected": -0.40183088183403015, + "logps/chosen": -342.47174072265625, + "logps/ref_chosen": -58.405418395996094, + "logps/ref_rejected": -76.75132751464844, + "logps/rejected": -543.3016357421875, + "loss": 1.1247, + "margin_dpo/margin_mean": 182.48397827148438, + "margin_dpo/margin_std": 237.13613891601562, + "step": 367 + }, + { + "KL/chosen_KL_mean": -266.5557861328125, + "KL/mean": -412.49371337890625, + "KL/rejected_KL_mean": -558.431640625, + "KL/std": 249.94715881347656, + "epoch": 0.540381791483113, + "fcm_dpo/beta": 0.0018038455164059997, + "fcm_dpo/delta": -0.1335085779428482, + "fcm_dpo/margin": 291.8758850097656, + "fcm_dpo/q_t": 0.37479937076568604, + "grad_norm": 45.23311233520508, + "learning_rate": 2.6026375718290083e-07, + "logits/chosen": -0.41935569047927856, + "logits/rejected": -0.4271644651889801, + "logps/chosen": -311.00830078125, + "logps/ref_chosen": -44.452518463134766, + "logps/ref_rejected": -98.55526733398438, + "logps/rejected": -656.9869384765625, + "loss": 0.9664, + "margin_dpo/margin_mean": 291.8758544921875, + "margin_dpo/margin_std": 225.90078735351562, + "step": 368 + }, + { + "KL/chosen_KL_mean": -350.90386962890625, + "KL/mean": -427.52069091796875, + "KL/rejected_KL_mean": -504.13751220703125, + "KL/std": 248.12368774414062, + "epoch": 0.5418502202643172, + "fcm_dpo/beta": 0.001830049091950059, + "fcm_dpo/delta": 0.12243049591779709, + "fcm_dpo/margin": 153.2336883544922, + "fcm_dpo/q_t": 0.43473055958747864, + "grad_norm": 29.0583553314209, + "learning_rate": 2.589813792448196e-07, + "logits/chosen": -0.43031615018844604, + "logits/rejected": -0.413091778755188, + "logps/chosen": -422.2853698730469, + "logps/ref_chosen": -71.38150024414062, + "logps/ref_rejected": -91.29582214355469, + "logps/rejected": -595.433349609375, + "loss": 1.1876, + "margin_dpo/margin_mean": 153.2336883544922, + "margin_dpo/margin_std": 278.4016418457031, + "step": 369 + }, + { + "KL/chosen_KL_mean": -361.59027099609375, + "KL/mean": -435.93548583984375, + "KL/rejected_KL_mean": -510.28070068359375, + "KL/std": 257.38360595703125, + "epoch": 0.5433186490455213, + "fcm_dpo/beta": 0.001874544657766819, + "fcm_dpo/delta": 0.12432844936847687, + "fcm_dpo/margin": 148.69049072265625, + "fcm_dpo/q_t": 0.43603307008743286, + "grad_norm": 32.01681137084961, + "learning_rate": 2.5769876463904263e-07, + "logits/chosen": -0.4668412208557129, + "logits/rejected": -0.4614550471305847, + "logps/chosen": -433.19775390625, + "logps/ref_chosen": -71.60749816894531, + "logps/ref_rejected": -97.25978088378906, + "logps/rejected": -607.54052734375, + "loss": 1.1953, + "margin_dpo/margin_mean": 148.69049072265625, + "margin_dpo/margin_std": 284.5626220703125, + "step": 370 + }, + { + "KL/chosen_KL_mean": -344.98797607421875, + "KL/mean": -453.09259033203125, + "KL/rejected_KL_mean": -561.197265625, + "KL/std": 260.2284851074219, + "epoch": 0.5447870778267254, + "fcm_dpo/beta": 0.0018845757003873587, + "fcm_dpo/delta": -0.007901359349489212, + "fcm_dpo/margin": 216.20925903320312, + "fcm_dpo/q_t": 0.4071364104747772, + "grad_norm": 31.476709365844727, + "learning_rate": 2.5641594716365744e-07, + "logits/chosen": -0.4888390302658081, + "logits/rejected": -0.4776480793952942, + "logps/chosen": -414.4024658203125, + "logps/ref_chosen": -69.41448974609375, + "logps/ref_rejected": -99.17217254638672, + "logps/rejected": -660.369384765625, + "loss": 1.0943, + "margin_dpo/margin_mean": 216.20925903320312, + "margin_dpo/margin_std": 298.52569580078125, + "step": 371 + }, + { + "KL/chosen_KL_mean": -332.8981628417969, + "KL/mean": -460.857177734375, + "KL/rejected_KL_mean": -588.816162109375, + "KL/std": 296.18841552734375, + "epoch": 0.5462555066079295, + "fcm_dpo/beta": 0.0018558462616056204, + "fcm_dpo/delta": -0.07873637974262238, + "fcm_dpo/margin": 255.91799926757812, + "fcm_dpo/q_t": 0.39386242628097534, + "grad_norm": 24.130836486816406, + "learning_rate": 2.551329606220976e-07, + "logits/chosen": -0.44447630643844604, + "logits/rejected": -0.4267101287841797, + "logps/chosen": -394.7161560058594, + "logps/ref_chosen": -61.8179931640625, + "logps/ref_rejected": -78.53948974609375, + "logps/rejected": -667.3556518554688, + "loss": 1.0516, + "margin_dpo/margin_mean": 255.91799926757812, + "margin_dpo/margin_std": 333.7495422363281, + "step": 372 + }, + { + "KL/chosen_KL_mean": -361.79736328125, + "KL/mean": -481.796875, + "KL/rejected_KL_mean": -601.7963256835938, + "KL/std": 286.9070129394531, + "epoch": 0.5477239353891337, + "fcm_dpo/beta": 0.0018471537623554468, + "fcm_dpo/delta": -0.04569406807422638, + "fcm_dpo/margin": 239.99903869628906, + "fcm_dpo/q_t": 0.3971483111381531, + "grad_norm": 27.912851333618164, + "learning_rate": 2.538498388222517e-07, + "logits/chosen": -0.4145781695842743, + "logits/rejected": -0.3933746814727783, + "logps/chosen": -426.01446533203125, + "logps/ref_chosen": -64.21713256835938, + "logps/ref_rejected": -85.95960998535156, + "logps/rejected": -687.7559814453125, + "loss": 1.0572, + "margin_dpo/margin_mean": 239.99905395507812, + "margin_dpo/margin_std": 276.5765075683594, + "step": 373 + }, + { + "KL/chosen_KL_mean": -320.88250732421875, + "KL/mean": -430.1343078613281, + "KL/rejected_KL_mean": -539.3861083984375, + "KL/std": 306.24932861328125, + "epoch": 0.5491923641703378, + "fcm_dpo/beta": 0.0018239655764773488, + "fcm_dpo/delta": 0.0010283365845680237, + "fcm_dpo/margin": 218.50360107421875, + "fcm_dpo/q_t": 0.41195404529571533, + "grad_norm": 25.18126106262207, + "learning_rate": 2.525666155755725e-07, + "logits/chosen": -0.5014743208885193, + "logits/rejected": -0.4834766983985901, + "logps/chosen": -391.53271484375, + "logps/ref_chosen": -70.65018463134766, + "logps/ref_rejected": -93.64016723632812, + "logps/rejected": -633.0263061523438, + "loss": 1.1167, + "margin_dpo/margin_mean": 218.50360107421875, + "margin_dpo/margin_std": 344.5791015625, + "step": 374 + }, + { + "KL/chosen_KL_mean": -325.4786071777344, + "KL/mean": -431.72906494140625, + "KL/rejected_KL_mean": -537.9794921875, + "KL/std": 243.8807373046875, + "epoch": 0.5506607929515418, + "fcm_dpo/beta": 0.001825918909162283, + "fcm_dpo/delta": 0.011883806437253952, + "fcm_dpo/margin": 212.50088500976562, + "fcm_dpo/q_t": 0.4101504981517792, + "grad_norm": 34.303585052490234, + "learning_rate": 2.512833246961859e-07, + "logits/chosen": -0.45803767442703247, + "logits/rejected": -0.4583319425582886, + "logps/chosen": -385.558837890625, + "logps/ref_chosen": -60.080223083496094, + "logps/ref_rejected": -88.93830871582031, + "logps/rejected": -626.9177856445312, + "loss": 1.1046, + "margin_dpo/margin_mean": 212.50088500976562, + "margin_dpo/margin_std": 290.35430908203125, + "step": 375 + }, + { + "KL/chosen_KL_mean": -315.9422302246094, + "KL/mean": -448.74517822265625, + "KL/rejected_KL_mean": -581.548095703125, + "KL/std": 266.8795166015625, + "epoch": 0.5521292217327459, + "fcm_dpo/beta": 0.0018120380118489265, + "fcm_dpo/delta": -0.08552736043930054, + "fcm_dpo/margin": 265.6058654785156, + "fcm_dpo/q_t": 0.3894526958465576, + "grad_norm": 25.428913116455078, + "learning_rate": 2.5e-07, + "logits/chosen": -0.44534194469451904, + "logits/rejected": -0.4348585307598114, + "logps/chosen": -378.6025390625, + "logps/ref_chosen": -62.660308837890625, + "logps/ref_rejected": -105.52660369873047, + "logps/rejected": -687.07470703125, + "loss": 1.0361, + "margin_dpo/margin_mean": 265.6058654785156, + "margin_dpo/margin_std": 312.51824951171875, + "step": 376 + }, + { + "KL/chosen_KL_mean": -318.29412841796875, + "KL/mean": -442.55340576171875, + "KL/rejected_KL_mean": -566.812744140625, + "KL/std": 271.7928771972656, + "epoch": 0.55359765051395, + "fcm_dpo/beta": 0.0017974915681406856, + "fcm_dpo/delta": -0.048895493149757385, + "fcm_dpo/margin": 248.51861572265625, + "fcm_dpo/q_t": 0.3962140679359436, + "grad_norm": 34.237945556640625, + "learning_rate": 2.487166753038141e-07, + "logits/chosen": -0.39339831471443176, + "logits/rejected": -0.39519575238227844, + "logps/chosen": -372.7728576660156, + "logps/ref_chosen": -54.478736877441406, + "logps/ref_rejected": -98.70335388183594, + "logps/rejected": -665.5160522460938, + "loss": 1.0485, + "margin_dpo/margin_mean": 248.5186004638672, + "margin_dpo/margin_std": 279.26947021484375, + "step": 377 + }, + { + "KL/chosen_KL_mean": -306.4930114746094, + "KL/mean": -437.6435546875, + "KL/rejected_KL_mean": -568.7940673828125, + "KL/std": 253.0465087890625, + "epoch": 0.5550660792951542, + "fcm_dpo/beta": 0.001769623951986432, + "fcm_dpo/delta": -0.06735318899154663, + "fcm_dpo/margin": 262.3010559082031, + "fcm_dpo/q_t": 0.39068034291267395, + "grad_norm": 28.7508544921875, + "learning_rate": 2.4743338442442754e-07, + "logits/chosen": -0.4216500520706177, + "logits/rejected": -0.43784886598587036, + "logps/chosen": -351.5135498046875, + "logps/ref_chosen": -45.02053451538086, + "logps/ref_rejected": -88.0469741821289, + "logps/rejected": -656.841064453125, + "loss": 1.0276, + "margin_dpo/margin_mean": 262.3010559082031, + "margin_dpo/margin_std": 266.9130554199219, + "step": 378 + }, + { + "KL/chosen_KL_mean": -351.31341552734375, + "KL/mean": -483.653564453125, + "KL/rejected_KL_mean": -615.9937744140625, + "KL/std": 259.1260986328125, + "epoch": 0.5565345080763583, + "fcm_dpo/beta": 0.0017398163909092546, + "fcm_dpo/delta": -0.06416111439466476, + "fcm_dpo/margin": 264.68035888671875, + "fcm_dpo/q_t": 0.39443039894104004, + "grad_norm": 26.82124137878418, + "learning_rate": 2.461501611777483e-07, + "logits/chosen": -0.41827017068862915, + "logits/rejected": -0.4405589699745178, + "logps/chosen": -404.4954833984375, + "logps/ref_chosen": -53.182098388671875, + "logps/ref_rejected": -114.3001708984375, + "logps/rejected": -730.2939453125, + "loss": 1.0472, + "margin_dpo/margin_mean": 264.68035888671875, + "margin_dpo/margin_std": 312.6787109375, + "step": 379 + }, + { + "KL/chosen_KL_mean": -350.41455078125, + "KL/mean": -491.9856262207031, + "KL/rejected_KL_mean": -633.5567626953125, + "KL/std": 297.56414794921875, + "epoch": 0.5580029368575624, + "fcm_dpo/beta": 0.0017209737561643124, + "fcm_dpo/delta": -0.0916648805141449, + "fcm_dpo/margin": 283.14215087890625, + "fcm_dpo/q_t": 0.3864797353744507, + "grad_norm": 27.675559997558594, + "learning_rate": 2.4486703937790243e-07, + "logits/chosen": -0.42042213678359985, + "logits/rejected": -0.447647362947464, + "logps/chosen": -401.767578125, + "logps/ref_chosen": -51.3530387878418, + "logps/ref_rejected": -104.19169616699219, + "logps/rejected": -737.7484130859375, + "loss": 1.0301, + "margin_dpo/margin_mean": 283.14215087890625, + "margin_dpo/margin_std": 325.3090515136719, + "step": 380 + }, + { + "KL/chosen_KL_mean": -372.5402526855469, + "KL/mean": -474.24298095703125, + "KL/rejected_KL_mean": -575.9457397460938, + "KL/std": 250.25369262695312, + "epoch": 0.5594713656387665, + "fcm_dpo/beta": 0.001720770844258368, + "fcm_dpo/delta": 0.05182623863220215, + "fcm_dpo/margin": 203.4054718017578, + "fcm_dpo/q_t": 0.42006832361221313, + "grad_norm": 28.662925720214844, + "learning_rate": 2.435840528363426e-07, + "logits/chosen": -0.42481085658073425, + "logits/rejected": -0.4088062047958374, + "logps/chosen": -430.34332275390625, + "logps/ref_chosen": -57.80306625366211, + "logps/ref_rejected": -79.21940612792969, + "logps/rejected": -655.1651611328125, + "loss": 1.148, + "margin_dpo/margin_mean": 203.40545654296875, + "margin_dpo/margin_std": 341.2312927246094, + "step": 381 + }, + { + "KL/chosen_KL_mean": -368.66522216796875, + "KL/mean": -494.4615173339844, + "KL/rejected_KL_mean": -620.2578125, + "KL/std": 243.38522338867188, + "epoch": 0.5609397944199707, + "fcm_dpo/beta": 0.0017197042470797896, + "fcm_dpo/delta": -0.03414086997509003, + "fcm_dpo/margin": 251.59266662597656, + "fcm_dpo/q_t": 0.39874282479286194, + "grad_norm": 24.968591690063477, + "learning_rate": 2.4230123536095745e-07, + "logits/chosen": -0.4726359248161316, + "logits/rejected": -0.47993141412734985, + "logps/chosen": -434.68548583984375, + "logps/ref_chosen": -66.02030181884766, + "logps/ref_rejected": -110.71016693115234, + "logps/rejected": -730.968017578125, + "loss": 1.0487, + "margin_dpo/margin_mean": 251.59266662597656, + "margin_dpo/margin_std": 264.68463134765625, + "step": 382 + }, + { + "KL/chosen_KL_mean": -396.24041748046875, + "KL/mean": -516.6358032226562, + "KL/rejected_KL_mean": -637.0311889648438, + "KL/std": 278.3402099609375, + "epoch": 0.5624082232011748, + "fcm_dpo/beta": 0.0017115201335400343, + "fcm_dpo/delta": -0.012677527032792568, + "fcm_dpo/margin": 240.79074096679688, + "fcm_dpo/q_t": 0.4055634140968323, + "grad_norm": 30.020652770996094, + "learning_rate": 2.4101862075518037e-07, + "logits/chosen": -0.4480942487716675, + "logits/rejected": -0.4583819806575775, + "logps/chosen": -446.63189697265625, + "logps/ref_chosen": -50.39148712158203, + "logps/ref_rejected": -93.71589660644531, + "logps/rejected": -730.7470703125, + "loss": 1.1069, + "margin_dpo/margin_mean": 240.79074096679688, + "margin_dpo/margin_std": 365.572021484375, + "step": 383 + }, + { + "KL/chosen_KL_mean": -396.1028137207031, + "KL/mean": -495.835693359375, + "KL/rejected_KL_mean": -595.568603515625, + "KL/std": 259.7392578125, + "epoch": 0.5638766519823789, + "fcm_dpo/beta": 0.0017325121443718672, + "fcm_dpo/delta": 0.055945903062820435, + "fcm_dpo/margin": 199.46575927734375, + "fcm_dpo/q_t": 0.418599933385849, + "grad_norm": 25.613414764404297, + "learning_rate": 2.397362428170992e-07, + "logits/chosen": -0.5003800392150879, + "logits/rejected": -0.4956563413143158, + "logps/chosen": -448.14892578125, + "logps/ref_chosen": -52.046104431152344, + "logps/ref_rejected": -85.76089477539062, + "logps/rejected": -681.3294677734375, + "loss": 1.1132, + "margin_dpo/margin_mean": 199.46575927734375, + "margin_dpo/margin_std": 242.4035186767578, + "step": 384 + }, + { + "KL/chosen_KL_mean": -381.33917236328125, + "KL/mean": -502.6947021484375, + "KL/rejected_KL_mean": -624.0501708984375, + "KL/std": 228.0861053466797, + "epoch": 0.5653450807635829, + "fcm_dpo/beta": 0.0017262771725654602, + "fcm_dpo/delta": -0.019818957895040512, + "fcm_dpo/margin": 242.7109832763672, + "fcm_dpo/q_t": 0.40087053179740906, + "grad_norm": 34.80295181274414, + "learning_rate": 2.3845413533856514e-07, + "logits/chosen": -0.5185421705245972, + "logits/rejected": -0.4977598786354065, + "logps/chosen": -446.891357421875, + "logps/ref_chosen": -65.55215454101562, + "logps/ref_rejected": -77.82792663574219, + "logps/rejected": -701.8780517578125, + "loss": 1.054, + "margin_dpo/margin_mean": 242.71096801757812, + "margin_dpo/margin_std": 247.6511993408203, + "step": 385 + }, + { + "KL/chosen_KL_mean": -391.0846862792969, + "KL/mean": -520.10107421875, + "KL/rejected_KL_mean": -649.117431640625, + "KL/std": 283.81964111328125, + "epoch": 0.566813509544787, + "fcm_dpo/beta": 0.0017148086335510015, + "fcm_dpo/delta": -0.044420357793569565, + "fcm_dpo/margin": 258.03277587890625, + "fcm_dpo/q_t": 0.39896559715270996, + "grad_norm": 34.90454864501953, + "learning_rate": 2.3717233210430254e-07, + "logits/chosen": -0.5139098167419434, + "logits/rejected": -0.5114161968231201, + "logps/chosen": -449.3065490722656, + "logps/ref_chosen": -58.22185516357422, + "logps/ref_rejected": -92.32742309570312, + "logps/rejected": -741.4448852539062, + "loss": 1.0673, + "margin_dpo/margin_mean": 258.03277587890625, + "margin_dpo/margin_std": 333.954833984375, + "step": 386 + }, + { + "KL/chosen_KL_mean": -408.6097412109375, + "KL/mean": -517.918701171875, + "KL/rejected_KL_mean": -627.2276611328125, + "KL/std": 263.1587219238281, + "epoch": 0.5682819383259912, + "fcm_dpo/beta": 0.0017092199996113777, + "fcm_dpo/delta": 0.027111487463116646, + "fcm_dpo/margin": 218.617919921875, + "fcm_dpo/q_t": 0.412253201007843, + "grad_norm": 33.19843292236328, + "learning_rate": 2.3589086689101889e-07, + "logits/chosen": -0.5541732311248779, + "logits/rejected": -0.5390141010284424, + "logps/chosen": -475.0291748046875, + "logps/ref_chosen": -66.41944885253906, + "logps/ref_rejected": -92.16915893554688, + "logps/rejected": -719.3968505859375, + "loss": 1.0981, + "margin_dpo/margin_mean": 218.617919921875, + "margin_dpo/margin_std": 265.804443359375, + "step": 387 + }, + { + "KL/chosen_KL_mean": -377.7342224121094, + "KL/mean": -525.062744140625, + "KL/rejected_KL_mean": -672.391357421875, + "KL/std": 302.99163818359375, + "epoch": 0.5697503671071953, + "fcm_dpo/beta": 0.0016880175098776817, + "fcm_dpo/delta": -0.10291901975870132, + "fcm_dpo/margin": 294.6571350097656, + "fcm_dpo/q_t": 0.387167364358902, + "grad_norm": 31.615806579589844, + "learning_rate": 2.3460977346651428e-07, + "logits/chosen": -0.49094468355178833, + "logits/rejected": -0.5020414590835571, + "logps/chosen": -427.8636779785156, + "logps/ref_chosen": -50.129459381103516, + "logps/ref_rejected": -104.43305969238281, + "logps/rejected": -776.8244018554688, + "loss": 1.0239, + "margin_dpo/margin_mean": 294.65716552734375, + "margin_dpo/margin_std": 341.442138671875, + "step": 388 + }, + { + "KL/chosen_KL_mean": -403.025146484375, + "KL/mean": -526.2021484375, + "KL/rejected_KL_mean": -649.379150390625, + "KL/std": 294.0400390625, + "epoch": 0.5712187958883994, + "fcm_dpo/beta": 0.001677666325122118, + "fcm_dpo/delta": -0.013942467980086803, + "fcm_dpo/margin": 246.35403442382812, + "fcm_dpo/q_t": 0.4049556255340576, + "grad_norm": 31.12624740600586, + "learning_rate": 2.3332908558879177e-07, + "logits/chosen": -0.5525184273719788, + "logits/rejected": -0.5475004315376282, + "logps/chosen": -460.9317626953125, + "logps/ref_chosen": -57.906593322753906, + "logps/ref_rejected": -77.91454315185547, + "logps/rejected": -727.293701171875, + "loss": 1.083, + "margin_dpo/margin_mean": 246.35403442382812, + "margin_dpo/margin_std": 321.620361328125, + "step": 389 + }, + { + "KL/chosen_KL_mean": -382.27349853515625, + "KL/mean": -502.8726806640625, + "KL/rejected_KL_mean": -623.4718017578125, + "KL/std": 278.3486022949219, + "epoch": 0.5726872246696035, + "fcm_dpo/beta": 0.0016733764205127954, + "fcm_dpo/delta": -0.00393829308450222, + "fcm_dpo/margin": 241.19834899902344, + "fcm_dpo/q_t": 0.4098580479621887, + "grad_norm": 22.825424194335938, + "learning_rate": 2.320488370051681e-07, + "logits/chosen": -0.4611801207065582, + "logits/rejected": -0.45201367139816284, + "logps/chosen": -431.4993896484375, + "logps/ref_chosen": -49.22591781616211, + "logps/ref_rejected": -85.5281982421875, + "logps/rejected": -709.0, + "loss": 1.1036, + "margin_dpo/margin_mean": 241.19834899902344, + "margin_dpo/margin_std": 353.98956298828125, + "step": 390 + }, + { + "KL/chosen_KL_mean": -373.9214782714844, + "KL/mean": -445.02374267578125, + "KL/rejected_KL_mean": -516.1259765625, + "KL/std": 262.6861572265625, + "epoch": 0.5741556534508077, + "fcm_dpo/beta": 0.0017185378819704056, + "fcm_dpo/delta": 0.1594843566417694, + "fcm_dpo/margin": 142.20448303222656, + "fcm_dpo/q_t": 0.44371968507766724, + "grad_norm": 40.434242248535156, + "learning_rate": 2.3076906145138405e-07, + "logits/chosen": -0.47930610179901123, + "logits/rejected": -0.4707353711128235, + "logps/chosen": -438.2511291503906, + "logps/ref_chosen": -64.32965087890625, + "logps/ref_rejected": -86.73820495605469, + "logps/rejected": -602.8641357421875, + "loss": 1.2116, + "margin_dpo/margin_mean": 142.20448303222656, + "margin_dpo/margin_std": 274.74224853515625, + "step": 391 + }, + { + "KL/chosen_KL_mean": -309.6976318359375, + "KL/mean": -447.2847900390625, + "KL/rejected_KL_mean": -584.8719482421875, + "KL/std": 268.35784912109375, + "epoch": 0.5756240822320118, + "fcm_dpo/beta": 0.0017193170497193933, + "fcm_dpo/delta": -0.076762355864048, + "fcm_dpo/margin": 275.17437744140625, + "fcm_dpo/q_t": 0.389517605304718, + "grad_norm": 23.396804809570312, + "learning_rate": 2.294897926507156e-07, + "logits/chosen": -0.4492862820625305, + "logits/rejected": -0.44523316621780396, + "logps/chosen": -363.20159912109375, + "logps/ref_chosen": -53.50397872924805, + "logps/ref_rejected": -102.34584045410156, + "logps/rejected": -687.2177734375, + "loss": 1.0162, + "margin_dpo/margin_mean": 275.17437744140625, + "margin_dpo/margin_std": 261.0281066894531, + "step": 392 + }, + { + "KL/chosen_KL_mean": -305.1656494140625, + "KL/mean": -412.20819091796875, + "KL/rejected_KL_mean": -519.2507934570312, + "KL/std": 271.5501708984375, + "epoch": 0.5770925110132159, + "fcm_dpo/beta": 0.001711581600829959, + "fcm_dpo/delta": 0.03485105559229851, + "fcm_dpo/margin": 214.08511352539062, + "fcm_dpo/q_t": 0.4182642698287964, + "grad_norm": 21.774555206298828, + "learning_rate": 2.2821106431308543e-07, + "logits/chosen": -0.44074547290802, + "logits/rejected": -0.4389492869377136, + "logps/chosen": -351.63958740234375, + "logps/ref_chosen": -46.473915100097656, + "logps/ref_rejected": -71.96885681152344, + "logps/rejected": -591.2196044921875, + "loss": 1.1278, + "margin_dpo/margin_mean": 214.0851287841797, + "margin_dpo/margin_std": 337.0278015136719, + "step": 393 + }, + { + "KL/chosen_KL_mean": -334.2687683105469, + "KL/mean": -451.5635681152344, + "KL/rejected_KL_mean": -568.8583984375, + "KL/std": 285.47515869140625, + "epoch": 0.57856093979442, + "fcm_dpo/beta": 0.0017150124767795205, + "fcm_dpo/delta": -0.0024520214647054672, + "fcm_dpo/margin": 234.589599609375, + "fcm_dpo/q_t": 0.4073120653629303, + "grad_norm": 26.771778106689453, + "learning_rate": 2.2693291013417452e-07, + "logits/chosen": -0.44740962982177734, + "logits/rejected": -0.4476820230484009, + "logps/chosen": -387.1803283691406, + "logps/ref_chosen": -52.91154861450195, + "logps/ref_rejected": -90.8226318359375, + "logps/rejected": -659.6810302734375, + "loss": 1.0818, + "margin_dpo/margin_mean": 234.589599609375, + "margin_dpo/margin_std": 290.7215576171875, + "step": 394 + }, + { + "KL/chosen_KL_mean": -330.6572265625, + "KL/mean": -454.97314453125, + "KL/rejected_KL_mean": -579.2891845703125, + "KL/std": 274.084228515625, + "epoch": 0.580029368575624, + "fcm_dpo/beta": 0.001704660477116704, + "fcm_dpo/delta": -0.025200337171554565, + "fcm_dpo/margin": 248.63189697265625, + "fcm_dpo/q_t": 0.4029311537742615, + "grad_norm": 27.568639755249023, + "learning_rate": 2.2565536379453404e-07, + "logits/chosen": -0.5028017163276672, + "logits/rejected": -0.49913692474365234, + "logps/chosen": -393.2033386230469, + "logps/ref_chosen": -62.546112060546875, + "logps/ref_rejected": -83.78262329101562, + "logps/rejected": -663.07177734375, + "loss": 1.0744, + "margin_dpo/margin_mean": 248.63189697265625, + "margin_dpo/margin_std": 315.91448974609375, + "step": 395 + }, + { + "KL/chosen_KL_mean": -339.6167297363281, + "KL/mean": -453.63134765625, + "KL/rejected_KL_mean": -567.64599609375, + "KL/std": 269.79595947265625, + "epoch": 0.5814977973568282, + "fcm_dpo/beta": 0.0017089219763875008, + "fcm_dpo/delta": 0.010719288140535355, + "fcm_dpo/margin": 228.02923583984375, + "fcm_dpo/q_t": 0.4084508419036865, + "grad_norm": 20.283159255981445, + "learning_rate": 2.2437845895869825e-07, + "logits/chosen": -0.4966447353363037, + "logits/rejected": -0.4823087155818939, + "logps/chosen": -408.6126708984375, + "logps/ref_chosen": -68.99594116210938, + "logps/ref_rejected": -88.64665985107422, + "logps/rejected": -656.2926025390625, + "loss": 1.0763, + "margin_dpo/margin_mean": 228.02923583984375, + "margin_dpo/margin_std": 249.16293334960938, + "step": 396 + }, + { + "KL/chosen_KL_mean": -331.72991943359375, + "KL/mean": -472.43841552734375, + "KL/rejected_KL_mean": -613.1468505859375, + "KL/std": 266.5982360839844, + "epoch": 0.5829662261380323, + "fcm_dpo/beta": 0.0016858780290931463, + "fcm_dpo/delta": -0.07868388295173645, + "fcm_dpo/margin": 281.4169921875, + "fcm_dpo/q_t": 0.38912802934646606, + "grad_norm": 31.900684356689453, + "learning_rate": 2.2310222927429716e-07, + "logits/chosen": -0.45149320363998413, + "logits/rejected": -0.45756763219833374, + "logps/chosen": -393.007080078125, + "logps/ref_chosen": -61.27716827392578, + "logps/ref_rejected": -103.11612701416016, + "logps/rejected": -716.2630004882812, + "loss": 1.0206, + "margin_dpo/margin_mean": 281.4169921875, + "margin_dpo/margin_std": 282.707275390625, + "step": 397 + }, + { + "KL/chosen_KL_mean": -356.6610412597656, + "KL/mean": -489.5479736328125, + "KL/rejected_KL_mean": -622.4349365234375, + "KL/std": 285.5445556640625, + "epoch": 0.5844346549192364, + "fcm_dpo/beta": 0.0016720399726182222, + "fcm_dpo/delta": -0.04650367423892021, + "fcm_dpo/margin": 265.7738342285156, + "fcm_dpo/q_t": 0.39866209030151367, + "grad_norm": 24.273387908935547, + "learning_rate": 2.2182670837116972e-07, + "logits/chosen": -0.5021190643310547, + "logits/rejected": -0.4984011650085449, + "logps/chosen": -424.8125915527344, + "logps/ref_chosen": -68.15155029296875, + "logps/ref_rejected": -108.52360534667969, + "logps/rejected": -730.95849609375, + "loss": 1.0615, + "margin_dpo/margin_mean": 265.7738342285156, + "margin_dpo/margin_std": 334.2044677734375, + "step": 398 + }, + { + "KL/chosen_KL_mean": -323.0514221191406, + "KL/mean": -437.18963623046875, + "KL/rejected_KL_mean": -551.327880859375, + "KL/std": 256.7247314453125, + "epoch": 0.5859030837004405, + "fcm_dpo/beta": 0.0016683805733919144, + "fcm_dpo/delta": 0.019710222259163857, + "fcm_dpo/margin": 228.27645874023438, + "fcm_dpo/q_t": 0.4121752977371216, + "grad_norm": 26.480926513671875, + "learning_rate": 2.2055192986047804e-07, + "logits/chosen": -0.45461803674697876, + "logits/rejected": -0.4152987003326416, + "logps/chosen": -383.94122314453125, + "logps/ref_chosen": -60.889801025390625, + "logps/ref_rejected": -77.965576171875, + "logps/rejected": -629.29345703125, + "loss": 1.1112, + "margin_dpo/margin_mean": 228.27645874023438, + "margin_dpo/margin_std": 323.93865966796875, + "step": 399 + }, + { + "KL/chosen_KL_mean": -316.2700500488281, + "KL/mean": -488.2925720214844, + "KL/rejected_KL_mean": -660.3151245117188, + "KL/std": 281.55780029296875, + "epoch": 0.5873715124816447, + "fcm_dpo/beta": 0.0016302517615258694, + "fcm_dpo/delta": -0.17095670104026794, + "fcm_dpo/margin": 344.0451354980469, + "fcm_dpo/q_t": 0.3687145709991455, + "grad_norm": 19.69918441772461, + "learning_rate": 2.192779273338215e-07, + "logits/chosen": -0.43350642919540405, + "logits/rejected": -0.4296361804008484, + "logps/chosen": -379.91363525390625, + "logps/ref_chosen": -63.64359664916992, + "logps/ref_rejected": -105.252685546875, + "logps/rejected": -765.5678100585938, + "loss": 0.9622, + "margin_dpo/margin_mean": 344.04510498046875, + "margin_dpo/margin_std": 313.25408935546875, + "step": 400 + }, + { + "KL/chosen_KL_mean": -363.84393310546875, + "KL/mean": -460.88262939453125, + "KL/rejected_KL_mean": -557.92138671875, + "KL/std": 290.2945861816406, + "epoch": 0.5888399412628488, + "fcm_dpo/beta": 0.00163645064458251, + "fcm_dpo/delta": 0.08519099652767181, + "fcm_dpo/margin": 194.07742309570312, + "fcm_dpo/q_t": 0.42967379093170166, + "grad_norm": 24.62172508239746, + "learning_rate": 2.1800473436235136e-07, + "logits/chosen": -0.4492917060852051, + "logits/rejected": -0.44258540868759155, + "logps/chosen": -421.0069580078125, + "logps/ref_chosen": -57.16303253173828, + "logps/ref_rejected": -83.79249572753906, + "logps/rejected": -641.7138671875, + "loss": 1.1887, + "margin_dpo/margin_mean": 194.07742309570312, + "margin_dpo/margin_std": 388.0357666015625, + "step": 401 + }, + { + "KL/chosen_KL_mean": -277.44989013671875, + "KL/mean": -456.07666015625, + "KL/rejected_KL_mean": -634.7034301757812, + "KL/std": 316.1929016113281, + "epoch": 0.5903083700440529, + "fcm_dpo/beta": 0.0016029919497668743, + "fcm_dpo/delta": -0.18321484327316284, + "fcm_dpo/margin": 357.2535400390625, + "fcm_dpo/q_t": 0.36773985624313354, + "grad_norm": 21.816125869750977, + "learning_rate": 2.1673238449588665e-07, + "logits/chosen": -0.4676339626312256, + "logits/rejected": -0.4571627974510193, + "logps/chosen": -328.19024658203125, + "logps/ref_chosen": -50.74037170410156, + "logps/ref_rejected": -81.0460433959961, + "logps/rejected": -715.74951171875, + "loss": 0.9533, + "margin_dpo/margin_mean": 357.2535400390625, + "margin_dpo/margin_std": 325.81842041015625, + "step": 402 + }, + { + "KL/chosen_KL_mean": -314.04962158203125, + "KL/mean": -449.58245849609375, + "KL/rejected_KL_mean": -585.115234375, + "KL/std": 293.69842529296875, + "epoch": 0.591776798825257, + "fcm_dpo/beta": 0.0015804520808160305, + "fcm_dpo/delta": -0.029699519276618958, + "fcm_dpo/margin": 271.06561279296875, + "fcm_dpo/q_t": 0.4006895124912262, + "grad_norm": 27.178544998168945, + "learning_rate": 2.154609112620295e-07, + "logits/chosen": -0.4410826563835144, + "logits/rejected": -0.4390965700149536, + "logps/chosen": -361.19696044921875, + "logps/ref_chosen": -47.14731216430664, + "logps/ref_rejected": -77.2666015625, + "logps/rejected": -662.3818359375, + "loss": 1.0603, + "margin_dpo/margin_mean": 271.06561279296875, + "margin_dpo/margin_std": 310.4146423339844, + "step": 403 + }, + { + "KL/chosen_KL_mean": -349.8211975097656, + "KL/mean": -481.5411376953125, + "KL/rejected_KL_mean": -613.2611083984375, + "KL/std": 281.17803955078125, + "epoch": 0.593245227606461, + "fcm_dpo/beta": 0.0015735691413283348, + "fcm_dpo/delta": -0.015212337486445904, + "fcm_dpo/margin": 263.43988037109375, + "fcm_dpo/q_t": 0.40503576397895813, + "grad_norm": 29.41777992248535, + "learning_rate": 2.1419034816528218e-07, + "logits/chosen": -0.46026161313056946, + "logits/rejected": -0.45394134521484375, + "logps/chosen": -397.69647216796875, + "logps/ref_chosen": -47.875274658203125, + "logps/ref_rejected": -77.15499877929688, + "logps/rejected": -690.4160766601562, + "loss": 1.0903, + "margin_dpo/margin_mean": 263.43988037109375, + "margin_dpo/margin_std": 362.440673828125, + "step": 404 + }, + { + "KL/chosen_KL_mean": -395.85577392578125, + "KL/mean": -504.7757873535156, + "KL/rejected_KL_mean": -613.69580078125, + "KL/std": 314.71160888671875, + "epoch": 0.5947136563876652, + "fcm_dpo/beta": 0.0015670396387577057, + "fcm_dpo/delta": -0.04510403424501419, + "fcm_dpo/margin": 217.84002685546875, + "fcm_dpo/q_t": 0.4239059090614319, + "grad_norm": 35.01424789428711, + "learning_rate": 2.129207286861638e-07, + "logits/chosen": -0.4418267011642456, + "logits/rejected": -0.43295902013778687, + "logps/chosen": -461.0186767578125, + "logps/ref_chosen": -65.16290283203125, + "logps/ref_rejected": -87.18678283691406, + "logps/rejected": -700.882568359375, + "loss": 1.1669, + "margin_dpo/margin_mean": 217.84002685546875, + "margin_dpo/margin_std": 392.323486328125, + "step": 405 + }, + { + "KL/chosen_KL_mean": -354.01934814453125, + "KL/mean": -493.54443359375, + "KL/rejected_KL_mean": -633.069580078125, + "KL/std": 308.9290771484375, + "epoch": 0.5961820851688693, + "fcm_dpo/beta": 0.0015584398061037064, + "fcm_dpo/delta": -0.03671257197856903, + "fcm_dpo/margin": 279.05023193359375, + "fcm_dpo/q_t": 0.40036964416503906, + "grad_norm": 30.385452270507812, + "learning_rate": 2.1165208628032861e-07, + "logits/chosen": -0.46909600496292114, + "logits/rejected": -0.47889643907546997, + "logps/chosen": -403.7601623535156, + "logps/ref_chosen": -49.740814208984375, + "logps/ref_rejected": -92.07862854003906, + "logps/rejected": -725.148193359375, + "loss": 1.0625, + "margin_dpo/margin_mean": 279.05023193359375, + "margin_dpo/margin_std": 336.1007995605469, + "step": 406 + }, + { + "KL/chosen_KL_mean": -377.31060791015625, + "KL/mean": -466.502685546875, + "KL/rejected_KL_mean": -555.6947631835938, + "KL/std": 247.73455810546875, + "epoch": 0.5976505139500734, + "fcm_dpo/beta": 0.001549946959130466, + "fcm_dpo/delta": 0.008951360359787941, + "fcm_dpo/margin": 178.3841552734375, + "fcm_dpo/q_t": 0.4347270131111145, + "grad_norm": 31.04163360595703, + "learning_rate": 2.1038445437768375e-07, + "logits/chosen": -0.4896223545074463, + "logits/rejected": -0.46283426880836487, + "logps/chosen": -433.6413269042969, + "logps/ref_chosen": -56.33069610595703, + "logps/ref_rejected": -77.51209259033203, + "logps/rejected": -633.2068481445312, + "loss": 1.1926, + "margin_dpo/margin_mean": 178.38414001464844, + "margin_dpo/margin_std": 325.7279968261719, + "step": 407 + }, + { + "KL/chosen_KL_mean": -381.83782958984375, + "KL/mean": -482.71337890625, + "KL/rejected_KL_mean": -583.5889892578125, + "KL/std": 239.08734130859375, + "epoch": 0.5991189427312775, + "fcm_dpo/beta": 0.0015726467827335, + "fcm_dpo/delta": 0.0853329598903656, + "fcm_dpo/margin": 201.75115966796875, + "fcm_dpo/q_t": 0.42552345991134644, + "grad_norm": 24.999143600463867, + "learning_rate": 2.0911786638150872e-07, + "logits/chosen": -0.4955484867095947, + "logits/rejected": -0.47467708587646484, + "logps/chosen": -451.62713623046875, + "logps/ref_chosen": -69.789306640625, + "logps/ref_rejected": -90.09693908691406, + "logps/rejected": -673.6859130859375, + "loss": 1.1374, + "margin_dpo/margin_mean": 201.75115966796875, + "margin_dpo/margin_std": 270.3141784667969, + "step": 408 + }, + { + "KL/chosen_KL_mean": -364.642578125, + "KL/mean": -463.87872314453125, + "KL/rejected_KL_mean": -563.1149291992188, + "KL/std": 257.52569580078125, + "epoch": 0.6005873715124816, + "fcm_dpo/beta": 0.0015998759772628546, + "fcm_dpo/delta": 0.0850619375705719, + "fcm_dpo/margin": 198.47232055664062, + "fcm_dpo/q_t": 0.4262683391571045, + "grad_norm": 29.315763473510742, + "learning_rate": 2.0785235566757517e-07, + "logits/chosen": -0.4769352376461029, + "logits/rejected": -0.4611578583717346, + "logps/chosen": -431.96002197265625, + "logps/ref_chosen": -67.31744384765625, + "logps/ref_rejected": -84.904296875, + "logps/rejected": -648.0191650390625, + "loss": 1.145, + "margin_dpo/margin_mean": 198.47232055664062, + "margin_dpo/margin_std": 289.51141357421875, + "step": 409 + }, + { + "KL/chosen_KL_mean": -339.183349609375, + "KL/mean": -455.3343505859375, + "KL/rejected_KL_mean": -571.4853515625, + "KL/std": 257.6640319824219, + "epoch": 0.6020558002936858, + "fcm_dpo/beta": 0.0016098904889076948, + "fcm_dpo/delta": 0.027035847306251526, + "fcm_dpo/margin": 232.30197143554688, + "fcm_dpo/q_t": 0.41175514459609985, + "grad_norm": 26.693159103393555, + "learning_rate": 2.065879555832674e-07, + "logits/chosen": -0.5109409093856812, + "logits/rejected": -0.5129973888397217, + "logps/chosen": -390.648681640625, + "logps/ref_chosen": -51.465354919433594, + "logps/ref_rejected": -83.198974609375, + "logps/rejected": -654.684326171875, + "loss": 1.0976, + "margin_dpo/margin_mean": 232.30198669433594, + "margin_dpo/margin_std": 286.159912109375, + "step": 410 + }, + { + "KL/chosen_KL_mean": -354.3096923828125, + "KL/mean": -474.0059814453125, + "KL/rejected_KL_mean": -593.7022705078125, + "KL/std": 283.52850341796875, + "epoch": 0.6035242290748899, + "fcm_dpo/beta": 0.0016047862591221929, + "fcm_dpo/delta": 0.015297271311283112, + "fcm_dpo/margin": 239.392578125, + "fcm_dpo/q_t": 0.41245776414871216, + "grad_norm": 24.12554931640625, + "learning_rate": 2.0532469944670343e-07, + "logits/chosen": -0.5120722055435181, + "logits/rejected": -0.520818293094635, + "logps/chosen": -406.6169738769531, + "logps/ref_chosen": -52.30727005004883, + "logps/ref_rejected": -80.69495391845703, + "logps/rejected": -674.397216796875, + "loss": 1.1053, + "margin_dpo/margin_mean": 239.392578125, + "margin_dpo/margin_std": 321.0933532714844, + "step": 411 + }, + { + "KL/chosen_KL_mean": -361.31805419921875, + "KL/mean": -484.0078125, + "KL/rejected_KL_mean": -606.6976318359375, + "KL/std": 272.78863525390625, + "epoch": 0.604992657856094, + "fcm_dpo/beta": 0.0016203692648559809, + "fcm_dpo/delta": 0.0024842238053679466, + "fcm_dpo/margin": 245.37954711914062, + "fcm_dpo/q_t": 0.40797942876815796, + "grad_norm": 33.52204132080078, + "learning_rate": 2.0406262054585738e-07, + "logits/chosen": -0.5373940467834473, + "logits/rejected": -0.5674378275871277, + "logps/chosen": -414.4621887207031, + "logps/ref_chosen": -53.144126892089844, + "logps/ref_rejected": -100.0608139038086, + "logps/rejected": -706.7584228515625, + "loss": 1.0918, + "margin_dpo/margin_mean": 245.3795623779297, + "margin_dpo/margin_std": 322.3990173339844, + "step": 412 + }, + { + "KL/chosen_KL_mean": -378.59344482421875, + "KL/mean": -501.33282470703125, + "KL/rejected_KL_mean": -624.072265625, + "KL/std": 269.05731201171875, + "epoch": 0.6064610866372981, + "fcm_dpo/beta": 0.001623795717023313, + "fcm_dpo/delta": 0.0013288334012031555, + "fcm_dpo/margin": 245.4788055419922, + "fcm_dpo/q_t": 0.4057984948158264, + "grad_norm": 29.867643356323242, + "learning_rate": 2.0280175213768205e-07, + "logits/chosen": -0.5124090909957886, + "logits/rejected": -0.5228337049484253, + "logps/chosen": -440.1754150390625, + "logps/ref_chosen": -61.58196258544922, + "logps/ref_rejected": -99.47340393066406, + "logps/rejected": -723.545654296875, + "loss": 1.081, + "margin_dpo/margin_mean": 245.4788055419922, + "margin_dpo/margin_std": 293.564697265625, + "step": 413 + }, + { + "KL/chosen_KL_mean": -343.92242431640625, + "KL/mean": -475.8585510253906, + "KL/rejected_KL_mean": -607.794677734375, + "KL/std": 259.8440856933594, + "epoch": 0.6079295154185022, + "fcm_dpo/beta": 0.0016230610199272633, + "fcm_dpo/delta": -0.030179578810930252, + "fcm_dpo/margin": 263.87225341796875, + "fcm_dpo/q_t": 0.3998476266860962, + "grad_norm": 43.11936950683594, + "learning_rate": 2.0154212744723247e-07, + "logits/chosen": -0.45165306329727173, + "logits/rejected": -0.4454384446144104, + "logps/chosen": -390.5539245605469, + "logps/ref_chosen": -46.63148498535156, + "logps/ref_rejected": -87.64653015136719, + "logps/rejected": -695.441162109375, + "loss": 1.0618, + "margin_dpo/margin_mean": 263.87225341796875, + "margin_dpo/margin_std": 297.9873046875, + "step": 414 + }, + { + "KL/chosen_KL_mean": -382.28839111328125, + "KL/mean": -484.4581604003906, + "KL/rejected_KL_mean": -586.6279296875, + "KL/std": 261.7178649902344, + "epoch": 0.6093979441997063, + "fcm_dpo/beta": 0.001621844945475459, + "fcm_dpo/delta": 0.0709480568766594, + "fcm_dpo/margin": 204.3395233154297, + "fcm_dpo/q_t": 0.42238879203796387, + "grad_norm": 27.217483520507812, + "learning_rate": 2.002837796667909e-07, + "logits/chosen": -0.5291392207145691, + "logits/rejected": -0.5265468955039978, + "logps/chosen": -460.90667724609375, + "logps/ref_chosen": -78.6182861328125, + "logps/ref_rejected": -100.47752380371094, + "logps/rejected": -687.10546875, + "loss": 1.1367, + "margin_dpo/margin_mean": 204.3395233154297, + "margin_dpo/margin_std": 292.1011962890625, + "step": 415 + }, + { + "KL/chosen_KL_mean": -353.57177734375, + "KL/mean": -510.877197265625, + "KL/rejected_KL_mean": -668.1825561523438, + "KL/std": 296.62371826171875, + "epoch": 0.6108663729809104, + "fcm_dpo/beta": 0.0016112902667373419, + "fcm_dpo/delta": -0.11248860508203506, + "fcm_dpo/margin": 314.61083984375, + "fcm_dpo/q_t": 0.3814007043838501, + "grad_norm": 35.17634582519531, + "learning_rate": 1.990267419549914e-07, + "logits/chosen": -0.5343978404998779, + "logits/rejected": -0.5423879623413086, + "logps/chosen": -411.85089111328125, + "logps/ref_chosen": -58.27912521362305, + "logps/ref_rejected": -90.56871795654297, + "logps/rejected": -758.7512817382812, + "loss": 0.9928, + "margin_dpo/margin_mean": 314.61077880859375, + "margin_dpo/margin_std": 286.4470520019531, + "step": 416 + }, + { + "KL/chosen_KL_mean": -357.342041015625, + "KL/mean": -489.947265625, + "KL/rejected_KL_mean": -622.552490234375, + "KL/std": 266.30548095703125, + "epoch": 0.6123348017621145, + "fcm_dpo/beta": 0.0015942594036459923, + "fcm_dpo/delta": -0.023848645389080048, + "fcm_dpo/margin": 265.21044921875, + "fcm_dpo/q_t": 0.4002404808998108, + "grad_norm": 30.77235984802246, + "learning_rate": 1.9777104743594686e-07, + "logits/chosen": -0.5373271703720093, + "logits/rejected": -0.5231212377548218, + "logps/chosen": -407.540771484375, + "logps/ref_chosen": -50.1987190246582, + "logps/ref_rejected": -68.15184020996094, + "logps/rejected": -690.704345703125, + "loss": 1.0501, + "margin_dpo/margin_mean": 265.21044921875, + "margin_dpo/margin_std": 265.76727294921875, + "step": 417 + }, + { + "KL/chosen_KL_mean": -382.75750732421875, + "KL/mean": -518.67626953125, + "KL/rejected_KL_mean": -654.5950317382812, + "KL/std": 304.6961669921875, + "epoch": 0.6138032305433186, + "fcm_dpo/beta": 0.001592871267348528, + "fcm_dpo/delta": -0.03536780923604965, + "fcm_dpo/margin": 271.8375244140625, + "fcm_dpo/q_t": 0.4023270905017853, + "grad_norm": 22.210895538330078, + "learning_rate": 1.965167291983757e-07, + "logits/chosen": -0.6004199385643005, + "logits/rejected": -0.5827990174293518, + "logps/chosen": -464.7359619140625, + "logps/ref_chosen": -81.97846984863281, + "logps/ref_rejected": -104.69148254394531, + "logps/rejected": -759.2864990234375, + "loss": 1.0779, + "margin_dpo/margin_mean": 271.8375244140625, + "margin_dpo/margin_std": 355.95672607421875, + "step": 418 + }, + { + "KL/chosen_KL_mean": -359.854248046875, + "KL/mean": -510.2494201660156, + "KL/rejected_KL_mean": -660.6445922851562, + "KL/std": 277.7416076660156, + "epoch": 0.6152716593245228, + "fcm_dpo/beta": 0.001561171025969088, + "fcm_dpo/delta": -0.07300984114408493, + "fcm_dpo/margin": 300.7904052734375, + "fcm_dpo/q_t": 0.39019423723220825, + "grad_norm": 28.61322784423828, + "learning_rate": 1.9526382029472988e-07, + "logits/chosen": -0.5227010846138, + "logits/rejected": -0.5222500562667847, + "logps/chosen": -412.8028869628906, + "logps/ref_chosen": -52.948646545410156, + "logps/ref_rejected": -91.58309936523438, + "logps/rejected": -752.2276611328125, + "loss": 1.0314, + "margin_dpo/margin_mean": 300.7904052734375, + "margin_dpo/margin_std": 325.02825927734375, + "step": 419 + }, + { + "KL/chosen_KL_mean": -452.51690673828125, + "KL/mean": -546.392333984375, + "KL/rejected_KL_mean": -640.2677001953125, + "KL/std": 296.0887145996094, + "epoch": 0.6167400881057269, + "fcm_dpo/beta": 0.0015826968010514975, + "fcm_dpo/delta": 0.1057576984167099, + "fcm_dpo/margin": 187.75083923339844, + "fcm_dpo/q_t": 0.4339344799518585, + "grad_norm": 54.64779281616211, + "learning_rate": 1.9401235374032425e-07, + "logits/chosen": -0.5721093416213989, + "logits/rejected": -0.5451463460922241, + "logps/chosen": -530.2868041992188, + "logps/ref_chosen": -77.7699203491211, + "logps/ref_rejected": -69.31985473632812, + "logps/rejected": -709.5875244140625, + "loss": 1.2047, + "margin_dpo/margin_mean": 187.75082397460938, + "margin_dpo/margin_std": 400.8773193359375, + "step": 420 + }, + { + "KL/chosen_KL_mean": -379.40533447265625, + "KL/mean": -485.106201171875, + "KL/rejected_KL_mean": -590.8070068359375, + "KL/std": 287.34088134765625, + "epoch": 0.618208516886931, + "fcm_dpo/beta": 0.0016100335633382201, + "fcm_dpo/delta": 0.0610845573246479, + "fcm_dpo/margin": 211.4017333984375, + "fcm_dpo/q_t": 0.4196345806121826, + "grad_norm": 27.371572494506836, + "learning_rate": 1.9276236251246653e-07, + "logits/chosen": -0.5819834470748901, + "logits/rejected": -0.5714644193649292, + "logps/chosen": -433.17120361328125, + "logps/ref_chosen": -53.765865325927734, + "logps/ref_rejected": -89.28144836425781, + "logps/rejected": -680.0885009765625, + "loss": 1.1373, + "margin_dpo/margin_mean": 211.4017333984375, + "margin_dpo/margin_std": 309.2540283203125, + "step": 421 + }, + { + "KL/chosen_KL_mean": -423.903076171875, + "KL/mean": -544.1499633789062, + "KL/rejected_KL_mean": -664.3968505859375, + "KL/std": 294.40728759765625, + "epoch": 0.6196769456681351, + "fcm_dpo/beta": 0.0016107236733660102, + "fcm_dpo/delta": 0.013142132200300694, + "fcm_dpo/margin": 240.49374389648438, + "fcm_dpo/q_t": 0.4098985493183136, + "grad_norm": 33.85724639892578, + "learning_rate": 1.9151387954958792e-07, + "logits/chosen": -0.6141137480735779, + "logits/rejected": -0.6188079118728638, + "logps/chosen": -492.536865234375, + "logps/ref_chosen": -68.6337661743164, + "logps/ref_rejected": -87.86351013183594, + "logps/rejected": -752.2603759765625, + "loss": 1.1085, + "margin_dpo/margin_mean": 240.49374389648438, + "margin_dpo/margin_std": 345.5645446777344, + "step": 422 + }, + { + "KL/chosen_KL_mean": -404.6935119628906, + "KL/mean": -534.154052734375, + "KL/rejected_KL_mean": -663.6144409179688, + "KL/std": 282.32171630859375, + "epoch": 0.6211453744493393, + "fcm_dpo/beta": 0.0016070720739662647, + "fcm_dpo/delta": -0.016826242208480835, + "fcm_dpo/margin": 258.9209899902344, + "fcm_dpo/q_t": 0.4035068154335022, + "grad_norm": 31.080442428588867, + "learning_rate": 1.902669377503756e-07, + "logits/chosen": -0.5942707061767578, + "logits/rejected": -0.6040855646133423, + "logps/chosen": -459.6838073730469, + "logps/ref_chosen": -54.99030303955078, + "logps/ref_rejected": -86.30654907226562, + "logps/rejected": -749.9210205078125, + "loss": 1.072, + "margin_dpo/margin_mean": 258.9209899902344, + "margin_dpo/margin_std": 313.36004638671875, + "step": 423 + }, + { + "KL/chosen_KL_mean": -365.32806396484375, + "KL/mean": -491.30462646484375, + "KL/rejected_KL_mean": -617.28125, + "KL/std": 282.08697509765625, + "epoch": 0.6226138032305433, + "fcm_dpo/beta": 0.0015995125286281109, + "fcm_dpo/delta": -0.003478415310382843, + "fcm_dpo/margin": 251.95314025878906, + "fcm_dpo/q_t": 0.40843045711517334, + "grad_norm": 33.83623123168945, + "learning_rate": 1.890215699729057e-07, + "logits/chosen": -0.577785313129425, + "logits/rejected": -0.555591881275177, + "logps/chosen": -421.3399658203125, + "logps/ref_chosen": -56.01192092895508, + "logps/ref_rejected": -66.47896575927734, + "logps/rejected": -683.7601928710938, + "loss": 1.0919, + "margin_dpo/margin_mean": 251.95315551757812, + "margin_dpo/margin_std": 339.11346435546875, + "step": 424 + }, + { + "KL/chosen_KL_mean": -404.3356018066406, + "KL/mean": -499.7007141113281, + "KL/rejected_KL_mean": -595.0657958984375, + "KL/std": 263.603759765625, + "epoch": 0.6240822320117474, + "fcm_dpo/beta": 0.001631980761885643, + "fcm_dpo/delta": 0.09120546281337738, + "fcm_dpo/margin": 190.730224609375, + "fcm_dpo/q_t": 0.42644861340522766, + "grad_norm": 31.754074096679688, + "learning_rate": 1.8777780903377732e-07, + "logits/chosen": -0.5840317606925964, + "logits/rejected": -0.5841487646102905, + "logps/chosen": -451.20458984375, + "logps/ref_chosen": -46.86899948120117, + "logps/ref_rejected": -95.92545318603516, + "logps/rejected": -690.9912719726562, + "loss": 1.1673, + "margin_dpo/margin_mean": 190.730224609375, + "margin_dpo/margin_std": 325.1813049316406, + "step": 425 + }, + { + "KL/chosen_KL_mean": -375.7733154296875, + "KL/mean": -498.88397216796875, + "KL/rejected_KL_mean": -621.99462890625, + "KL/std": 270.572509765625, + "epoch": 0.6255506607929515, + "fcm_dpo/beta": 0.001641254872083664, + "fcm_dpo/delta": -0.004648171365261078, + "fcm_dpo/margin": 246.22134399414062, + "fcm_dpo/q_t": 0.40564680099487305, + "grad_norm": 29.338314056396484, + "learning_rate": 1.8653568770724803e-07, + "logits/chosen": -0.5852512121200562, + "logits/rejected": -0.5579032897949219, + "logps/chosen": -452.35687255859375, + "logps/ref_chosen": -76.58354187011719, + "logps/ref_rejected": -81.26658630371094, + "logps/rejected": -703.26123046875, + "loss": 1.0805, + "margin_dpo/margin_mean": 246.22132873535156, + "margin_dpo/margin_std": 294.9763488769531, + "step": 426 + }, + { + "KL/chosen_KL_mean": -339.8883361816406, + "KL/mean": -433.8622741699219, + "KL/rejected_KL_mean": -527.8362426757812, + "KL/std": 233.69332885742188, + "epoch": 0.6270190895741556, + "fcm_dpo/beta": 0.0016534591559320688, + "fcm_dpo/delta": 0.09219777584075928, + "fcm_dpo/margin": 187.9479217529297, + "fcm_dpo/q_t": 0.4280179440975189, + "grad_norm": 25.174968719482422, + "learning_rate": 1.8529523872436977e-07, + "logits/chosen": -0.6304788589477539, + "logits/rejected": -0.6161661744117737, + "logps/chosen": -404.7421875, + "logps/ref_chosen": -64.8538818359375, + "logps/ref_rejected": -78.5660171508789, + "logps/rejected": -606.4022216796875, + "loss": 1.1492, + "margin_dpo/margin_mean": 187.94793701171875, + "margin_dpo/margin_std": 274.6386413574219, + "step": 427 + }, + { + "KL/chosen_KL_mean": -421.1455383300781, + "KL/mean": -548.4664306640625, + "KL/rejected_KL_mean": -675.787353515625, + "KL/std": 309.02484130859375, + "epoch": 0.6284875183553598, + "fcm_dpo/beta": 0.0016524514649063349, + "fcm_dpo/delta": -0.022071223706007004, + "fcm_dpo/margin": 254.64175415039062, + "fcm_dpo/q_t": 0.4040681719779968, + "grad_norm": 36.647064208984375, + "learning_rate": 1.8405649477212697e-07, + "logits/chosen": -0.5900696516036987, + "logits/rejected": -0.5930036306381226, + "logps/chosen": -483.7822265625, + "logps/ref_chosen": -62.63666534423828, + "logps/ref_rejected": -103.28181457519531, + "logps/rejected": -779.0691528320312, + "loss": 1.1009, + "margin_dpo/margin_mean": 254.64175415039062, + "margin_dpo/margin_std": 378.00115966796875, + "step": 428 + }, + { + "KL/chosen_KL_mean": -412.0851135253906, + "KL/mean": -507.26202392578125, + "KL/rejected_KL_mean": -602.43896484375, + "KL/std": 265.12261962890625, + "epoch": 0.6299559471365639, + "fcm_dpo/beta": 0.0016535113099962473, + "fcm_dpo/delta": -0.020081549882888794, + "fcm_dpo/margin": 190.3538818359375, + "fcm_dpo/q_t": 0.42727112770080566, + "grad_norm": 34.380889892578125, + "learning_rate": 1.828194884925749e-07, + "logits/chosen": -0.5973831415176392, + "logits/rejected": -0.5831949710845947, + "logps/chosen": -493.3191223144531, + "logps/ref_chosen": -81.23401641845703, + "logps/ref_rejected": -91.79493713378906, + "logps/rejected": -694.23388671875, + "loss": 1.171, + "margin_dpo/margin_mean": 190.3538818359375, + "margin_dpo/margin_std": 328.217041015625, + "step": 429 + }, + { + "KL/chosen_KL_mean": -324.106689453125, + "KL/mean": -425.34185791015625, + "KL/rejected_KL_mean": -526.5770263671875, + "KL/std": 241.1708221435547, + "epoch": 0.631424375917768, + "fcm_dpo/beta": 0.0016685712616890669, + "fcm_dpo/delta": 0.06419498473405838, + "fcm_dpo/margin": 202.4704132080078, + "fcm_dpo/q_t": 0.42112964391708374, + "grad_norm": 28.673410415649414, + "learning_rate": 1.8158425248197928e-07, + "logits/chosen": -0.5664153099060059, + "logits/rejected": -0.562206506729126, + "logps/chosen": -385.0269775390625, + "logps/ref_chosen": -60.920326232910156, + "logps/ref_rejected": -104.42280578613281, + "logps/rejected": -630.9998779296875, + "loss": 1.1216, + "margin_dpo/margin_mean": 202.4704132080078, + "margin_dpo/margin_std": 261.1304016113281, + "step": 430 + }, + { + "KL/chosen_KL_mean": -300.27374267578125, + "KL/mean": -448.88720703125, + "KL/rejected_KL_mean": -597.5006103515625, + "KL/std": 266.6134338378906, + "epoch": 0.6328928046989721, + "fcm_dpo/beta": 0.001646057702600956, + "fcm_dpo/delta": -0.09401103109121323, + "fcm_dpo/margin": 297.22686767578125, + "fcm_dpo/q_t": 0.38535940647125244, + "grad_norm": 27.440969467163086, + "learning_rate": 1.8035081928995788e-07, + "logits/chosen": -0.53639817237854, + "logits/rejected": -0.5400429964065552, + "logps/chosen": -357.62249755859375, + "logps/ref_chosen": -57.34874725341797, + "logps/ref_rejected": -92.84022521972656, + "logps/rejected": -690.3408203125, + "loss": 1.01, + "margin_dpo/margin_mean": 297.2269287109375, + "margin_dpo/margin_std": 285.8399658203125, + "step": 431 + }, + { + "KL/chosen_KL_mean": -309.4350891113281, + "KL/mean": -450.547119140625, + "KL/rejected_KL_mean": -591.6591186523438, + "KL/std": 265.4091796875, + "epoch": 0.6343612334801763, + "fcm_dpo/beta": 0.0016348997596651316, + "fcm_dpo/delta": -0.06477323174476624, + "fcm_dpo/margin": 282.22406005859375, + "fcm_dpo/q_t": 0.3924236297607422, + "grad_norm": 43.64820098876953, + "learning_rate": 1.791192214186223e-07, + "logits/chosen": -0.502853274345398, + "logits/rejected": -0.4937119781970978, + "logps/chosen": -380.5098876953125, + "logps/ref_chosen": -71.07479095458984, + "logps/ref_rejected": -98.57952880859375, + "logps/rejected": -690.2386474609375, + "loss": 1.0284, + "margin_dpo/margin_mean": 282.2240295410156, + "margin_dpo/margin_std": 273.5948181152344, + "step": 432 + }, + { + "KL/chosen_KL_mean": -404.2077331542969, + "KL/mean": -498.65142822265625, + "KL/rejected_KL_mean": -593.0950927734375, + "KL/std": 266.9083251953125, + "epoch": 0.6358296622613803, + "fcm_dpo/beta": 0.0016442297492176294, + "fcm_dpo/delta": 0.09210029989480972, + "fcm_dpo/margin": 188.88734436035156, + "fcm_dpo/q_t": 0.42625609040260315, + "grad_norm": 37.26985549926758, + "learning_rate": 1.7788949132172193e-07, + "logits/chosen": -0.5648001432418823, + "logits/rejected": -0.5520174503326416, + "logps/chosen": -462.4809265136719, + "logps/ref_chosen": -58.273193359375, + "logps/ref_rejected": -95.95089721679688, + "logps/rejected": -689.0460205078125, + "loss": 1.1645, + "margin_dpo/margin_mean": 188.88734436035156, + "margin_dpo/margin_std": 314.8270263671875, + "step": 433 + }, + { + "KL/chosen_KL_mean": -358.794921875, + "KL/mean": -477.2853088378906, + "KL/rejected_KL_mean": -595.7756958007812, + "KL/std": 286.1195068359375, + "epoch": 0.6372980910425844, + "fcm_dpo/beta": 0.001646613236516714, + "fcm_dpo/delta": 0.010008249431848526, + "fcm_dpo/margin": 236.9807586669922, + "fcm_dpo/q_t": 0.4145994186401367, + "grad_norm": 22.998327255249023, + "learning_rate": 1.7666166140378853e-07, + "logits/chosen": -0.5716849565505981, + "logits/rejected": -0.5764377117156982, + "logps/chosen": -420.76861572265625, + "logps/ref_chosen": -61.97370147705078, + "logps/ref_rejected": -78.49861145019531, + "logps/rejected": -674.2742919921875, + "loss": 1.1083, + "margin_dpo/margin_mean": 236.98074340820312, + "margin_dpo/margin_std": 357.0912780761719, + "step": 434 + }, + { + "KL/chosen_KL_mean": -329.38238525390625, + "KL/mean": -454.7186279296875, + "KL/rejected_KL_mean": -580.0548706054688, + "KL/std": 273.7630615234375, + "epoch": 0.6387665198237885, + "fcm_dpo/beta": 0.0016480737831443548, + "fcm_dpo/delta": -0.01371398288756609, + "fcm_dpo/margin": 250.6724853515625, + "fcm_dpo/q_t": 0.40409788489341736, + "grad_norm": 30.54098892211914, + "learning_rate": 1.7543576401928218e-07, + "logits/chosen": -0.5849795937538147, + "logits/rejected": -0.5741031169891357, + "logps/chosen": -380.8844299316406, + "logps/ref_chosen": -51.502052307128906, + "logps/ref_rejected": -87.56689453125, + "logps/rejected": -667.6217651367188, + "loss": 1.0804, + "margin_dpo/margin_mean": 250.6724853515625, + "margin_dpo/margin_std": 310.9295654296875, + "step": 435 + }, + { + "KL/chosen_KL_mean": -350.72491455078125, + "KL/mean": -460.7542724609375, + "KL/rejected_KL_mean": -570.7836303710938, + "KL/std": 239.6090087890625, + "epoch": 0.6402349486049926, + "fcm_dpo/beta": 0.0016525493701919913, + "fcm_dpo/delta": 0.03764678165316582, + "fcm_dpo/margin": 220.05868530273438, + "fcm_dpo/q_t": 0.41519662737846375, + "grad_norm": 44.92102813720703, + "learning_rate": 1.742118314717391e-07, + "logits/chosen": -0.5648950934410095, + "logits/rejected": -0.5372939109802246, + "logps/chosen": -422.1286315917969, + "logps/ref_chosen": -71.40371704101562, + "logps/ref_rejected": -82.72775268554688, + "logps/rejected": -653.5113525390625, + "loss": 1.1082, + "margin_dpo/margin_mean": 220.05868530273438, + "margin_dpo/margin_std": 284.269287109375, + "step": 436 + }, + { + "KL/chosen_KL_mean": -357.39385986328125, + "KL/mean": -470.41290283203125, + "KL/rejected_KL_mean": -583.431884765625, + "KL/std": 238.08837890625, + "epoch": 0.6417033773861968, + "fcm_dpo/beta": 0.0016638417728245258, + "fcm_dpo/delta": 0.02482348121702671, + "fcm_dpo/margin": 226.03802490234375, + "fcm_dpo/q_t": 0.41194066405296326, + "grad_norm": 30.965953826904297, + "learning_rate": 1.7298989601292036e-07, + "logits/chosen": -0.5961349010467529, + "logits/rejected": -0.575666606426239, + "logps/chosen": -422.13812255859375, + "logps/ref_chosen": -64.7442626953125, + "logps/ref_rejected": -82.04356384277344, + "logps/rejected": -665.4754638671875, + "loss": 1.1003, + "margin_dpo/margin_mean": 226.03802490234375, + "margin_dpo/margin_std": 288.8181457519531, + "step": 437 + }, + { + "KL/chosen_KL_mean": -368.9373779296875, + "KL/mean": -496.147216796875, + "KL/rejected_KL_mean": -623.3570556640625, + "KL/std": 269.57830810546875, + "epoch": 0.6431718061674009, + "fcm_dpo/beta": 0.0016566277481615543, + "fcm_dpo/delta": -0.02283564768731594, + "fcm_dpo/margin": 254.41964721679688, + "fcm_dpo/q_t": 0.40168195962905884, + "grad_norm": 31.922332763671875, + "learning_rate": 1.7176998984196144e-07, + "logits/chosen": -0.6157523393630981, + "logits/rejected": -0.6025946736335754, + "logps/chosen": -427.9560546875, + "logps/ref_chosen": -59.0186653137207, + "logps/ref_rejected": -83.07682800292969, + "logps/rejected": -706.433837890625, + "loss": 1.0646, + "margin_dpo/margin_mean": 254.41964721679688, + "margin_dpo/margin_std": 291.51605224609375, + "step": 438 + }, + { + "KL/chosen_KL_mean": -385.7437744140625, + "KL/mean": -491.2313537597656, + "KL/rejected_KL_mean": -596.718994140625, + "KL/std": 272.39862060546875, + "epoch": 0.644640234948605, + "fcm_dpo/beta": 0.0016382005997002125, + "fcm_dpo/delta": -0.06990180164575577, + "fcm_dpo/margin": 210.9752655029297, + "fcm_dpo/q_t": 0.420367956161499, + "grad_norm": 29.01395606994629, + "learning_rate": 1.7055214510452458e-07, + "logits/chosen": -0.5831667184829712, + "logits/rejected": -0.5851659774780273, + "logps/chosen": -439.52783203125, + "logps/ref_chosen": -53.78407669067383, + "logps/ref_rejected": -83.98545837402344, + "logps/rejected": -680.7044677734375, + "loss": 1.1382, + "margin_dpo/margin_mean": 210.97525024414062, + "margin_dpo/margin_std": 309.2723388671875, + "step": 439 + }, + { + "KL/chosen_KL_mean": -426.24176025390625, + "KL/mean": -549.2017822265625, + "KL/rejected_KL_mean": -672.161865234375, + "KL/std": 338.7626953125, + "epoch": 0.6461086637298091, + "fcm_dpo/beta": 0.0016412187833338976, + "fcm_dpo/delta": -0.0038700848817825317, + "fcm_dpo/margin": 245.9200897216797, + "fcm_dpo/q_t": 0.41035932302474976, + "grad_norm": 30.73674201965332, + "learning_rate": 1.6933639389195134e-07, + "logits/chosen": -0.6162642240524292, + "logits/rejected": -0.6109949946403503, + "logps/chosen": -504.8084716796875, + "logps/ref_chosen": -78.56671905517578, + "logps/ref_rejected": -96.49775695800781, + "logps/rejected": -768.65966796875, + "loss": 1.0978, + "margin_dpo/margin_mean": 245.92010498046875, + "margin_dpo/margin_std": 351.30108642578125, + "step": 440 + }, + { + "KL/chosen_KL_mean": -475.21966552734375, + "KL/mean": -594.8802490234375, + "KL/rejected_KL_mean": -714.5408935546875, + "KL/std": 347.33465576171875, + "epoch": 0.6475770925110133, + "fcm_dpo/beta": 0.0016440332401543856, + "fcm_dpo/delta": 0.006565794348716736, + "fcm_dpo/margin": 239.3212127685547, + "fcm_dpo/q_t": 0.4138296842575073, + "grad_norm": 38.487876892089844, + "learning_rate": 1.681227682404166e-07, + "logits/chosen": -0.6530791521072388, + "logits/rejected": -0.6417888402938843, + "logps/chosen": -536.0440673828125, + "logps/ref_chosen": -60.824440002441406, + "logps/ref_rejected": -96.47080993652344, + "logps/rejected": -811.0116577148438, + "loss": 1.1403, + "margin_dpo/margin_mean": 239.3212127685547, + "margin_dpo/margin_std": 414.8189392089844, + "step": 441 + }, + { + "KL/chosen_KL_mean": -410.76324462890625, + "KL/mean": -554.4638671875, + "KL/rejected_KL_mean": -698.1644287109375, + "KL/std": 337.224853515625, + "epoch": 0.6490455212922174, + "fcm_dpo/beta": 0.0016326969489455223, + "fcm_dpo/delta": -0.07312282174825668, + "fcm_dpo/margin": 287.401123046875, + "fcm_dpo/q_t": 0.39594757556915283, + "grad_norm": 32.10857391357422, + "learning_rate": 1.669113001300851e-07, + "logits/chosen": -0.6253660321235657, + "logits/rejected": -0.6190581321716309, + "logps/chosen": -457.77447509765625, + "logps/ref_chosen": -47.01121520996094, + "logps/ref_rejected": -76.53926086425781, + "logps/rejected": -774.7036743164062, + "loss": 1.0621, + "margin_dpo/margin_mean": 287.401123046875, + "margin_dpo/margin_std": 378.95294189453125, + "step": 442 + }, + { + "KL/chosen_KL_mean": -458.8716735839844, + "KL/mean": -553.6380615234375, + "KL/rejected_KL_mean": -648.4044799804688, + "KL/std": 334.3310546875, + "epoch": 0.6505139500734214, + "fcm_dpo/beta": 0.0016142401145771146, + "fcm_dpo/delta": -0.0032433748710900545, + "fcm_dpo/margin": 189.53274536132812, + "fcm_dpo/q_t": 0.43172866106033325, + "grad_norm": 36.12958526611328, + "learning_rate": 1.6570202148426815e-07, + "logits/chosen": -0.6477606296539307, + "logits/rejected": -0.6300950050354004, + "logps/chosen": -530.1447143554688, + "logps/ref_chosen": -71.27301788330078, + "logps/ref_rejected": -86.679931640625, + "logps/rejected": -735.0844116210938, + "loss": 1.2045, + "margin_dpo/margin_mean": 189.53274536132812, + "margin_dpo/margin_std": 403.9544677734375, + "step": 443 + }, + { + "KL/chosen_KL_mean": -452.4591064453125, + "KL/mean": -600.4232177734375, + "KL/rejected_KL_mean": -748.3873291015625, + "KL/std": 348.21685791015625, + "epoch": 0.6519823788546255, + "fcm_dpo/beta": 0.0015917312121018767, + "fcm_dpo/delta": -0.0749378427863121, + "fcm_dpo/margin": 295.9281921386719, + "fcm_dpo/q_t": 0.39380595088005066, + "grad_norm": 33.08613586425781, + "learning_rate": 1.6449496416858282e-07, + "logits/chosen": -0.6086077690124512, + "logits/rejected": -0.6182563900947571, + "logps/chosen": -509.6728515625, + "logps/ref_chosen": -57.213706970214844, + "logps/ref_rejected": -97.25489807128906, + "logps/rejected": -845.6422119140625, + "loss": 1.0518, + "margin_dpo/margin_mean": 295.92822265625, + "margin_dpo/margin_std": 379.2266845703125, + "step": 444 + }, + { + "KL/chosen_KL_mean": -396.7808532714844, + "KL/mean": -538.607177734375, + "KL/rejected_KL_mean": -680.43359375, + "KL/std": 282.3050537109375, + "epoch": 0.6534508076358296, + "fcm_dpo/beta": 0.0015803833957761526, + "fcm_dpo/delta": -0.05051477625966072, + "fcm_dpo/margin": 283.6526794433594, + "fcm_dpo/q_t": 0.39755940437316895, + "grad_norm": 30.974002838134766, + "learning_rate": 1.6329015999011182e-07, + "logits/chosen": -0.6230882406234741, + "logits/rejected": -0.6196198463439941, + "logps/chosen": -464.0806579589844, + "logps/ref_chosen": -67.29979705810547, + "logps/ref_rejected": -92.68267059326172, + "logps/rejected": -773.1162109375, + "loss": 1.0577, + "margin_dpo/margin_mean": 283.6527099609375, + "margin_dpo/margin_std": 343.00897216796875, + "step": 445 + }, + { + "KL/chosen_KL_mean": -362.6717529296875, + "KL/mean": -511.12188720703125, + "KL/rejected_KL_mean": -659.572021484375, + "KL/std": 300.0148620605469, + "epoch": 0.6549192364170338, + "fcm_dpo/beta": 0.0015668668784201145, + "fcm_dpo/delta": -0.06862294673919678, + "fcm_dpo/margin": 296.90032958984375, + "fcm_dpo/q_t": 0.39133375883102417, + "grad_norm": 34.6982421875, + "learning_rate": 1.6208764069656578e-07, + "logits/chosen": -0.6164995431900024, + "logits/rejected": -0.6332226991653442, + "logps/chosen": -421.77020263671875, + "logps/ref_chosen": -59.098487854003906, + "logps/ref_rejected": -101.26419067382812, + "logps/rejected": -760.836181640625, + "loss": 1.0322, + "margin_dpo/margin_mean": 296.90032958984375, + "margin_dpo/margin_std": 308.796875, + "step": 446 + }, + { + "KL/chosen_KL_mean": -374.0321044921875, + "KL/mean": -528.794189453125, + "KL/rejected_KL_mean": -683.5562744140625, + "KL/std": 340.5907897949219, + "epoch": 0.6563876651982379, + "fcm_dpo/beta": 0.0015306383138522506, + "fcm_dpo/delta": -0.07781445980072021, + "fcm_dpo/margin": 309.5242004394531, + "fcm_dpo/q_t": 0.3929889500141144, + "grad_norm": 34.17955780029297, + "learning_rate": 1.608874379754465e-07, + "logits/chosen": -0.6846290826797485, + "logits/rejected": -0.6984615325927734, + "logps/chosen": -430.107421875, + "logps/ref_chosen": -56.07533264160156, + "logps/ref_rejected": -98.69475555419922, + "logps/rejected": -782.2510986328125, + "loss": 1.0422, + "margin_dpo/margin_mean": 309.5242004394531, + "margin_dpo/margin_std": 381.69482421875, + "step": 447 + }, + { + "KL/chosen_KL_mean": -422.5325622558594, + "KL/mean": -566.2469482421875, + "KL/rejected_KL_mean": -709.9613037109375, + "KL/std": 306.77191162109375, + "epoch": 0.657856093979442, + "fcm_dpo/beta": 0.0015249757561832666, + "fcm_dpo/delta": -0.040184423327445984, + "fcm_dpo/margin": 287.4287414550781, + "fcm_dpo/q_t": 0.3992360234260559, + "grad_norm": 42.507076263427734, + "learning_rate": 1.5968958345321177e-07, + "logits/chosen": -0.579893171787262, + "logits/rejected": -0.5896936655044556, + "logps/chosen": -482.5364074707031, + "logps/ref_chosen": -60.00384521484375, + "logps/ref_rejected": -102.26465606689453, + "logps/rejected": -812.2259521484375, + "loss": 1.0631, + "margin_dpo/margin_mean": 287.42877197265625, + "margin_dpo/margin_std": 350.75927734375, + "step": 448 + }, + { + "KL/chosen_KL_mean": -422.4124755859375, + "KL/mean": -570.7263793945312, + "KL/rejected_KL_mean": -719.040283203125, + "KL/std": 365.52325439453125, + "epoch": 0.6593245227606461, + "fcm_dpo/beta": 0.0015035069081932306, + "fcm_dpo/delta": -0.048243433237075806, + "fcm_dpo/margin": 296.62786865234375, + "fcm_dpo/q_t": 0.40181848406791687, + "grad_norm": 32.49612808227539, + "learning_rate": 1.584941086944423e-07, + "logits/chosen": -0.6347248554229736, + "logits/rejected": -0.6339551210403442, + "logps/chosen": -489.9390869140625, + "logps/ref_chosen": -67.52661895751953, + "logps/ref_rejected": -88.59690856933594, + "logps/rejected": -807.63720703125, + "loss": 1.0819, + "margin_dpo/margin_mean": 296.62786865234375, + "margin_dpo/margin_std": 435.5914306640625, + "step": 449 + }, + { + "KL/chosen_KL_mean": -343.0860595703125, + "KL/mean": -506.05120849609375, + "KL/rejected_KL_mean": -669.016357421875, + "KL/std": 322.73223876953125, + "epoch": 0.6607929515418502, + "fcm_dpo/beta": 0.0014846834819763899, + "fcm_dpo/delta": -0.08808425813913345, + "fcm_dpo/margin": 325.9302978515625, + "fcm_dpo/q_t": 0.3863303065299988, + "grad_norm": 61.81324005126953, + "learning_rate": 1.573010452010098e-07, + "logits/chosen": -0.6573776006698608, + "logits/rejected": -0.6733522415161133, + "logps/chosen": -400.1941833496094, + "logps/ref_chosen": -57.10811996459961, + "logps/ref_rejected": -102.75494384765625, + "logps/rejected": -771.7713623046875, + "loss": 1.013, + "margin_dpo/margin_mean": 325.9302978515625, + "margin_dpo/margin_std": 321.34356689453125, + "step": 450 + }, + { + "KL/chosen_KL_mean": -470.1484375, + "KL/mean": -591.3046875, + "KL/rejected_KL_mean": -712.4609985351562, + "KL/std": 378.45947265625, + "epoch": 0.6622613803230544, + "fcm_dpo/beta": 0.001488700625486672, + "fcm_dpo/delta": 0.04048318788409233, + "fcm_dpo/margin": 242.31259155273438, + "fcm_dpo/q_t": 0.4151974320411682, + "grad_norm": 32.96580505371094, + "learning_rate": 1.5611042441124687e-07, + "logits/chosen": -0.7209557294845581, + "logits/rejected": -0.7045374512672424, + "logps/chosen": -528.6172485351562, + "logps/ref_chosen": -58.46883010864258, + "logps/ref_rejected": -72.92941284179688, + "logps/rejected": -785.390380859375, + "loss": 1.153, + "margin_dpo/margin_mean": 242.31259155273438, + "margin_dpo/margin_std": 429.1678466796875, + "step": 451 + }, + { + "KL/chosen_KL_mean": -337.15887451171875, + "KL/mean": -480.21685791015625, + "KL/rejected_KL_mean": -623.2747802734375, + "KL/std": 290.8729248046875, + "epoch": 0.6637298091042585, + "fcm_dpo/beta": 0.001479277154430747, + "fcm_dpo/delta": -0.024412650614976883, + "fcm_dpo/margin": 286.115966796875, + "fcm_dpo/q_t": 0.39996248483657837, + "grad_norm": 22.152088165283203, + "learning_rate": 1.549222776991186e-07, + "logits/chosen": -0.6340690851211548, + "logits/rejected": -0.6552602052688599, + "logps/chosen": -387.5494384765625, + "logps/ref_chosen": -50.39055252075195, + "logps/ref_rejected": -97.77142333984375, + "logps/rejected": -721.0462646484375, + "loss": 1.0531, + "margin_dpo/margin_mean": 286.115966796875, + "margin_dpo/margin_std": 296.19451904296875, + "step": 452 + }, + { + "KL/chosen_KL_mean": -392.34857177734375, + "KL/mean": -526.1040649414062, + "KL/rejected_KL_mean": -659.8594970703125, + "KL/std": 298.7330322265625, + "epoch": 0.6651982378854625, + "fcm_dpo/beta": 0.0014776124153286219, + "fcm_dpo/delta": 0.004782242700457573, + "fcm_dpo/margin": 267.510986328125, + "fcm_dpo/q_t": 0.4101276695728302, + "grad_norm": 30.73015785217285, + "learning_rate": 1.5373663637339584e-07, + "logits/chosen": -0.6592116355895996, + "logits/rejected": -0.6468891501426697, + "logps/chosen": -450.06341552734375, + "logps/ref_chosen": -57.71485137939453, + "logps/ref_rejected": -82.20741271972656, + "logps/rejected": -742.0669555664062, + "loss": 1.0938, + "margin_dpo/margin_mean": 267.510986328125, + "margin_dpo/margin_std": 357.81243896484375, + "step": 453 + }, + { + "KL/chosen_KL_mean": -472.8430480957031, + "KL/mean": -623.6031494140625, + "KL/rejected_KL_mean": -774.3634033203125, + "KL/std": 340.2906494140625, + "epoch": 0.6666666666666666, + "fcm_dpo/beta": 0.0014682337641716003, + "fcm_dpo/delta": -0.044936180114746094, + "fcm_dpo/margin": 301.52032470703125, + "fcm_dpo/q_t": 0.3982999324798584, + "grad_norm": 27.640525817871094, + "learning_rate": 1.5255353167683017e-07, + "logits/chosen": -0.7141397595405579, + "logits/rejected": -0.707220196723938, + "logps/chosen": -533.7886962890625, + "logps/ref_chosen": -60.945648193359375, + "logps/ref_rejected": -84.95079040527344, + "logps/rejected": -859.3141479492188, + "loss": 1.0603, + "margin_dpo/margin_mean": 301.52032470703125, + "margin_dpo/margin_std": 374.39154052734375, + "step": 454 + }, + { + "KL/chosen_KL_mean": -406.83978271484375, + "KL/mean": -583.8450927734375, + "KL/rejected_KL_mean": -760.850341796875, + "KL/std": 351.13433837890625, + "epoch": 0.6681350954478708, + "fcm_dpo/beta": 0.0014530689222738147, + "fcm_dpo/delta": -0.12079726159572601, + "fcm_dpo/margin": 354.0106201171875, + "fcm_dpo/q_t": 0.38350850343704224, + "grad_norm": 39.29196548461914, + "learning_rate": 1.5137299478533064e-07, + "logits/chosen": -0.6605424284934998, + "logits/rejected": -0.683269202709198, + "logps/chosen": -451.7264709472656, + "logps/ref_chosen": -44.88671112060547, + "logps/ref_rejected": -115.30147552490234, + "logps/rejected": -876.15185546875, + "loss": 1.0177, + "margin_dpo/margin_mean": 354.0106201171875, + "margin_dpo/margin_std": 396.34912109375, + "step": 455 + }, + { + "KL/chosen_KL_mean": -423.3841552734375, + "KL/mean": -593.1605224609375, + "KL/rejected_KL_mean": -762.9368896484375, + "KL/std": 356.31634521484375, + "epoch": 0.6696035242290749, + "fcm_dpo/beta": 0.0014169735368341208, + "fcm_dpo/delta": -0.08517496287822723, + "fcm_dpo/margin": 339.55279541015625, + "fcm_dpo/q_t": 0.3890087902545929, + "grad_norm": 29.1214599609375, + "learning_rate": 1.5019505680714232e-07, + "logits/chosen": -0.6683529615402222, + "logits/rejected": -0.6928262710571289, + "logps/chosen": -480.4209289550781, + "logps/ref_chosen": -57.036781311035156, + "logps/ref_rejected": -105.21784210205078, + "logps/rejected": -868.1547241210938, + "loss": 1.0156, + "margin_dpo/margin_mean": 339.552734375, + "margin_dpo/margin_std": 345.1535949707031, + "step": 456 + }, + { + "KL/chosen_KL_mean": -411.3397216796875, + "KL/mean": -585.0991821289062, + "KL/rejected_KL_mean": -758.858642578125, + "KL/std": 348.389892578125, + "epoch": 0.671071953010279, + "fcm_dpo/beta": 0.0013881283812224865, + "fcm_dpo/delta": -0.08688442409038544, + "fcm_dpo/margin": 347.5188903808594, + "fcm_dpo/q_t": 0.3869348466396332, + "grad_norm": 29.03094482421875, + "learning_rate": 1.4901974878202627e-07, + "logits/chosen": -0.6927535533905029, + "logits/rejected": -0.6937886476516724, + "logps/chosen": -465.582275390625, + "logps/ref_chosen": -54.24253845214844, + "logps/ref_rejected": -85.10956573486328, + "logps/rejected": -843.9682006835938, + "loss": 1.0138, + "margin_dpo/margin_mean": 347.5188903808594, + "margin_dpo/margin_std": 340.45025634765625, + "step": 457 + }, + { + "KL/chosen_KL_mean": -413.8070068359375, + "KL/mean": -569.9956665039062, + "KL/rejected_KL_mean": -726.184326171875, + "KL/std": 318.3636474609375, + "epoch": 0.6725403817914831, + "fcm_dpo/beta": 0.0013727301266044378, + "fcm_dpo/delta": -0.030638840049505234, + "fcm_dpo/margin": 312.3773193359375, + "fcm_dpo/q_t": 0.401597797870636, + "grad_norm": 23.270376205444336, + "learning_rate": 1.4784710168044212e-07, + "logits/chosen": -0.7083392143249512, + "logits/rejected": -0.7040765285491943, + "logps/chosen": -469.21588134765625, + "logps/ref_chosen": -55.40888214111328, + "logps/ref_rejected": -97.68325805664062, + "logps/rejected": -823.8675537109375, + "loss": 1.0625, + "margin_dpo/margin_mean": 312.3773193359375, + "margin_dpo/margin_std": 366.5815734863281, + "step": 458 + }, + { + "KL/chosen_KL_mean": -460.8892822265625, + "KL/mean": -626.6010131835938, + "KL/rejected_KL_mean": -792.312744140625, + "KL/std": 359.20355224609375, + "epoch": 0.6740088105726872, + "fcm_dpo/beta": 0.0013611916219815612, + "fcm_dpo/delta": -0.05394328758120537, + "fcm_dpo/margin": 331.42340087890625, + "fcm_dpo/q_t": 0.3961183726787567, + "grad_norm": 29.07042121887207, + "learning_rate": 1.466771464027316e-07, + "logits/chosen": -0.690535306930542, + "logits/rejected": -0.708480954170227, + "logps/chosen": -507.44677734375, + "logps/ref_chosen": -46.55748748779297, + "logps/ref_rejected": -86.16854095458984, + "logps/rejected": -878.4812622070312, + "loss": 1.0548, + "margin_dpo/margin_mean": 331.4234313964844, + "margin_dpo/margin_std": 397.4347839355469, + "step": 459 + }, + { + "KL/chosen_KL_mean": -498.5191650390625, + "KL/mean": -676.511474609375, + "KL/rejected_KL_mean": -854.5037841796875, + "KL/std": 354.446533203125, + "epoch": 0.6754772393538914, + "fcm_dpo/beta": 0.001346941338852048, + "fcm_dpo/delta": -0.08341852575540543, + "fcm_dpo/margin": 355.984619140625, + "fcm_dpo/q_t": 0.3894132971763611, + "grad_norm": 34.67963790893555, + "learning_rate": 1.4550991377830423e-07, + "logits/chosen": -0.7603031396865845, + "logits/rejected": -0.7895260453224182, + "logps/chosen": -550.154052734375, + "logps/ref_chosen": -51.63489532470703, + "logps/ref_rejected": -104.11935424804688, + "logps/rejected": -958.6231689453125, + "loss": 1.0251, + "margin_dpo/margin_mean": 355.984619140625, + "margin_dpo/margin_std": 386.4249572753906, + "step": 460 + }, + { + "KL/chosen_KL_mean": -527.8145751953125, + "KL/mean": -668.647705078125, + "KL/rejected_KL_mean": -809.4808349609375, + "KL/std": 355.2934265136719, + "epoch": 0.6769456681350955, + "fcm_dpo/beta": 0.001346740871667862, + "fcm_dpo/delta": 0.02132502943277359, + "fcm_dpo/margin": 281.6662902832031, + "fcm_dpo/q_t": 0.4133082628250122, + "grad_norm": 28.388534545898438, + "learning_rate": 1.4434543456482518e-07, + "logits/chosen": -0.7709100842475891, + "logits/rejected": -0.7842754125595093, + "logps/chosen": -582.9965209960938, + "logps/ref_chosen": -55.18195724487305, + "logps/ref_rejected": -86.47689819335938, + "logps/rejected": -895.957763671875, + "loss": 1.1057, + "margin_dpo/margin_mean": 281.666259765625, + "margin_dpo/margin_std": 388.0255432128906, + "step": 461 + }, + { + "KL/chosen_KL_mean": -528.8763427734375, + "KL/mean": -648.3497314453125, + "KL/rejected_KL_mean": -767.8230590820312, + "KL/std": 363.253662109375, + "epoch": 0.6784140969162996, + "fcm_dpo/beta": 0.0013595143100246787, + "fcm_dpo/delta": 0.0777268186211586, + "fcm_dpo/margin": 238.94668579101562, + "fcm_dpo/q_t": 0.4269101023674011, + "grad_norm": 32.2818717956543, + "learning_rate": 1.4318373944740484e-07, + "logits/chosen": -0.83504319190979, + "logits/rejected": -0.8283437490463257, + "logps/chosen": -598.804443359375, + "logps/ref_chosen": -69.92803192138672, + "logps/ref_rejected": -78.84111022949219, + "logps/rejected": -846.6641845703125, + "loss": 1.1576, + "margin_dpo/margin_mean": 238.94668579101562, + "margin_dpo/margin_std": 403.227294921875, + "step": 462 + }, + { + "KL/chosen_KL_mean": -522.392578125, + "KL/mean": -674.9669799804688, + "KL/rejected_KL_mean": -827.5413818359375, + "KL/std": 369.949951171875, + "epoch": 0.6798825256975036, + "fcm_dpo/beta": 0.0013674467336386442, + "fcm_dpo/delta": -0.018231874331831932, + "fcm_dpo/margin": 305.1488037109375, + "fcm_dpo/q_t": 0.4052046537399292, + "grad_norm": 36.683773040771484, + "learning_rate": 1.4202485903778976e-07, + "logits/chosen": -0.7768852710723877, + "logits/rejected": -0.7839001417160034, + "logps/chosen": -577.6669921875, + "logps/ref_chosen": -55.27437210083008, + "logps/ref_rejected": -89.02497863769531, + "logps/rejected": -916.5663452148438, + "loss": 1.0875, + "margin_dpo/margin_mean": 305.1487731933594, + "margin_dpo/margin_std": 413.82952880859375, + "step": 463 + }, + { + "KL/chosen_KL_mean": -523.5928955078125, + "KL/mean": -754.2376098632812, + "KL/rejected_KL_mean": -984.88232421875, + "KL/std": 429.15692138671875, + "epoch": 0.6813509544787077, + "fcm_dpo/beta": 0.0013100993819534779, + "fcm_dpo/delta": -0.2194000482559204, + "fcm_dpo/margin": 461.2894592285156, + "fcm_dpo/q_t": 0.3615615665912628, + "grad_norm": 38.82695770263672, + "learning_rate": 1.4086882387355658e-07, + "logits/chosen": -0.7948259115219116, + "logits/rejected": -0.8514028787612915, + "logps/chosen": -574.5052490234375, + "logps/ref_chosen": -50.91230010986328, + "logps/ref_rejected": -102.4893798828125, + "logps/rejected": -1087.3717041015625, + "loss": 0.9451, + "margin_dpo/margin_mean": 461.2894287109375, + "margin_dpo/margin_std": 441.2557678222656, + "step": 464 + }, + { + "KL/chosen_KL_mean": -508.05780029296875, + "KL/mean": -700.8915405273438, + "KL/rejected_KL_mean": -893.725341796875, + "KL/std": 442.56414794921875, + "epoch": 0.6828193832599119, + "fcm_dpo/beta": 0.0012883164454251528, + "fcm_dpo/delta": -0.10184454917907715, + "fcm_dpo/margin": 385.66748046875, + "fcm_dpo/q_t": 0.38422703742980957, + "grad_norm": 43.68606948852539, + "learning_rate": 1.3971566441730714e-07, + "logits/chosen": -0.7833234071731567, + "logits/rejected": -0.8054988980293274, + "logps/chosen": -568.1746826171875, + "logps/ref_chosen": -60.116851806640625, + "logps/ref_rejected": -113.94602966308594, + "logps/rejected": -1007.67138671875, + "loss": 1.0303, + "margin_dpo/margin_mean": 385.66748046875, + "margin_dpo/margin_std": 453.04437255859375, + "step": 465 + }, + { + "KL/chosen_KL_mean": -554.3604125976562, + "KL/mean": -719.1517944335938, + "KL/rejected_KL_mean": -883.9432373046875, + "KL/std": 401.8727111816406, + "epoch": 0.684287812041116, + "fcm_dpo/beta": 0.0012672768207266927, + "fcm_dpo/delta": -0.01892733946442604, + "fcm_dpo/margin": 329.58282470703125, + "fcm_dpo/q_t": 0.4031521677970886, + "grad_norm": 32.980648040771484, + "learning_rate": 1.3856541105586545e-07, + "logits/chosen": -0.7898865938186646, + "logits/rejected": -0.7923921942710876, + "logps/chosen": -607.2813110351562, + "logps/ref_chosen": -52.920921325683594, + "logps/ref_rejected": -90.3154296875, + "logps/rejected": -974.2586669921875, + "loss": 1.0868, + "margin_dpo/margin_mean": 329.58282470703125, + "margin_dpo/margin_std": 441.3002624511719, + "step": 466 + }, + { + "KL/chosen_KL_mean": -718.9763793945312, + "KL/mean": -897.8390502929688, + "KL/rejected_KL_mean": -1076.70166015625, + "KL/std": 534.5426635742188, + "epoch": 0.6857562408223201, + "fcm_dpo/beta": 0.0012504856567829847, + "fcm_dpo/delta": -0.051718711853027344, + "fcm_dpo/margin": 357.7253112792969, + "fcm_dpo/q_t": 0.4021691381931305, + "grad_norm": 48.61074447631836, + "learning_rate": 1.3741809409947729e-07, + "logits/chosen": -0.899175763130188, + "logits/rejected": -0.8960803747177124, + "logps/chosen": -797.6921997070312, + "logps/ref_chosen": -78.7158203125, + "logps/ref_rejected": -102.86019897460938, + "logps/rejected": -1179.5618896484375, + "loss": 1.1327, + "margin_dpo/margin_mean": 357.72528076171875, + "margin_dpo/margin_std": 623.5672607421875, + "step": 467 + }, + { + "KL/chosen_KL_mean": -574.6669921875, + "KL/mean": -787.5557861328125, + "KL/rejected_KL_mean": -1000.444580078125, + "KL/std": 476.1236267089844, + "epoch": 0.6872246696035242, + "fcm_dpo/beta": 0.001233407761901617, + "fcm_dpo/delta": -0.13217654824256897, + "fcm_dpo/margin": 425.77752685546875, + "fcm_dpo/q_t": 0.38278520107269287, + "grad_norm": 49.870811462402344, + "learning_rate": 1.362737437810114e-07, + "logits/chosen": -0.8734508752822876, + "logits/rejected": -0.8809393644332886, + "logps/chosen": -644.6024169921875, + "logps/ref_chosen": -69.93536376953125, + "logps/ref_rejected": -101.02880859375, + "logps/rejected": -1101.473388671875, + "loss": 1.0183, + "margin_dpo/margin_mean": 425.77752685546875, + "margin_dpo/margin_std": 522.344970703125, + "step": 468 + }, + { + "KL/chosen_KL_mean": -616.0804443359375, + "KL/mean": -829.309814453125, + "KL/rejected_KL_mean": -1042.5391845703125, + "KL/std": 435.38775634765625, + "epoch": 0.6886930983847284, + "fcm_dpo/beta": 0.0011981693096458912, + "fcm_dpo/delta": -0.11811123043298721, + "fcm_dpo/margin": 426.45867919921875, + "fcm_dpo/q_t": 0.38290101289749146, + "grad_norm": 41.835994720458984, + "learning_rate": 1.351323902551631e-07, + "logits/chosen": -0.871213436126709, + "logits/rejected": -0.8826764822006226, + "logps/chosen": -684.2052001953125, + "logps/ref_chosen": -68.12469482421875, + "logps/ref_rejected": -104.78640747070312, + "logps/rejected": -1147.32568359375, + "loss": 1.0167, + "margin_dpo/margin_mean": 426.4587097167969, + "margin_dpo/margin_std": 479.927978515625, + "step": 469 + }, + { + "KL/chosen_KL_mean": -526.5010986328125, + "KL/mean": -717.6756591796875, + "KL/rejected_KL_mean": -908.85009765625, + "KL/std": 461.4809265136719, + "epoch": 0.6901615271659325, + "fcm_dpo/beta": 0.0011902997503057122, + "fcm_dpo/delta": -0.05773991718888283, + "fcm_dpo/margin": 382.3490905761719, + "fcm_dpo/q_t": 0.39579594135284424, + "grad_norm": 28.576284408569336, + "learning_rate": 1.339940635976592e-07, + "logits/chosen": -0.8578736782073975, + "logits/rejected": -0.8692770004272461, + "logps/chosen": -570.29296875, + "logps/ref_chosen": -43.791927337646484, + "logps/ref_rejected": -82.70285034179688, + "logps/rejected": -991.552978515625, + "loss": 1.07, + "margin_dpo/margin_mean": 382.34912109375, + "margin_dpo/margin_std": 517.0437622070312, + "step": 470 + }, + { + "KL/chosen_KL_mean": -668.16845703125, + "KL/mean": -836.795654296875, + "KL/rejected_KL_mean": -1005.4229125976562, + "KL/std": 489.2864990234375, + "epoch": 0.6916299559471366, + "fcm_dpo/beta": 0.0011771449353545904, + "fcm_dpo/delta": 0.0024843141436576843, + "fcm_dpo/margin": 337.25445556640625, + "fcm_dpo/q_t": 0.41261669993400574, + "grad_norm": 33.51979446411133, + "learning_rate": 1.3285879380446563e-07, + "logits/chosen": -0.9775102734565735, + "logits/rejected": -0.9886398315429688, + "logps/chosen": -731.5079345703125, + "logps/ref_chosen": -63.33952331542969, + "logps/ref_rejected": -83.61048126220703, + "logps/rejected": -1089.033447265625, + "loss": 1.1157, + "margin_dpo/margin_mean": 337.25445556640625, + "margin_dpo/margin_std": 525.2001342773438, + "step": 471 + }, + { + "KL/chosen_KL_mean": -668.5523681640625, + "KL/mean": -864.1845703125, + "KL/rejected_KL_mean": -1059.816650390625, + "KL/std": 573.5145263671875, + "epoch": 0.6930983847283406, + "fcm_dpo/beta": 0.001169139752164483, + "fcm_dpo/delta": -0.060674797743558884, + "fcm_dpo/margin": 391.2642822265625, + "fcm_dpo/q_t": 0.4020659327507019, + "grad_norm": 32.862762451171875, + "learning_rate": 1.317266107909975e-07, + "logits/chosen": -0.9176386594772339, + "logits/rejected": -0.8923181295394897, + "logps/chosen": -752.218505859375, + "logps/ref_chosen": -83.66610717773438, + "logps/ref_rejected": -117.20919799804688, + "logps/rejected": -1177.02587890625, + "loss": 1.098, + "margin_dpo/margin_mean": 391.2642822265625, + "margin_dpo/margin_std": 617.0120849609375, + "step": 472 + }, + { + "KL/chosen_KL_mean": -801.0420532226562, + "KL/mean": -896.4287109375, + "KL/rejected_KL_mean": -991.8154296875, + "KL/std": 595.682373046875, + "epoch": 0.6945668135095447, + "fcm_dpo/beta": 0.0011696891160681844, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 190.77340698242188, + "fcm_dpo/q_t": 0.45167526602745056, + "grad_norm": 116.70816802978516, + "learning_rate": 1.3059754439133002e-07, + "logits/chosen": -0.896651029586792, + "logits/rejected": -0.8670951128005981, + "logps/chosen": -864.5390625, + "logps/ref_chosen": -63.49696731567383, + "logps/ref_rejected": -81.14657592773438, + "logps/rejected": -1072.9620361328125, + "loss": 1.3537, + "margin_dpo/margin_mean": 190.77340698242188, + "margin_dpo/margin_std": 781.681640625, + "step": 473 + }, + { + "KL/chosen_KL_mean": -648.32958984375, + "KL/mean": -815.1319580078125, + "KL/rejected_KL_mean": -981.9342041015625, + "KL/std": 509.63934326171875, + "epoch": 0.6960352422907489, + "fcm_dpo/beta": 0.001158315921202302, + "fcm_dpo/delta": -0.09819056838750839, + "fcm_dpo/margin": 333.6046447753906, + "fcm_dpo/q_t": 0.4119930565357208, + "grad_norm": 38.174800872802734, + "learning_rate": 1.2947162435741277e-07, + "logits/chosen": -0.8967859745025635, + "logits/rejected": -0.9001563191413879, + "logps/chosen": -700.9415283203125, + "logps/ref_chosen": -52.6119384765625, + "logps/ref_rejected": -90.08041381835938, + "logps/rejected": -1072.0146484375, + "loss": 1.149, + "margin_dpo/margin_mean": 333.6046142578125, + "margin_dpo/margin_std": 572.8232421875, + "step": 474 + }, + { + "KL/chosen_KL_mean": -477.9742126464844, + "KL/mean": -690.75537109375, + "KL/rejected_KL_mean": -903.536376953125, + "KL/std": 416.74029541015625, + "epoch": 0.697503671071953, + "fcm_dpo/beta": 0.0011308316607028246, + "fcm_dpo/delta": -0.08551047742366791, + "fcm_dpo/margin": 425.5621337890625, + "fcm_dpo/q_t": 0.3888343572616577, + "grad_norm": 39.29723358154297, + "learning_rate": 1.2834888035828596e-07, + "logits/chosen": -0.9726539850234985, + "logits/rejected": -0.9993470907211304, + "logps/chosen": -520.4694213867188, + "logps/ref_chosen": -42.49519348144531, + "logps/ref_rejected": -90.06294250488281, + "logps/rejected": -993.599365234375, + "loss": 1.0222, + "margin_dpo/margin_mean": 425.56219482421875, + "margin_dpo/margin_std": 453.77056884765625, + "step": 475 + }, + { + "KL/chosen_KL_mean": -573.8340454101562, + "KL/mean": -741.2000732421875, + "KL/rejected_KL_mean": -908.5662841796875, + "KL/std": 460.19805908203125, + "epoch": 0.6989720998531571, + "fcm_dpo/beta": 0.0011303846258670092, + "fcm_dpo/delta": 0.02247927524149418, + "fcm_dpo/margin": 334.7321472167969, + "fcm_dpo/q_t": 0.41371750831604004, + "grad_norm": 51.48725509643555, + "learning_rate": 1.2722934197929802e-07, + "logits/chosen": -0.9355987310409546, + "logits/rejected": -0.9493337869644165, + "logps/chosen": -616.783447265625, + "logps/ref_chosen": -42.94938278198242, + "logps/ref_rejected": -73.71023559570312, + "logps/rejected": -982.2764892578125, + "loss": 1.1039, + "margin_dpo/margin_mean": 334.732177734375, + "margin_dpo/margin_std": 454.2925720214844, + "step": 476 + }, + { + "KL/chosen_KL_mean": -620.339599609375, + "KL/mean": -789.1563110351562, + "KL/rejected_KL_mean": -957.9730224609375, + "KL/std": 484.395751953125, + "epoch": 0.7004405286343612, + "fcm_dpo/beta": 0.001139188650995493, + "fcm_dpo/delta": 0.0156848281621933, + "fcm_dpo/margin": 337.6333923339844, + "fcm_dpo/q_t": 0.4125925898551941, + "grad_norm": 32.003143310546875, + "learning_rate": 1.2611303872132631e-07, + "logits/chosen": -0.9794288873672485, + "logits/rejected": -0.9472505450248718, + "logps/chosen": -691.1122436523438, + "logps/ref_chosen": -70.77261352539062, + "logps/ref_rejected": -76.13737487792969, + "logps/rejected": -1034.1103515625, + "loss": 1.1367, + "margin_dpo/margin_mean": 337.6333923339844, + "margin_dpo/margin_std": 572.432373046875, + "step": 477 + }, + { + "KL/chosen_KL_mean": -509.4632568359375, + "KL/mean": -703.776123046875, + "KL/rejected_KL_mean": -898.0889282226562, + "KL/std": 428.4776611328125, + "epoch": 0.7019089574155654, + "fcm_dpo/beta": 0.0011328569380566478, + "fcm_dpo/delta": -0.04220225661993027, + "fcm_dpo/margin": 388.6256103515625, + "fcm_dpo/q_t": 0.3992430567741394, + "grad_norm": 36.747989654541016, + "learning_rate": 1.2500000000000005e-07, + "logits/chosen": -0.8643758296966553, + "logits/rejected": -0.8856371641159058, + "logps/chosen": -550.90380859375, + "logps/ref_chosen": -41.440513610839844, + "logps/ref_rejected": -85.36196899414062, + "logps/rejected": -983.450927734375, + "loss": 1.071, + "margin_dpo/margin_mean": 388.6256103515625, + "margin_dpo/margin_std": 506.4171142578125, + "step": 478 + }, + { + "KL/chosen_KL_mean": -674.1643676757812, + "KL/mean": -861.4671020507812, + "KL/rejected_KL_mean": -1048.769775390625, + "KL/std": 529.594482421875, + "epoch": 0.7033773861967695, + "fcm_dpo/beta": 0.0011302338680252433, + "fcm_dpo/delta": -0.025240201503038406, + "fcm_dpo/margin": 374.60540771484375, + "fcm_dpo/q_t": 0.40734556317329407, + "grad_norm": 28.37042236328125, + "learning_rate": 1.2389025514492456e-07, + "logits/chosen": -0.9004903435707092, + "logits/rejected": -0.9291303753852844, + "logps/chosen": -728.072265625, + "logps/ref_chosen": -53.907920837402344, + "logps/ref_rejected": -95.1163330078125, + "logps/rejected": -1143.8861083984375, + "loss": 1.1188, + "margin_dpo/margin_mean": 374.60540771484375, + "margin_dpo/margin_std": 616.9238891601562, + "step": 479 + }, + { + "KL/chosen_KL_mean": -794.90771484375, + "KL/mean": -941.1514892578125, + "KL/rejected_KL_mean": -1087.395263671875, + "KL/std": 502.28509521484375, + "epoch": 0.7048458149779736, + "fcm_dpo/beta": 0.0011154343374073505, + "fcm_dpo/delta": -0.04910217225551605, + "fcm_dpo/margin": 292.4875183105469, + "fcm_dpo/q_t": 0.42702075839042664, + "grad_norm": 56.938568115234375, + "learning_rate": 1.227838333989088e-07, + "logits/chosen": -0.9496725797653198, + "logits/rejected": -0.9423930644989014, + "logps/chosen": -853.5903930664062, + "logps/ref_chosen": -58.682701110839844, + "logps/ref_rejected": -82.93248748779297, + "logps/rejected": -1170.32763671875, + "loss": 1.1837, + "margin_dpo/margin_mean": 292.48748779296875, + "margin_dpo/margin_std": 548.897216796875, + "step": 480 + }, + { + "KL/chosen_KL_mean": -666.3424072265625, + "KL/mean": -894.8223876953125, + "KL/rejected_KL_mean": -1123.3023681640625, + "KL/std": 532.44287109375, + "epoch": 0.7063142437591777, + "fcm_dpo/beta": 0.0010935836471617222, + "fcm_dpo/delta": -0.10490460693836212, + "fcm_dpo/margin": 456.9600524902344, + "fcm_dpo/q_t": 0.3881867527961731, + "grad_norm": 31.939149856567383, + "learning_rate": 1.2168076391719489e-07, + "logits/chosen": -0.9752233028411865, + "logits/rejected": -1.0002660751342773, + "logps/chosen": -721.306640625, + "logps/ref_chosen": -54.964271545410156, + "logps/ref_rejected": -92.42044067382812, + "logps/rejected": -1215.722900390625, + "loss": 1.0369, + "margin_dpo/margin_mean": 456.96002197265625, + "margin_dpo/margin_std": 578.0529174804688, + "step": 481 + }, + { + "KL/chosen_KL_mean": -705.298828125, + "KL/mean": -807.7088623046875, + "KL/rejected_KL_mean": -910.1187744140625, + "KL/std": 523.2813720703125, + "epoch": 0.7077826725403817, + "fcm_dpo/beta": 0.0010957256890833378, + "fcm_dpo/delta": 0.08043741434812546, + "fcm_dpo/margin": 204.8200225830078, + "fcm_dpo/q_t": 0.4466710090637207, + "grad_norm": 58.26255416870117, + "learning_rate": 1.2058107576668938e-07, + "logits/chosen": -0.8530906438827515, + "logits/rejected": -0.8416086435317993, + "logps/chosen": -772.852294921875, + "logps/ref_chosen": -67.553466796875, + "logps/ref_rejected": -87.58953857421875, + "logps/rejected": -997.7083740234375, + "loss": 1.2776, + "margin_dpo/margin_mean": 204.82000732421875, + "margin_dpo/margin_std": 610.1978759765625, + "step": 482 + }, + { + "KL/chosen_KL_mean": -627.250244140625, + "KL/mean": -863.0478515625, + "KL/rejected_KL_mean": -1098.845458984375, + "KL/std": 514.4844970703125, + "epoch": 0.7092511013215859, + "fcm_dpo/beta": 0.0010834920685738325, + "fcm_dpo/delta": -0.11722610890865326, + "fcm_dpo/margin": 471.59527587890625, + "fcm_dpo/q_t": 0.3861696720123291, + "grad_norm": 35.892913818359375, + "learning_rate": 1.194847979251979e-07, + "logits/chosen": -0.9211816787719727, + "logits/rejected": -0.9332787394523621, + "logps/chosen": -690.580078125, + "logps/ref_chosen": -63.32981872558594, + "logps/ref_rejected": -95.78697204589844, + "logps/rejected": -1194.632568359375, + "loss": 1.026, + "margin_dpo/margin_mean": 471.59527587890625, + "margin_dpo/margin_std": 582.1723022460938, + "step": 483 + }, + { + "KL/chosen_KL_mean": -537.6214599609375, + "KL/mean": -750.1485595703125, + "KL/rejected_KL_mean": -962.6756591796875, + "KL/std": 499.0599670410156, + "epoch": 0.71071953010279, + "fcm_dpo/beta": 0.0010744791943579912, + "fcm_dpo/delta": -0.059597231447696686, + "fcm_dpo/margin": 425.05419921875, + "fcm_dpo/q_t": 0.39632922410964966, + "grad_norm": 45.20163345336914, + "learning_rate": 1.1839195928066101e-07, + "logits/chosen": -0.9501423835754395, + "logits/rejected": -0.974023699760437, + "logps/chosen": -596.7596435546875, + "logps/ref_chosen": -59.13812255859375, + "logps/ref_rejected": -84.37144470214844, + "logps/rejected": -1047.047119140625, + "loss": 1.0526, + "margin_dpo/margin_mean": 425.0542297363281, + "margin_dpo/margin_std": 516.1748657226562, + "step": 484 + }, + { + "KL/chosen_KL_mean": -551.4486083984375, + "KL/mean": -755.1604614257812, + "KL/rejected_KL_mean": -958.8723754882812, + "KL/std": 496.97821044921875, + "epoch": 0.7121879588839941, + "fcm_dpo/beta": 0.0010628815507516265, + "fcm_dpo/delta": -0.034554317593574524, + "fcm_dpo/margin": 407.4237060546875, + "fcm_dpo/q_t": 0.40300631523132324, + "grad_norm": 41.25438690185547, + "learning_rate": 1.1730258863039347e-07, + "logits/chosen": -0.8435344696044922, + "logits/rejected": -0.8657543659210205, + "logps/chosen": -610.2982177734375, + "logps/ref_chosen": -58.849571228027344, + "logps/ref_rejected": -103.36408233642578, + "logps/rejected": -1062.2364501953125, + "loss": 1.0827, + "margin_dpo/margin_mean": 407.4237060546875, + "margin_dpo/margin_std": 568.4012451171875, + "step": 485 + }, + { + "KL/chosen_KL_mean": -635.773681640625, + "KL/mean": -867.3421630859375, + "KL/rejected_KL_mean": -1098.91064453125, + "KL/std": 573.5325927734375, + "epoch": 0.7136563876651982, + "fcm_dpo/beta": 0.0010442393831908703, + "fcm_dpo/delta": -0.08795761317014694, + "fcm_dpo/margin": 463.13702392578125, + "fcm_dpo/q_t": 0.3934386372566223, + "grad_norm": 33.520912170410156, + "learning_rate": 1.1621671468032493e-07, + "logits/chosen": -0.9062224626541138, + "logits/rejected": -0.9161352515220642, + "logps/chosen": -691.0333251953125, + "logps/ref_chosen": -55.25966262817383, + "logps/ref_rejected": -92.13936614990234, + "logps/rejected": -1191.050048828125, + "loss": 1.0755, + "margin_dpo/margin_mean": 463.13702392578125, + "margin_dpo/margin_std": 690.687255859375, + "step": 486 + }, + { + "KL/chosen_KL_mean": -682.1150512695312, + "KL/mean": -852.4932861328125, + "KL/rejected_KL_mean": -1022.8714599609375, + "KL/std": 518.8946533203125, + "epoch": 0.7151248164464024, + "fcm_dpo/beta": 0.0010508847190067172, + "fcm_dpo/delta": 0.04303121566772461, + "fcm_dpo/margin": 340.75640869140625, + "fcm_dpo/q_t": 0.41665130853652954, + "grad_norm": 35.01145935058594, + "learning_rate": 1.1513436604424378e-07, + "logits/chosen": -0.9379677772521973, + "logits/rejected": -0.9421348571777344, + "logps/chosen": -735.1783447265625, + "logps/ref_chosen": -53.06330871582031, + "logps/ref_rejected": -92.41883087158203, + "logps/rejected": -1115.290283203125, + "loss": 1.1348, + "margin_dpo/margin_mean": 340.75640869140625, + "margin_dpo/margin_std": 538.1895141601562, + "step": 487 + }, + { + "KL/chosen_KL_mean": -549.458740234375, + "KL/mean": -727.3135375976562, + "KL/rejected_KL_mean": -905.1682739257812, + "KL/std": 453.9654235839844, + "epoch": 0.7165932452276065, + "fcm_dpo/beta": 0.0010568746365606785, + "fcm_dpo/delta": 0.024668315425515175, + "fcm_dpo/margin": 355.7095947265625, + "fcm_dpo/q_t": 0.4128245711326599, + "grad_norm": 31.856176376342773, + "learning_rate": 1.1405557124304335e-07, + "logits/chosen": -0.8694427609443665, + "logits/rejected": -0.8729550838470459, + "logps/chosen": -601.6868896484375, + "logps/ref_chosen": -52.22815704345703, + "logps/ref_rejected": -84.00656127929688, + "logps/rejected": -989.1748046875, + "loss": 1.098, + "margin_dpo/margin_mean": 355.7095642089844, + "margin_dpo/margin_std": 445.57000732421875, + "step": 488 + }, + { + "KL/chosen_KL_mean": -507.87225341796875, + "KL/mean": -690.164794921875, + "KL/rejected_KL_mean": -872.4573974609375, + "KL/std": 465.132568359375, + "epoch": 0.7180616740088106, + "fcm_dpo/beta": 0.001060036476701498, + "fcm_dpo/delta": 0.013825876638293266, + "fcm_dpo/margin": 364.5850830078125, + "fcm_dpo/q_t": 0.41301560401916504, + "grad_norm": 29.248310089111328, + "learning_rate": 1.1298035870396985e-07, + "logits/chosen": -0.9246722459793091, + "logits/rejected": -0.9232733249664307, + "logps/chosen": -563.8619384765625, + "logps/ref_chosen": -55.989627838134766, + "logps/ref_rejected": -79.39812469482422, + "logps/rejected": -951.85546875, + "loss": 1.1035, + "margin_dpo/margin_mean": 364.5850830078125, + "margin_dpo/margin_std": 510.5589599609375, + "step": 489 + }, + { + "KL/chosen_KL_mean": -607.0692138671875, + "KL/mean": -791.8487548828125, + "KL/rejected_KL_mean": -976.6283569335938, + "KL/std": 546.3399047851562, + "epoch": 0.7195301027900147, + "fcm_dpo/beta": 0.0010612778132781386, + "fcm_dpo/delta": 0.007974715903401375, + "fcm_dpo/margin": 369.5592041015625, + "fcm_dpo/q_t": 0.4127916693687439, + "grad_norm": 38.29257583618164, + "learning_rate": 1.1190875675987355e-07, + "logits/chosen": -0.9299312829971313, + "logits/rejected": -0.9667763710021973, + "logps/chosen": -659.435546875, + "logps/ref_chosen": -52.36639404296875, + "logps/ref_rejected": -110.4090576171875, + "logps/rejected": -1087.037353515625, + "loss": 1.1355, + "margin_dpo/margin_mean": 369.5591735839844, + "margin_dpo/margin_std": 631.1531982421875, + "step": 490 + }, + { + "KL/chosen_KL_mean": -584.8209228515625, + "KL/mean": -708.0709228515625, + "KL/rejected_KL_mean": -831.3209228515625, + "KL/std": 479.92486572265625, + "epoch": 0.7209985315712188, + "fcm_dpo/beta": 0.0010824804194271564, + "fcm_dpo/delta": 0.13659176230430603, + "fcm_dpo/margin": 246.49998474121094, + "fcm_dpo/q_t": 0.43931227922439575, + "grad_norm": 29.697641372680664, + "learning_rate": 1.1084079364846241e-07, + "logits/chosen": -0.9027219414710999, + "logits/rejected": -0.8923330307006836, + "logps/chosen": -644.9371948242188, + "logps/ref_chosen": -60.11626434326172, + "logps/ref_rejected": -73.27278900146484, + "logps/rejected": -904.59375, + "loss": 1.1979, + "margin_dpo/margin_mean": 246.5, + "margin_dpo/margin_std": 469.1783752441406, + "step": 491 + }, + { + "KL/chosen_KL_mean": -603.3214721679688, + "KL/mean": -728.932861328125, + "KL/rejected_KL_mean": -854.5443115234375, + "KL/std": 482.99114990234375, + "epoch": 0.7224669603524229, + "fcm_dpo/beta": 0.001107184449210763, + "fcm_dpo/delta": 0.12549251317977905, + "fcm_dpo/margin": 251.2227325439453, + "fcm_dpo/q_t": 0.4381140470504761, + "grad_norm": 31.3509521484375, + "learning_rate": 1.097764975115576e-07, + "logits/chosen": -0.9563778638839722, + "logits/rejected": -0.9354947805404663, + "logps/chosen": -657.315673828125, + "logps/ref_chosen": -53.994178771972656, + "logps/ref_rejected": -72.65962219238281, + "logps/rejected": -927.203857421875, + "loss": 1.2164, + "margin_dpo/margin_mean": 251.22274780273438, + "margin_dpo/margin_std": 553.6586303710938, + "step": 492 + }, + { + "KL/chosen_KL_mean": -617.3892211914062, + "KL/mean": -754.9351806640625, + "KL/rejected_KL_mean": -892.4810791015625, + "KL/std": 521.77880859375, + "epoch": 0.723935389133627, + "fcm_dpo/beta": 0.001113426173105836, + "fcm_dpo/delta": -0.012180797755718231, + "fcm_dpo/margin": 275.09185791015625, + "fcm_dpo/q_t": 0.42827779054641724, + "grad_norm": 33.60331344604492, + "learning_rate": 1.0871589639435203e-07, + "logits/chosen": -0.975821852684021, + "logits/rejected": -0.9446998834609985, + "logps/chosen": -692.886474609375, + "logps/ref_chosen": -75.49723815917969, + "logps/ref_rejected": -87.32301330566406, + "logps/rejected": -979.8040771484375, + "loss": 1.1783, + "margin_dpo/margin_mean": 275.09185791015625, + "margin_dpo/margin_std": 499.40582275390625, + "step": 493 + }, + { + "KL/chosen_KL_mean": -487.6140441894531, + "KL/mean": -709.284912109375, + "KL/rejected_KL_mean": -930.9556884765625, + "KL/std": 471.108154296875, + "epoch": 0.7254038179148311, + "fcm_dpo/beta": 0.0010977558558806777, + "fcm_dpo/delta": -0.09117947518825531, + "fcm_dpo/margin": 443.3415832519531, + "fcm_dpo/q_t": 0.38731634616851807, + "grad_norm": 42.78213882446289, + "learning_rate": 1.0765901824467166e-07, + "logits/chosen": -0.8541857600212097, + "logits/rejected": -0.8858389854431152, + "logps/chosen": -528.9733276367188, + "logps/ref_chosen": -41.35926818847656, + "logps/ref_rejected": -86.09136962890625, + "logps/rejected": -1017.0469970703125, + "loss": 1.0186, + "margin_dpo/margin_mean": 443.34161376953125, + "margin_dpo/margin_std": 465.9763488769531, + "step": 494 + }, + { + "KL/chosen_KL_mean": -549.9886474609375, + "KL/mean": -741.8480834960938, + "KL/rejected_KL_mean": -933.7076416015625, + "KL/std": 484.11676025390625, + "epoch": 0.7268722466960352, + "fcm_dpo/beta": 0.0010909372940659523, + "fcm_dpo/delta": -0.019423317164182663, + "fcm_dpo/margin": 383.71905517578125, + "fcm_dpo/q_t": 0.4072011411190033, + "grad_norm": 31.058788299560547, + "learning_rate": 1.0660589091223854e-07, + "logits/chosen": -0.9760909080505371, + "logits/rejected": -0.9824463725090027, + "logps/chosen": -613.523681640625, + "logps/ref_chosen": -63.53507995605469, + "logps/ref_rejected": -91.42443084716797, + "logps/rejected": -1025.132080078125, + "loss": 1.0979, + "margin_dpo/margin_mean": 383.71905517578125, + "margin_dpo/margin_std": 579.0, + "step": 495 + }, + { + "KL/chosen_KL_mean": -678.1318359375, + "KL/mean": -781.4765625, + "KL/rejected_KL_mean": -884.8211669921875, + "KL/std": 376.869384765625, + "epoch": 0.7283406754772394, + "fcm_dpo/beta": 0.0011184395989403129, + "fcm_dpo/delta": 0.17267850041389465, + "fcm_dpo/margin": 206.68927001953125, + "fcm_dpo/q_t": 0.4458683431148529, + "grad_norm": 56.444026947021484, + "learning_rate": 1.0555654214793722e-07, + "logits/chosen": -0.9465994238853455, + "logits/rejected": -0.9162840843200684, + "logps/chosen": -750.7238159179688, + "logps/ref_chosen": -72.5919189453125, + "logps/ref_rejected": -84.32933807373047, + "logps/rejected": -969.1505126953125, + "loss": 1.2216, + "margin_dpo/margin_mean": 206.6892852783203, + "margin_dpo/margin_std": 417.4690856933594, + "step": 496 + }, + { + "KL/chosen_KL_mean": -637.2471313476562, + "KL/mean": -742.4765625, + "KL/rejected_KL_mean": -847.7059326171875, + "KL/std": 471.04632568359375, + "epoch": 0.7298091042584435, + "fcm_dpo/beta": 0.0011322898790240288, + "fcm_dpo/delta": 0.021924598142504692, + "fcm_dpo/margin": 210.45884704589844, + "fcm_dpo/q_t": 0.4449055790901184, + "grad_norm": 34.23469924926758, + "learning_rate": 1.0451099960308374e-07, + "logits/chosen": -0.89613938331604, + "logits/rejected": -0.8781349658966064, + "logps/chosen": -695.841064453125, + "logps/ref_chosen": -58.59397506713867, + "logps/ref_rejected": -76.28836822509766, + "logps/rejected": -923.9942626953125, + "loss": 1.2261, + "margin_dpo/margin_mean": 210.45883178710938, + "margin_dpo/margin_std": 444.05548095703125, + "step": 497 + }, + { + "KL/chosen_KL_mean": -613.268798828125, + "KL/mean": -784.17431640625, + "KL/rejected_KL_mean": -955.0799560546875, + "KL/std": 500.90350341796875, + "epoch": 0.7312775330396476, + "fcm_dpo/beta": 0.001133624231442809, + "fcm_dpo/delta": 0.01301711704581976, + "fcm_dpo/margin": 341.8111267089844, + "fcm_dpo/q_t": 0.4114514887332916, + "grad_norm": 39.67582321166992, + "learning_rate": 1.0346929082869641e-07, + "logits/chosen": -0.9115738868713379, + "logits/rejected": -0.8932760953903198, + "logps/chosen": -684.4744873046875, + "logps/ref_chosen": -71.20565795898438, + "logps/ref_rejected": -83.95803833007812, + "logps/rejected": -1039.0379638671875, + "loss": 1.1264, + "margin_dpo/margin_mean": 341.8111267089844, + "margin_dpo/margin_std": 551.8848266601562, + "step": 498 + }, + { + "KL/chosen_KL_mean": -532.6873168945312, + "KL/mean": -742.4135131835938, + "KL/rejected_KL_mean": -952.1397705078125, + "KL/std": 498.259033203125, + "epoch": 0.7327459618208517, + "fcm_dpo/beta": 0.0011214257683604956, + "fcm_dpo/delta": -0.07398218661546707, + "fcm_dpo/margin": 419.45245361328125, + "fcm_dpo/q_t": 0.39359456300735474, + "grad_norm": 47.4464111328125, + "learning_rate": 1.0243144327477013e-07, + "logits/chosen": -0.9583698511123657, + "logits/rejected": -0.991510272026062, + "logps/chosen": -583.9425048828125, + "logps/ref_chosen": -51.25519561767578, + "logps/ref_rejected": -101.07870483398438, + "logps/rejected": -1053.218505859375, + "loss": 1.0567, + "margin_dpo/margin_mean": 419.45245361328125, + "margin_dpo/margin_std": 553.3563232421875, + "step": 499 + }, + { + "KL/chosen_KL_mean": -643.7020874023438, + "KL/mean": -815.4398193359375, + "KL/rejected_KL_mean": -987.177490234375, + "KL/std": 434.6232604980469, + "epoch": 0.7342143906020558, + "fcm_dpo/beta": 0.00111986487172544, + "fcm_dpo/delta": 0.015957213938236237, + "fcm_dpo/margin": 343.4754333496094, + "fcm_dpo/q_t": 0.41234683990478516, + "grad_norm": 42.036109924316406, + "learning_rate": 1.0139748428955333e-07, + "logits/chosen": -0.9217053055763245, + "logits/rejected": -0.9468744993209839, + "logps/chosen": -700.7294921875, + "logps/ref_chosen": -57.027442932128906, + "logps/ref_rejected": -93.93421173095703, + "logps/rejected": -1081.1116943359375, + "loss": 1.1254, + "margin_dpo/margin_mean": 343.4754333496094, + "margin_dpo/margin_std": 541.9052734375, + "step": 500 + }, + { + "KL/chosen_KL_mean": -556.8917846679688, + "KL/mean": -738.462158203125, + "KL/rejected_KL_mean": -920.032470703125, + "KL/std": 467.4353942871094, + "epoch": 0.73568281938326, + "fcm_dpo/beta": 0.0011241002939641476, + "fcm_dpo/delta": -0.008717566728591919, + "fcm_dpo/margin": 363.14068603515625, + "fcm_dpo/q_t": 0.4085647165775299, + "grad_norm": 31.888412475585938, + "learning_rate": 1.0036744111882672e-07, + "logits/chosen": -0.8804645538330078, + "logits/rejected": -0.8640455007553101, + "logps/chosen": -611.2513427734375, + "logps/ref_chosen": -54.359527587890625, + "logps/ref_rejected": -80.15670013427734, + "logps/rejected": -1000.189208984375, + "loss": 1.113, + "margin_dpo/margin_mean": 363.14068603515625, + "margin_dpo/margin_std": 566.4456787109375, + "step": 501 + }, + { + "KL/chosen_KL_mean": -507.1001892089844, + "KL/mean": -698.0469360351562, + "KL/rejected_KL_mean": -888.99365234375, + "KL/std": 443.9556579589844, + "epoch": 0.737151248164464, + "fcm_dpo/beta": 0.001116321887820959, + "fcm_dpo/delta": -0.027478674426674843, + "fcm_dpo/margin": 381.8934326171875, + "fcm_dpo/q_t": 0.40353554487228394, + "grad_norm": 25.909517288208008, + "learning_rate": 9.934134090518592e-08, + "logits/chosen": -0.8332573175430298, + "logits/rejected": -0.8167060017585754, + "logps/chosen": -574.70068359375, + "logps/ref_chosen": -67.60050964355469, + "logps/ref_rejected": -82.94876098632812, + "logps/rejected": -971.9423828125, + "loss": 1.067, + "margin_dpo/margin_mean": 381.8934326171875, + "margin_dpo/margin_std": 470.9022216796875, + "step": 502 + }, + { + "KL/chosen_KL_mean": -491.82965087890625, + "KL/mean": -663.3250732421875, + "KL/rejected_KL_mean": -834.8204345703125, + "KL/std": 403.2861022949219, + "epoch": 0.7386196769456681, + "fcm_dpo/beta": 0.0011143197771161795, + "fcm_dpo/delta": 0.018423786386847496, + "fcm_dpo/margin": 342.99078369140625, + "fcm_dpo/q_t": 0.41295260190963745, + "grad_norm": 23.40604591369629, + "learning_rate": 9.831921068732571e-08, + "logits/chosen": -0.868687629699707, + "logits/rejected": -0.8567318320274353, + "logps/chosen": -546.9080810546875, + "logps/ref_chosen": -55.078407287597656, + "logps/ref_rejected": -82.50544738769531, + "logps/rejected": -917.3258666992188, + "loss": 1.0968, + "margin_dpo/margin_mean": 342.99078369140625, + "margin_dpo/margin_std": 444.3095397949219, + "step": 503 + }, + { + "KL/chosen_KL_mean": -552.1359252929688, + "KL/mean": -755.714599609375, + "KL/rejected_KL_mean": -959.2933349609375, + "KL/std": 474.62152099609375, + "epoch": 0.7400881057268722, + "fcm_dpo/beta": 0.001110826968215406, + "fcm_dpo/delta": -0.0547223836183548, + "fcm_dpo/margin": 407.1573486328125, + "fcm_dpo/q_t": 0.39741408824920654, + "grad_norm": 27.982126235961914, + "learning_rate": 9.730107739932805e-08, + "logits/chosen": -0.8701947927474976, + "logits/rejected": -0.8900790214538574, + "logps/chosen": -612.1016845703125, + "logps/ref_chosen": -59.96575164794922, + "logps/ref_rejected": -103.76212310791016, + "logps/rejected": -1063.055419921875, + "loss": 1.0645, + "margin_dpo/margin_mean": 407.1573486328125, + "margin_dpo/margin_std": 519.5411987304688, + "step": 504 + }, + { + "KL/chosen_KL_mean": -625.5159301757812, + "KL/mean": -729.96484375, + "KL/rejected_KL_mean": -834.413818359375, + "KL/std": 455.81085205078125, + "epoch": 0.7415565345080763, + "fcm_dpo/beta": 0.0011349001433700323, + "fcm_dpo/delta": 0.1667182594537735, + "fcm_dpo/margin": 208.89784240722656, + "fcm_dpo/q_t": 0.4452478885650635, + "grad_norm": 34.76630783081055, + "learning_rate": 9.628696786995188e-08, + "logits/chosen": -0.8760533332824707, + "logits/rejected": -0.8471982479095459, + "logps/chosen": -701.6708374023438, + "logps/ref_chosen": -76.1549072265625, + "logps/ref_rejected": -88.58537292480469, + "logps/rejected": -922.9991455078125, + "loss": 1.2236, + "margin_dpo/margin_mean": 208.8978271484375, + "margin_dpo/margin_std": 435.1164245605469, + "step": 505 + }, + { + "KL/chosen_KL_mean": -502.8348083496094, + "KL/mean": -683.107666015625, + "KL/rejected_KL_mean": -863.3804931640625, + "KL/std": 453.17449951171875, + "epoch": 0.7430249632892805, + "fcm_dpo/beta": 0.0011371751315891743, + "fcm_dpo/delta": -0.01079019159078598, + "fcm_dpo/margin": 360.545654296875, + "fcm_dpo/q_t": 0.40623512864112854, + "grad_norm": 38.504554748535156, + "learning_rate": 9.527690882192635e-08, + "logits/chosen": -0.9111833572387695, + "logits/rejected": -0.9226495027542114, + "logps/chosen": -551.7952880859375, + "logps/ref_chosen": -48.96050262451172, + "logps/ref_rejected": -78.41505432128906, + "logps/rejected": -941.7955322265625, + "loss": 1.0915, + "margin_dpo/margin_mean": 360.545654296875, + "margin_dpo/margin_std": 491.61199951171875, + "step": 506 + }, + { + "KL/chosen_KL_mean": -591.5228271484375, + "KL/mean": -751.4690551757812, + "KL/rejected_KL_mean": -911.4154052734375, + "KL/std": 523.9932250976562, + "epoch": 0.7444933920704846, + "fcm_dpo/beta": 0.0011462382972240448, + "fcm_dpo/delta": 0.03460888937115669, + "fcm_dpo/margin": 319.8925476074219, + "fcm_dpo/q_t": 0.4203076958656311, + "grad_norm": 36.931053161621094, + "learning_rate": 9.427092687124691e-08, + "logits/chosen": -0.9362499713897705, + "logits/rejected": -0.9389501214027405, + "logps/chosen": -658.3242797851562, + "logps/ref_chosen": -66.80149841308594, + "logps/ref_rejected": -95.37289428710938, + "logps/rejected": -1006.7882690429688, + "loss": 1.1484, + "margin_dpo/margin_mean": 319.8925476074219, + "margin_dpo/margin_std": 573.2492065429688, + "step": 507 + }, + { + "KL/chosen_KL_mean": -627.1077880859375, + "KL/mean": -764.3685302734375, + "KL/rejected_KL_mean": -901.6292724609375, + "KL/std": 510.8812255859375, + "epoch": 0.7459618208516887, + "fcm_dpo/beta": 0.001165491994470358, + "fcm_dpo/delta": 0.08244814723730087, + "fcm_dpo/margin": 274.52154541015625, + "fcm_dpo/q_t": 0.4301344156265259, + "grad_norm": 38.228172302246094, + "learning_rate": 9.326904852647344e-08, + "logits/chosen": -0.8927318453788757, + "logits/rejected": -0.890540599822998, + "logps/chosen": -698.4112548828125, + "logps/ref_chosen": -71.303466796875, + "logps/ref_rejected": -95.6275405883789, + "logps/rejected": -997.2568359375, + "loss": 1.2033, + "margin_dpo/margin_mean": 274.5215759277344, + "margin_dpo/margin_std": 596.1304931640625, + "step": 508 + }, + { + "KL/chosen_KL_mean": -482.7675476074219, + "KL/mean": -632.9190673828125, + "KL/rejected_KL_mean": -783.0706176757812, + "KL/std": 380.39312744140625, + "epoch": 0.7474302496328928, + "fcm_dpo/beta": 0.0011804470559582114, + "fcm_dpo/delta": 0.04675152152776718, + "fcm_dpo/margin": 300.30303955078125, + "fcm_dpo/q_t": 0.41989073157310486, + "grad_norm": 29.72893524169922, + "learning_rate": 9.227130018803195e-08, + "logits/chosen": -0.8038022518157959, + "logits/rejected": -0.79693204164505, + "logps/chosen": -546.5865478515625, + "logps/ref_chosen": -63.81895065307617, + "logps/ref_rejected": -83.25643920898438, + "logps/rejected": -866.3270874023438, + "loss": 1.1333, + "margin_dpo/margin_mean": 300.30303955078125, + "margin_dpo/margin_std": 463.26605224609375, + "step": 509 + }, + { + "KL/chosen_KL_mean": -588.75, + "KL/mean": -778.4863891601562, + "KL/rejected_KL_mean": -968.22265625, + "KL/std": 429.193115234375, + "epoch": 0.748898678414097, + "fcm_dpo/beta": 0.0011725020594894886, + "fcm_dpo/delta": -0.046999622136354446, + "fcm_dpo/margin": 379.4727478027344, + "fcm_dpo/q_t": 0.3961718678474426, + "grad_norm": 29.25191307067871, + "learning_rate": 9.127770814751932e-08, + "logits/chosen": -0.8102399110794067, + "logits/rejected": -0.8293131589889526, + "logps/chosen": -640.6284790039062, + "logps/ref_chosen": -51.878448486328125, + "logps/ref_rejected": -102.7651596069336, + "logps/rejected": -1070.98779296875, + "loss": 1.0473, + "margin_dpo/margin_mean": 379.47271728515625, + "margin_dpo/margin_std": 420.7159423828125, + "step": 510 + }, + { + "KL/chosen_KL_mean": -559.4649658203125, + "KL/mean": -713.5518798828125, + "KL/rejected_KL_mean": -867.6387939453125, + "KL/std": 470.72216796875, + "epoch": 0.750367107195301, + "fcm_dpo/beta": 0.001175806624814868, + "fcm_dpo/delta": 0.039014674723148346, + "fcm_dpo/margin": 308.1739196777344, + "fcm_dpo/q_t": 0.4179048538208008, + "grad_norm": 39.78738021850586, + "learning_rate": 9.028829858700973e-08, + "logits/chosen": -0.8937386274337769, + "logits/rejected": -0.8976330161094666, + "logps/chosen": -619.7030029296875, + "logps/ref_chosen": -60.23811721801758, + "logps/ref_rejected": -92.85676574707031, + "logps/rejected": -960.49560546875, + "loss": 1.1575, + "margin_dpo/margin_mean": 308.17388916015625, + "margin_dpo/margin_std": 568.642333984375, + "step": 511 + }, + { + "KL/chosen_KL_mean": -453.3255615234375, + "KL/mean": -663.5873413085938, + "KL/rejected_KL_mean": -873.84912109375, + "KL/std": 430.00079345703125, + "epoch": 0.7518355359765051, + "fcm_dpo/beta": 0.0011590380454435945, + "fcm_dpo/delta": -0.09211389720439911, + "fcm_dpo/margin": 420.5235595703125, + "fcm_dpo/q_t": 0.38680607080459595, + "grad_norm": 51.16664505004883, + "learning_rate": 8.930309757836516e-08, + "logits/chosen": -0.8603556752204895, + "logits/rejected": -0.8814679384231567, + "logps/chosen": -508.2310791015625, + "logps/ref_chosen": -54.905494689941406, + "logps/ref_rejected": -81.87586975097656, + "logps/rejected": -955.7249755859375, + "loss": 1.0153, + "margin_dpo/margin_mean": 420.5235290527344, + "margin_dpo/margin_std": 434.5292053222656, + "step": 512 + }, + { + "KL/chosen_KL_mean": -557.367431640625, + "KL/mean": -709.244873046875, + "KL/rejected_KL_mean": -861.1224365234375, + "KL/std": 405.1826171875, + "epoch": 0.7533039647577092, + "fcm_dpo/beta": 0.0011576918186619878, + "fcm_dpo/delta": 0.04973098263144493, + "fcm_dpo/margin": 303.75506591796875, + "fcm_dpo/q_t": 0.42015981674194336, + "grad_norm": 40.44023513793945, + "learning_rate": 8.832213108254863e-08, + "logits/chosen": -0.914627194404602, + "logits/rejected": -0.8992458581924438, + "logps/chosen": -622.2838134765625, + "logps/ref_chosen": -64.91644287109375, + "logps/ref_rejected": -76.06245422363281, + "logps/rejected": -937.1848754882812, + "loss": 1.1394, + "margin_dpo/margin_mean": 303.75506591796875, + "margin_dpo/margin_std": 474.69561767578125, + "step": 513 + }, + { + "KL/chosen_KL_mean": -580.2469482421875, + "KL/mean": -727.4422607421875, + "KL/rejected_KL_mean": -874.6375732421875, + "KL/std": 435.10223388671875, + "epoch": 0.7547723935389133, + "fcm_dpo/beta": 0.0011787754483520985, + "fcm_dpo/delta": 0.05473232641816139, + "fcm_dpo/margin": 294.3906555175781, + "fcm_dpo/q_t": 0.42276865243911743, + "grad_norm": 35.93750762939453, + "learning_rate": 8.734542494893954e-08, + "logits/chosen": -0.8526400327682495, + "logits/rejected": -0.8441455364227295, + "logps/chosen": -654.4765625, + "logps/ref_chosen": -74.22957611083984, + "logps/ref_rejected": -78.945556640625, + "logps/rejected": -953.5831298828125, + "loss": 1.1468, + "margin_dpo/margin_mean": 294.39068603515625, + "margin_dpo/margin_std": 494.26495361328125, + "step": 514 + }, + { + "KL/chosen_KL_mean": -495.4428405761719, + "KL/mean": -614.765625, + "KL/rejected_KL_mean": -734.0885009765625, + "KL/std": 379.7894287109375, + "epoch": 0.7562408223201175, + "fcm_dpo/beta": 0.0012007859768345952, + "fcm_dpo/delta": 0.11678852140903473, + "fcm_dpo/margin": 238.64556884765625, + "fcm_dpo/q_t": 0.43298569321632385, + "grad_norm": 42.832855224609375, + "learning_rate": 8.637300491465272e-08, + "logits/chosen": -0.8383795022964478, + "logits/rejected": -0.8531197905540466, + "logps/chosen": -545.8444213867188, + "logps/ref_chosen": -50.40156555175781, + "logps/ref_rejected": -87.09774780273438, + "logps/rejected": -821.1862182617188, + "loss": 1.1849, + "margin_dpo/margin_mean": 238.6455841064453, + "margin_dpo/margin_std": 437.5038146972656, + "step": 515 + }, + { + "KL/chosen_KL_mean": -530.1577758789062, + "KL/mean": -699.3049926757812, + "KL/rejected_KL_mean": -868.4521484375, + "KL/std": 425.07623291015625, + "epoch": 0.7577092511013216, + "fcm_dpo/beta": 0.0012106327340006828, + "fcm_dpo/delta": -0.01020483672618866, + "fcm_dpo/margin": 338.2943420410156, + "fcm_dpo/q_t": 0.4044458270072937, + "grad_norm": 54.36648941040039, + "learning_rate": 8.540489660386064e-08, + "logits/chosen": -0.8974713087081909, + "logits/rejected": -0.9246504902839661, + "logps/chosen": -594.807373046875, + "logps/ref_chosen": -64.64956665039062, + "logps/ref_rejected": -111.72237396240234, + "logps/rejected": -980.1744995117188, + "loss": 1.0818, + "margin_dpo/margin_mean": 338.2943420410156, + "margin_dpo/margin_std": 424.7276611328125, + "step": 516 + }, + { + "KL/chosen_KL_mean": -559.2984008789062, + "KL/mean": -758.3714599609375, + "KL/rejected_KL_mean": -957.4444580078125, + "KL/std": 472.23651123046875, + "epoch": 0.7591776798825257, + "fcm_dpo/beta": 0.0011898339726030827, + "fcm_dpo/delta": -0.07756029814481735, + "fcm_dpo/margin": 398.1460266113281, + "fcm_dpo/q_t": 0.3944876194000244, + "grad_norm": 29.054262161254883, + "learning_rate": 8.444112552711752e-08, + "logits/chosen": -0.8497953414916992, + "logits/rejected": -0.8503054976463318, + "logps/chosen": -620.2119750976562, + "logps/ref_chosen": -60.913551330566406, + "logps/ref_rejected": -89.08308410644531, + "logps/rejected": -1046.527587890625, + "loss": 1.0518, + "margin_dpo/margin_mean": 398.14605712890625, + "margin_dpo/margin_std": 519.8525390625, + "step": 517 + }, + { + "KL/chosen_KL_mean": -528.1893310546875, + "KL/mean": -698.426025390625, + "KL/rejected_KL_mean": -868.6627197265625, + "KL/std": 393.81671142578125, + "epoch": 0.7606461086637298, + "fcm_dpo/beta": 0.0011824161047115922, + "fcm_dpo/delta": -0.002931937575340271, + "fcm_dpo/margin": 340.473388671875, + "fcm_dpo/q_t": 0.40656790137290955, + "grad_norm": 54.556785583496094, + "learning_rate": 8.348171708068747e-08, + "logits/chosen": -0.8558133840560913, + "logits/rejected": -0.874567985534668, + "logps/chosen": -585.6452026367188, + "logps/ref_chosen": -57.45589065551758, + "logps/ref_rejected": -85.31269836425781, + "logps/rejected": -953.9754638671875, + "loss": 1.0875, + "margin_dpo/margin_mean": 340.473388671875, + "margin_dpo/margin_std": 436.8955078125, + "step": 518 + }, + { + "KL/chosen_KL_mean": -525.33154296875, + "KL/mean": -649.0626220703125, + "KL/rejected_KL_mean": -772.793701171875, + "KL/std": 368.4232177734375, + "epoch": 0.762114537444934, + "fcm_dpo/beta": 0.001208610599860549, + "fcm_dpo/delta": 0.10347578674554825, + "fcm_dpo/margin": 247.46214294433594, + "fcm_dpo/q_t": 0.4309791624546051, + "grad_norm": 33.39023971557617, + "learning_rate": 8.25266965458755e-08, + "logits/chosen": -0.837517261505127, + "logits/rejected": -0.8200976848602295, + "logps/chosen": -599.3948974609375, + "logps/ref_chosen": -74.06331634521484, + "logps/ref_rejected": -104.44416809082031, + "logps/rejected": -877.2378540039062, + "loss": 1.183, + "margin_dpo/margin_mean": 247.46214294433594, + "margin_dpo/margin_std": 456.01971435546875, + "step": 519 + }, + { + "KL/chosen_KL_mean": -569.647705078125, + "KL/mean": -727.2234497070312, + "KL/rejected_KL_mean": -884.7991943359375, + "KL/std": 423.0360412597656, + "epoch": 0.7635829662261381, + "fcm_dpo/beta": 0.0012114193523302674, + "fcm_dpo/delta": 0.018862294033169746, + "fcm_dpo/margin": 315.1514892578125, + "fcm_dpo/q_t": 0.4147945046424866, + "grad_norm": 34.308570861816406, + "learning_rate": 8.15760890883607e-08, + "logits/chosen": -0.81200110912323, + "logits/rejected": -0.8175575733184814, + "logps/chosen": -639.947509765625, + "logps/ref_chosen": -70.2998275756836, + "logps/ref_rejected": -99.98133850097656, + "logps/rejected": -984.780517578125, + "loss": 1.1189, + "margin_dpo/margin_mean": 315.1515197753906, + "margin_dpo/margin_std": 470.97100830078125, + "step": 520 + }, + { + "KL/chosen_KL_mean": -510.13311767578125, + "KL/mean": -689.7613525390625, + "KL/rejected_KL_mean": -869.3895263671875, + "KL/std": 446.6356201171875, + "epoch": 0.7650513950073421, + "fcm_dpo/beta": 0.0012165037915110588, + "fcm_dpo/delta": -0.03935041278600693, + "fcm_dpo/margin": 359.2563781738281, + "fcm_dpo/q_t": 0.40068429708480835, + "grad_norm": 40.11237335205078, + "learning_rate": 8.062991975753378e-08, + "logits/chosen": -0.8692072629928589, + "logits/rejected": -0.8731534481048584, + "logps/chosen": -568.2760620117188, + "logps/ref_chosen": -58.14292526245117, + "logps/ref_rejected": -83.28060913085938, + "logps/rejected": -952.670166015625, + "loss": 1.0694, + "margin_dpo/margin_mean": 359.2563781738281, + "margin_dpo/margin_std": 451.70306396484375, + "step": 521 + }, + { + "KL/chosen_KL_mean": -590.7149658203125, + "KL/mean": -751.5833740234375, + "KL/rejected_KL_mean": -912.4517822265625, + "KL/std": 471.05303955078125, + "epoch": 0.7665198237885462, + "fcm_dpo/beta": 0.001208572182804346, + "fcm_dpo/delta": 0.011602986603975296, + "fcm_dpo/margin": 321.7369079589844, + "fcm_dpo/q_t": 0.4123893082141876, + "grad_norm": 32.06018829345703, + "learning_rate": 7.968821348583643e-08, + "logits/chosen": -0.8791370987892151, + "logits/rejected": -0.8823133707046509, + "logps/chosen": -637.2626342773438, + "logps/ref_chosen": -46.54766845703125, + "logps/ref_rejected": -66.01388549804688, + "logps/rejected": -978.4656982421875, + "loss": 1.1248, + "margin_dpo/margin_mean": 321.7369079589844, + "margin_dpo/margin_std": 514.0554809570312, + "step": 522 + }, + { + "KL/chosen_KL_mean": -634.4599609375, + "KL/mean": -804.0902099609375, + "KL/rejected_KL_mean": -973.7205810546875, + "KL/std": 544.9263916015625, + "epoch": 0.7679882525697503, + "fcm_dpo/beta": 0.0012069594813510776, + "fcm_dpo/delta": -0.0098798843100667, + "fcm_dpo/margin": 339.26068115234375, + "fcm_dpo/q_t": 0.4096330404281616, + "grad_norm": 39.04078674316406, + "learning_rate": 7.875099508810484e-08, + "logits/chosen": -0.9305659532546997, + "logits/rejected": -0.9318529367446899, + "logps/chosen": -696.2294921875, + "logps/ref_chosen": -61.76960372924805, + "logps/ref_rejected": -83.76141357421875, + "logps/rejected": -1057.48193359375, + "loss": 1.1257, + "margin_dpo/margin_mean": 339.26068115234375, + "margin_dpo/margin_std": 569.8573608398438, + "step": 523 + }, + { + "KL/chosen_KL_mean": -631.42236328125, + "KL/mean": -814.2626953125, + "KL/rejected_KL_mean": -997.10302734375, + "KL/std": 514.68408203125, + "epoch": 0.7694566813509545, + "fcm_dpo/beta": 0.001192695926874876, + "fcm_dpo/delta": -0.03869359940290451, + "fcm_dpo/margin": 365.6807556152344, + "fcm_dpo/q_t": 0.3989192843437195, + "grad_norm": 36.887882232666016, + "learning_rate": 7.781828926091535e-08, + "logits/chosen": -0.9622774124145508, + "logits/rejected": -0.9509581327438354, + "logps/chosen": -709.494384765625, + "logps/ref_chosen": -78.0720443725586, + "logps/ref_rejected": -81.30198669433594, + "logps/rejected": -1078.405029296875, + "loss": 1.0926, + "margin_dpo/margin_mean": 365.68072509765625, + "margin_dpo/margin_std": 523.1288452148438, + "step": 524 + }, + { + "KL/chosen_KL_mean": -622.1337890625, + "KL/mean": -846.5458984375, + "KL/rejected_KL_mean": -1070.9580078125, + "KL/std": 526.7284545898438, + "epoch": 0.7709251101321586, + "fcm_dpo/beta": 0.0011688778176903725, + "fcm_dpo/delta": -0.13245530426502228, + "fcm_dpo/margin": 448.8243408203125, + "fcm_dpo/q_t": 0.3837316036224365, + "grad_norm": 25.891273498535156, + "learning_rate": 7.689012058193384e-08, + "logits/chosen": -0.918233335018158, + "logits/rejected": -0.9528594017028809, + "logps/chosen": -672.9616088867188, + "logps/ref_chosen": -50.827857971191406, + "logps/ref_rejected": -100.05294036865234, + "logps/rejected": -1171.010986328125, + "loss": 1.0231, + "margin_dpo/margin_mean": 448.82427978515625, + "margin_dpo/margin_std": 557.4779663085938, + "step": 525 + }, + { + "KL/chosen_KL_mean": -663.134765625, + "KL/mean": -881.9912109375, + "KL/rejected_KL_mean": -1100.84765625, + "KL/std": 521.672119140625, + "epoch": 0.7723935389133627, + "fcm_dpo/beta": 0.0011508764000609517, + "fcm_dpo/delta": -0.10911859571933746, + "fcm_dpo/margin": 437.71282958984375, + "fcm_dpo/q_t": 0.38582324981689453, + "grad_norm": 23.709228515625, + "learning_rate": 7.596651350926836e-08, + "logits/chosen": -0.8999603986740112, + "logits/rejected": -0.8906654119491577, + "logps/chosen": -726.302001953125, + "logps/ref_chosen": -63.167236328125, + "logps/ref_rejected": -86.30934143066406, + "logps/rejected": -1187.156982421875, + "loss": 1.0506, + "margin_dpo/margin_mean": 437.7127990722656, + "margin_dpo/margin_std": 586.166259765625, + "step": 526 + }, + { + "KL/chosen_KL_mean": -687.536865234375, + "KL/mean": -834.2434692382812, + "KL/rejected_KL_mean": -980.9501342773438, + "KL/std": 535.2979736328125, + "epoch": 0.7738619676945668, + "fcm_dpo/beta": 0.001149723306298256, + "fcm_dpo/delta": 0.06489390134811401, + "fcm_dpo/margin": 293.4132385253906, + "fcm_dpo/q_t": 0.42192360758781433, + "grad_norm": 43.69949722290039, + "learning_rate": 7.504749238082414e-08, + "logits/chosen": -1.1003575325012207, + "logits/rejected": -1.0651922225952148, + "logps/chosen": -758.66552734375, + "logps/ref_chosen": -71.12867736816406, + "logps/ref_rejected": -78.3425521850586, + "logps/rejected": -1059.292724609375, + "loss": 1.1423, + "margin_dpo/margin_mean": 293.4132385253906, + "margin_dpo/margin_std": 454.53448486328125, + "step": 527 + }, + { + "KL/chosen_KL_mean": -689.6435546875, + "KL/mean": -871.0379638671875, + "KL/rejected_KL_mean": -1052.4324951171875, + "KL/std": 510.6888427734375, + "epoch": 0.775330396475771, + "fcm_dpo/beta": 0.0011513070203363895, + "fcm_dpo/delta": -0.018490692600607872, + "fcm_dpo/margin": 362.7888488769531, + "fcm_dpo/q_t": 0.4090343713760376, + "grad_norm": 49.07489776611328, + "learning_rate": 7.413308141366254e-08, + "logits/chosen": -1.0010507106781006, + "logits/rejected": -0.9815536141395569, + "logps/chosen": -757.7330322265625, + "logps/ref_chosen": -68.0894546508789, + "logps/ref_rejected": -93.91006469726562, + "logps/rejected": -1146.342529296875, + "loss": 1.1277, + "margin_dpo/margin_mean": 362.7888488769531, + "margin_dpo/margin_std": 622.3973388671875, + "step": 528 + }, + { + "KL/chosen_KL_mean": -799.9935302734375, + "KL/mean": -926.8414306640625, + "KL/rejected_KL_mean": -1053.689208984375, + "KL/std": 460.82391357421875, + "epoch": 0.7767988252569751, + "fcm_dpo/beta": 0.0011541005223989487, + "fcm_dpo/delta": 0.01186126284301281, + "fcm_dpo/margin": 253.6956787109375, + "fcm_dpo/q_t": 0.43285101652145386, + "grad_norm": 45.102630615234375, + "learning_rate": 7.322330470336313e-08, + "logits/chosen": -1.0135385990142822, + "logits/rejected": -1.021782398223877, + "logps/chosen": -855.5684814453125, + "logps/ref_chosen": -55.57495880126953, + "logps/ref_rejected": -89.20909118652344, + "logps/rejected": -1142.8983154296875, + "loss": 1.2213, + "margin_dpo/margin_mean": 253.6956787109375, + "margin_dpo/margin_std": 574.0363159179688, + "step": 529 + }, + { + "KL/chosen_KL_mean": -676.3377685546875, + "KL/mean": -873.301513671875, + "KL/rejected_KL_mean": -1070.2652587890625, + "KL/std": 554.6763916015625, + "epoch": 0.7782672540381792, + "fcm_dpo/beta": 0.0011464983690530062, + "fcm_dpo/delta": -0.0540442019701004, + "fcm_dpo/margin": 393.927490234375, + "fcm_dpo/q_t": 0.4019849896430969, + "grad_norm": 44.16566467285156, + "learning_rate": 7.231818622338822e-08, + "logits/chosen": -0.9260751008987427, + "logits/rejected": -0.919657289981842, + "logps/chosen": -723.939208984375, + "logps/ref_chosen": -47.601417541503906, + "logps/ref_rejected": -87.2845230102539, + "logps/rejected": -1157.5498046875, + "loss": 1.1222, + "margin_dpo/margin_mean": 393.927490234375, + "margin_dpo/margin_std": 693.3948974609375, + "step": 530 + }, + { + "KL/chosen_KL_mean": -755.704833984375, + "KL/mean": -917.4498901367188, + "KL/rejected_KL_mean": -1079.19482421875, + "KL/std": 589.1134033203125, + "epoch": 0.7797356828193832, + "fcm_dpo/beta": 0.0011490847682580352, + "fcm_dpo/delta": 0.0292234905064106, + "fcm_dpo/margin": 323.489990234375, + "fcm_dpo/q_t": 0.417187362909317, + "grad_norm": 41.70063781738281, + "learning_rate": 7.141774982445147e-08, + "logits/chosen": -1.028259038925171, + "logits/rejected": -1.0060193538665771, + "logps/chosen": -810.950927734375, + "logps/ref_chosen": -55.246063232421875, + "logps/ref_rejected": -70.60598754882812, + "logps/rejected": -1149.80078125, + "loss": 1.1423, + "margin_dpo/margin_mean": 323.4900207519531, + "margin_dpo/margin_std": 554.1434326171875, + "step": 531 + }, + { + "KL/chosen_KL_mean": -746.92626953125, + "KL/mean": -928.888671875, + "KL/rejected_KL_mean": -1110.85107421875, + "KL/std": 552.4840087890625, + "epoch": 0.7812041116005873, + "fcm_dpo/beta": 0.0011367748957127333, + "fcm_dpo/delta": -0.01567455381155014, + "fcm_dpo/margin": 363.9248046875, + "fcm_dpo/q_t": 0.40855199098587036, + "grad_norm": 68.5473861694336, + "learning_rate": 7.052201923388953e-08, + "logits/chosen": -0.9877306818962097, + "logits/rejected": -0.9620273113250732, + "logps/chosen": -817.2122802734375, + "logps/ref_chosen": -70.28601837158203, + "logps/ref_rejected": -86.5913314819336, + "logps/rejected": -1197.4423828125, + "loss": 1.1483, + "margin_dpo/margin_mean": 363.9248046875, + "margin_dpo/margin_std": 657.9794311523438, + "step": 532 + }, + { + "KL/chosen_KL_mean": -678.8699951171875, + "KL/mean": -809.2179565429688, + "KL/rejected_KL_mean": -939.5658569335938, + "KL/std": 484.06103515625, + "epoch": 0.7826725403817915, + "fcm_dpo/beta": 0.0011436111526563764, + "fcm_dpo/delta": -0.010318025015294552, + "fcm_dpo/margin": 260.69586181640625, + "fcm_dpo/q_t": 0.43308863043785095, + "grad_norm": 41.75889205932617, + "learning_rate": 6.963101805503646e-08, + "logits/chosen": -1.0021346807479858, + "logits/rejected": -0.9763340950012207, + "logps/chosen": -743.72509765625, + "logps/ref_chosen": -64.8551025390625, + "logps/ref_rejected": -76.58805847167969, + "logps/rejected": -1016.1539306640625, + "loss": 1.2151, + "margin_dpo/margin_mean": 260.69586181640625, + "margin_dpo/margin_std": 586.0986328125, + "step": 533 + }, + { + "KL/chosen_KL_mean": -687.0634765625, + "KL/mean": -865.5532836914062, + "KL/rejected_KL_mean": -1044.04296875, + "KL/std": 514.9797973632812, + "epoch": 0.7841409691629956, + "fcm_dpo/beta": 0.0011327785905450583, + "fcm_dpo/delta": -0.005801960825920105, + "fcm_dpo/margin": 356.9794616699219, + "fcm_dpo/q_t": 0.40905874967575073, + "grad_norm": 35.797950744628906, + "learning_rate": 6.874476976660184e-08, + "logits/chosen": -0.9988099336624146, + "logits/rejected": -0.9967177510261536, + "logps/chosen": -747.182861328125, + "logps/ref_chosen": -60.119388580322266, + "logps/ref_rejected": -78.54347229003906, + "logps/rejected": -1122.58642578125, + "loss": 1.1125, + "margin_dpo/margin_mean": 356.9794921875, + "margin_dpo/margin_std": 537.181396484375, + "step": 534 + }, + { + "KL/chosen_KL_mean": -576.67333984375, + "KL/mean": -783.545654296875, + "KL/rejected_KL_mean": -990.41796875, + "KL/std": 499.79852294921875, + "epoch": 0.7856093979441997, + "fcm_dpo/beta": 0.001136034494265914, + "fcm_dpo/delta": -0.07386443018913269, + "fcm_dpo/margin": 413.7446594238281, + "fcm_dpo/q_t": 0.39456337690353394, + "grad_norm": 30.948278427124023, + "learning_rate": 6.786329772205246e-08, + "logits/chosen": -0.9196850061416626, + "logits/rejected": -0.921947717666626, + "logps/chosen": -631.0035400390625, + "logps/ref_chosen": -54.330238342285156, + "logps/ref_rejected": -96.30763244628906, + "logps/rejected": -1086.7255859375, + "loss": 1.0557, + "margin_dpo/margin_mean": 413.7446594238281, + "margin_dpo/margin_std": 526.1167602539062, + "step": 535 + }, + { + "KL/chosen_KL_mean": -520.1702880859375, + "KL/mean": -766.1087646484375, + "KL/rejected_KL_mean": -1012.0472412109375, + "KL/std": 554.3941650390625, + "epoch": 0.7870778267254038, + "fcm_dpo/beta": 0.0011004150146618485, + "fcm_dpo/delta": -0.14926910400390625, + "fcm_dpo/margin": 491.8769226074219, + "fcm_dpo/q_t": 0.383211225271225, + "grad_norm": 29.700851440429688, + "learning_rate": 6.698662514899638e-08, + "logits/chosen": -0.8961449265480042, + "logits/rejected": -0.9250037670135498, + "logps/chosen": -567.2508544921875, + "logps/ref_chosen": -47.08053207397461, + "logps/ref_rejected": -89.09783935546875, + "logps/rejected": -1101.14501953125, + "loss": 1.0215, + "margin_dpo/margin_mean": 491.8769226074219, + "margin_dpo/margin_std": 654.18408203125, + "step": 536 + }, + { + "KL/chosen_KL_mean": -537.0285034179688, + "KL/mean": -701.8114013671875, + "KL/rejected_KL_mean": -866.59423828125, + "KL/std": 445.88031005859375, + "epoch": 0.788546255506608, + "fcm_dpo/beta": 0.0011028747539967299, + "fcm_dpo/delta": 0.037213459610939026, + "fcm_dpo/margin": 329.5657958984375, + "fcm_dpo/q_t": 0.41649651527404785, + "grad_norm": 44.981773376464844, + "learning_rate": 6.611477514857114e-08, + "logits/chosen": -0.9382889270782471, + "logits/rejected": -0.9202646017074585, + "logps/chosen": -594.7760009765625, + "logps/ref_chosen": -57.747467041015625, + "logps/ref_rejected": -70.43838500976562, + "logps/rejected": -937.0326538085938, + "loss": 1.1431, + "margin_dpo/margin_mean": 329.5657958984375, + "margin_dpo/margin_std": 545.5430297851562, + "step": 537 + }, + { + "KL/chosen_KL_mean": -655.9216918945312, + "KL/mean": -841.97509765625, + "KL/rejected_KL_mean": -1028.028564453125, + "KL/std": 484.8907165527344, + "epoch": 0.7900146842878121, + "fcm_dpo/beta": 0.00109610625077039, + "fcm_dpo/delta": -0.008285703137516975, + "fcm_dpo/margin": 372.10687255859375, + "fcm_dpo/q_t": 0.40666401386260986, + "grad_norm": 28.537601470947266, + "learning_rate": 6.524777069483525e-08, + "logits/chosen": -0.9427316188812256, + "logits/rejected": -0.9291995763778687, + "logps/chosen": -722.337646484375, + "logps/ref_chosen": -66.41594696044922, + "logps/ref_rejected": -84.22808837890625, + "logps/rejected": -1112.256591796875, + "loss": 1.0874, + "margin_dpo/margin_mean": 372.1068420410156, + "margin_dpo/margin_std": 494.2760314941406, + "step": 538 + }, + { + "KL/chosen_KL_mean": -552.2450561523438, + "KL/mean": -729.5706176757812, + "KL/rejected_KL_mean": -906.8961791992188, + "KL/std": 395.36083984375, + "epoch": 0.7914831130690162, + "fcm_dpo/beta": 0.0011011988390237093, + "fcm_dpo/delta": 0.009760351851582527, + "fcm_dpo/margin": 354.651123046875, + "fcm_dpo/q_t": 0.40968143939971924, + "grad_norm": 34.4008903503418, + "learning_rate": 6.438563463416221e-08, + "logits/chosen": -0.9456039667129517, + "logits/rejected": -0.9376469254493713, + "logps/chosen": -610.7379150390625, + "logps/ref_chosen": -58.492855072021484, + "logps/ref_rejected": -91.85395050048828, + "logps/rejected": -998.7501220703125, + "loss": 1.0878, + "margin_dpo/margin_mean": 354.651123046875, + "margin_dpo/margin_std": 434.3199462890625, + "step": 539 + }, + { + "KL/chosen_KL_mean": -543.5631103515625, + "KL/mean": -769.376953125, + "KL/rejected_KL_mean": -995.1907958984375, + "KL/std": 495.0863342285156, + "epoch": 0.7929515418502202, + "fcm_dpo/beta": 0.0010876839514821768, + "fcm_dpo/delta": -0.09584314376115799, + "fcm_dpo/margin": 451.6276550292969, + "fcm_dpo/q_t": 0.3912101984024048, + "grad_norm": 39.40283966064453, + "learning_rate": 6.352838968463919e-08, + "logits/chosen": -0.8707677721977234, + "logits/rejected": -0.8923947811126709, + "logps/chosen": -607.045654296875, + "logps/ref_chosen": -63.482513427734375, + "logps/ref_rejected": -116.42999267578125, + "logps/rejected": -1111.620849609375, + "loss": 1.0481, + "margin_dpo/margin_mean": 451.6276550292969, + "margin_dpo/margin_std": 583.0654907226562, + "step": 540 + }, + { + "KL/chosen_KL_mean": -658.597412109375, + "KL/mean": -783.5596923828125, + "KL/rejected_KL_mean": -908.5219116210938, + "KL/std": 451.7571716308594, + "epoch": 0.7944199706314243, + "fcm_dpo/beta": 0.001078011584468186, + "fcm_dpo/delta": -0.004111842717975378, + "fcm_dpo/margin": 249.92453002929688, + "fcm_dpo/q_t": 0.43801432847976685, + "grad_norm": 53.83041763305664, + "learning_rate": 6.267605843546767e-08, + "logits/chosen": -0.9981366395950317, + "logits/rejected": -0.9937785863876343, + "logps/chosen": -736.8777465820312, + "logps/ref_chosen": -78.28036499023438, + "logps/ref_rejected": -103.273681640625, + "logps/rejected": -1011.7955932617188, + "loss": 1.2177, + "margin_dpo/margin_mean": 249.92453002929688, + "margin_dpo/margin_std": 536.0916748046875, + "step": 541 + }, + { + "KL/chosen_KL_mean": -562.605712890625, + "KL/mean": -788.047607421875, + "KL/rejected_KL_mean": -1013.4893798828125, + "KL/std": 504.0601806640625, + "epoch": 0.7958883994126285, + "fcm_dpo/beta": 0.0010561456438153982, + "fcm_dpo/delta": -0.08209630846977234, + "fcm_dpo/margin": 450.8837585449219, + "fcm_dpo/q_t": 0.3928494155406952, + "grad_norm": 52.30241012573242, + "learning_rate": 6.182866334636888e-08, + "logits/chosen": -0.9771475791931152, + "logits/rejected": -1.0094921588897705, + "logps/chosen": -620.0906982421875, + "logps/ref_chosen": -57.48497009277344, + "logps/ref_rejected": -96.47506713867188, + "logps/rejected": -1109.9644775390625, + "loss": 1.0592, + "margin_dpo/margin_mean": 450.8837585449219, + "margin_dpo/margin_std": 593.1970825195312, + "step": 542 + }, + { + "KL/chosen_KL_mean": -628.9979858398438, + "KL/mean": -786.9501953125, + "KL/rejected_KL_mean": -944.90234375, + "KL/std": 587.5977783203125, + "epoch": 0.7973568281938326, + "fcm_dpo/beta": 0.0010682092979550362, + "fcm_dpo/delta": 0.06477095186710358, + "fcm_dpo/margin": 315.904296875, + "fcm_dpo/q_t": 0.4326469302177429, + "grad_norm": 30.09369659423828, + "learning_rate": 6.098622674699147e-08, + "logits/chosen": -0.9427838325500488, + "logits/rejected": -0.9720630645751953, + "logps/chosen": -689.615478515625, + "logps/ref_chosen": -60.61750793457031, + "logps/ref_rejected": -105.59896850585938, + "logps/rejected": -1050.501220703125, + "loss": 1.1962, + "margin_dpo/margin_mean": 315.9043273925781, + "margin_dpo/margin_std": 699.5291748046875, + "step": 543 + }, + { + "KL/chosen_KL_mean": -639.6248779296875, + "KL/mean": -832.869873046875, + "KL/rejected_KL_mean": -1026.1148681640625, + "KL/std": 483.07550048828125, + "epoch": 0.7988252569750367, + "fcm_dpo/beta": 0.0010710186325013638, + "fcm_dpo/delta": -0.014568203128874302, + "fcm_dpo/margin": 386.49005126953125, + "fcm_dpo/q_t": 0.40565305948257446, + "grad_norm": 32.88768005371094, + "learning_rate": 6.01487708363232e-08, + "logits/chosen": -0.9224880933761597, + "logits/rejected": -0.9422965049743652, + "logps/chosen": -699.2671508789062, + "logps/ref_chosen": -59.642303466796875, + "logps/ref_rejected": -100.95469665527344, + "logps/rejected": -1127.069580078125, + "loss": 1.094, + "margin_dpo/margin_mean": 386.49005126953125, + "margin_dpo/margin_std": 549.4237060546875, + "step": 544 + }, + { + "KL/chosen_KL_mean": -592.945556640625, + "KL/mean": -817.5401611328125, + "KL/rejected_KL_mean": -1042.134765625, + "KL/std": 495.79449462890625, + "epoch": 0.8002936857562408, + "fcm_dpo/beta": 0.0010588113218545914, + "fcm_dpo/delta": -0.07936666160821915, + "fcm_dpo/margin": 449.18927001953125, + "fcm_dpo/q_t": 0.39326316118240356, + "grad_norm": 32.99470520019531, + "learning_rate": 5.9316317682106294e-08, + "logits/chosen": -0.8537076711654663, + "logits/rejected": -0.8849306106567383, + "logps/chosen": -660.5941162109375, + "logps/ref_chosen": -67.64859771728516, + "logps/ref_rejected": -95.90800476074219, + "logps/rejected": -1138.042724609375, + "loss": 1.0488, + "margin_dpo/margin_mean": 449.18927001953125, + "margin_dpo/margin_std": 571.0772705078125, + "step": 545 + }, + { + "KL/chosen_KL_mean": -566.8978271484375, + "KL/mean": -716.198486328125, + "KL/rejected_KL_mean": -865.499267578125, + "KL/std": 434.204833984375, + "epoch": 0.801762114537445, + "fcm_dpo/beta": 0.0010674262885004282, + "fcm_dpo/delta": 0.08395257592201233, + "fcm_dpo/margin": 298.6014709472656, + "fcm_dpo/q_t": 0.42482131719589233, + "grad_norm": 32.57497024536133, + "learning_rate": 5.848888922025552e-08, + "logits/chosen": -0.9205929040908813, + "logits/rejected": -0.910815954208374, + "logps/chosen": -617.6420288085938, + "logps/ref_chosen": -50.744232177734375, + "logps/ref_rejected": -81.86622619628906, + "logps/rejected": -947.365478515625, + "loss": 1.1519, + "margin_dpo/margin_mean": 298.6014709472656, + "margin_dpo/margin_std": 461.811767578125, + "step": 546 + }, + { + "KL/chosen_KL_mean": -573.2467041015625, + "KL/mean": -762.8134765625, + "KL/rejected_KL_mean": -952.38037109375, + "KL/std": 485.2721862792969, + "epoch": 0.8032305433186491, + "fcm_dpo/beta": 0.0010726114269345999, + "fcm_dpo/delta": -0.006979792378842831, + "fcm_dpo/margin": 379.1336669921875, + "fcm_dpo/q_t": 0.40798118710517883, + "grad_norm": 50.77888870239258, + "learning_rate": 5.7666507254280265e-08, + "logits/chosen": -0.8564267158508301, + "logits/rejected": -0.8684166669845581, + "logps/chosen": -646.9344482421875, + "logps/ref_chosen": -73.6877212524414, + "logps/ref_rejected": -90.76136779785156, + "logps/rejected": -1043.1417236328125, + "loss": 1.0961, + "margin_dpo/margin_mean": 379.1336669921875, + "margin_dpo/margin_std": 533.245361328125, + "step": 547 + }, + { + "KL/chosen_KL_mean": -600.245361328125, + "KL/mean": -777.9320068359375, + "KL/rejected_KL_mean": -955.61865234375, + "KL/std": 507.2066650390625, + "epoch": 0.8046989720998532, + "fcm_dpo/beta": 0.0010729740606620908, + "fcm_dpo/delta": 0.019438141956925392, + "fcm_dpo/margin": 355.3732604980469, + "fcm_dpo/q_t": 0.4163675606250763, + "grad_norm": 31.580347061157227, + "learning_rate": 5.684919345471029e-08, + "logits/chosen": -0.9392424821853638, + "logits/rejected": -0.9410542845726013, + "logps/chosen": -665.49169921875, + "logps/ref_chosen": -65.24634552001953, + "logps/ref_rejected": -94.11807250976562, + "logps/rejected": -1049.7366943359375, + "loss": 1.1184, + "margin_dpo/margin_mean": 355.373291015625, + "margin_dpo/margin_std": 558.9320068359375, + "step": 548 + }, + { + "KL/chosen_KL_mean": -618.5299072265625, + "KL/mean": -756.0477294921875, + "KL/rejected_KL_mean": -893.565673828125, + "KL/std": 416.1364440917969, + "epoch": 0.8061674008810573, + "fcm_dpo/beta": 0.0010954017052426934, + "fcm_dpo/delta": 0.10109251737594604, + "fcm_dpo/margin": 275.03582763671875, + "fcm_dpo/q_t": 0.43178755044937134, + "grad_norm": 48.66642379760742, + "learning_rate": 5.603696935852426e-08, + "logits/chosen": -0.9122521877288818, + "logits/rejected": -0.9025084376335144, + "logps/chosen": -667.7421875, + "logps/ref_chosen": -49.21235656738281, + "logps/ref_rejected": -73.91031646728516, + "logps/rejected": -967.4760131835938, + "loss": 1.1787, + "margin_dpo/margin_mean": 275.03582763671875, + "margin_dpo/margin_std": 496.511474609375, + "step": 549 + }, + { + "KL/chosen_KL_mean": -637.8577880859375, + "KL/mean": -800.6353759765625, + "KL/rejected_KL_mean": -963.4129638671875, + "KL/std": 484.2886962890625, + "epoch": 0.8076358296622613, + "fcm_dpo/beta": 0.0011030520545318723, + "fcm_dpo/delta": 0.042437393218278885, + "fcm_dpo/margin": 325.55517578125, + "fcm_dpo/q_t": 0.4183220863342285, + "grad_norm": 37.804141998291016, + "learning_rate": 5.5229856368582376e-08, + "logits/chosen": -0.8820310831069946, + "logits/rejected": -0.9049103260040283, + "logps/chosen": -694.664794921875, + "logps/ref_chosen": -56.80695343017578, + "logps/ref_rejected": -95.12580871582031, + "logps/rejected": -1058.538818359375, + "loss": 1.132, + "margin_dpo/margin_mean": 325.55517578125, + "margin_dpo/margin_std": 513.7775268554688, + "step": 550 + }, + { + "KL/chosen_KL_mean": -547.8771362304688, + "KL/mean": -811.1815185546875, + "KL/rejected_KL_mean": -1074.48583984375, + "KL/std": 508.75482177734375, + "epoch": 0.8091042584434655, + "fcm_dpo/beta": 0.0010755530092865229, + "fcm_dpo/delta": -0.17676769196987152, + "fcm_dpo/margin": 526.6087036132812, + "fcm_dpo/q_t": 0.36954620480537415, + "grad_norm": 56.2789306640625, + "learning_rate": 5.4427875753062734e-08, + "logits/chosen": -0.8909007906913757, + "logits/rejected": -0.9451035857200623, + "logps/chosen": -606.9834594726562, + "logps/ref_chosen": -59.10633087158203, + "logps/ref_rejected": -111.67280578613281, + "logps/rejected": -1186.15869140625, + "loss": 0.9641, + "margin_dpo/margin_mean": 526.6087036132812, + "margin_dpo/margin_std": 504.5882568359375, + "step": 551 + }, + { + "KL/chosen_KL_mean": -535.0650634765625, + "KL/mean": -834.066162109375, + "KL/rejected_KL_mean": -1133.067138671875, + "KL/std": 607.322265625, + "epoch": 0.8105726872246696, + "fcm_dpo/beta": 0.0010228096507489681, + "fcm_dpo/delta": -0.229129359126091, + "fcm_dpo/margin": 598.0020751953125, + "fcm_dpo/q_t": 0.3673725724220276, + "grad_norm": 48.699928283691406, + "learning_rate": 5.363104864490034e-08, + "logits/chosen": -0.9140257835388184, + "logits/rejected": -0.9522314071655273, + "logps/chosen": -597.419677734375, + "logps/ref_chosen": -62.35459899902344, + "logps/ref_rejected": -104.56210327148438, + "logps/rejected": -1237.62939453125, + "loss": 0.9746, + "margin_dpo/margin_mean": 598.0020751953125, + "margin_dpo/margin_std": 696.8597412109375, + "step": 552 + }, + { + "KL/chosen_KL_mean": -627.572509765625, + "KL/mean": -790.2903442382812, + "KL/rejected_KL_mean": -953.0081176757812, + "KL/std": 503.21502685546875, + "epoch": 0.8120411160058737, + "fcm_dpo/beta": 0.0010280333226546645, + "fcm_dpo/delta": 0.06775818020105362, + "fcm_dpo/margin": 325.43560791015625, + "fcm_dpo/q_t": 0.42561495304107666, + "grad_norm": 26.667526245117188, + "learning_rate": 5.2839396041230415e-08, + "logits/chosen": -0.8898186683654785, + "logits/rejected": -0.8853092789649963, + "logps/chosen": -695.8313598632812, + "logps/ref_chosen": -68.25881958007812, + "logps/ref_rejected": -98.0971450805664, + "logps/rejected": -1051.105224609375, + "loss": 1.1505, + "margin_dpo/margin_mean": 325.43560791015625, + "margin_dpo/margin_std": 544.8580322265625, + "step": 553 + }, + { + "KL/chosen_KL_mean": -648.32177734375, + "KL/mean": -857.53759765625, + "KL/rejected_KL_mean": -1066.75341796875, + "KL/std": 539.8403930664062, + "epoch": 0.8135095447870778, + "fcm_dpo/beta": 0.0010344828478991985, + "fcm_dpo/delta": -0.034839678555727005, + "fcm_dpo/margin": 418.43157958984375, + "fcm_dpo/q_t": 0.40528228878974915, + "grad_norm": 76.91921997070312, + "learning_rate": 5.205293880283551e-08, + "logits/chosen": -0.8666242957115173, + "logits/rejected": -0.8389246463775635, + "logps/chosen": -716.26953125, + "logps/ref_chosen": -67.94767761230469, + "logps/ref_rejected": -89.78272247314453, + "logps/rejected": -1156.5361328125, + "loss": 1.1182, + "margin_dpo/margin_mean": 418.43157958984375, + "margin_dpo/margin_std": 690.6478881835938, + "step": 554 + }, + { + "KL/chosen_KL_mean": -650.5548095703125, + "KL/mean": -899.6168212890625, + "KL/rejected_KL_mean": -1148.6787109375, + "KL/std": 573.9271850585938, + "epoch": 0.8149779735682819, + "fcm_dpo/beta": 0.0010111583396792412, + "fcm_dpo/delta": -0.10909023135900497, + "fcm_dpo/margin": 498.12396240234375, + "fcm_dpo/q_t": 0.3918069899082184, + "grad_norm": 40.76702880859375, + "learning_rate": 5.127169765359515e-08, + "logits/chosen": -0.9580224752426147, + "logits/rejected": -1.0123507976531982, + "logps/chosen": -703.8853149414062, + "logps/ref_chosen": -53.33049011230469, + "logps/ref_rejected": -108.47937774658203, + "logps/rejected": -1257.158203125, + "loss": 1.0655, + "margin_dpo/margin_mean": 498.12396240234375, + "margin_dpo/margin_std": 737.4571533203125, + "step": 555 + }, + { + "KL/chosen_KL_mean": -646.2598266601562, + "KL/mean": -798.2440185546875, + "KL/rejected_KL_mean": -950.2281494140625, + "KL/std": 454.10919189453125, + "epoch": 0.8164464023494861, + "fcm_dpo/beta": 0.0010182505939155817, + "fcm_dpo/delta": 0.09340062737464905, + "fcm_dpo/margin": 303.96832275390625, + "fcm_dpo/q_t": 0.4286388158798218, + "grad_norm": 35.782039642333984, + "learning_rate": 5.049569317994012e-08, + "logits/chosen": -0.9508916735649109, + "logits/rejected": -0.9452144503593445, + "logps/chosen": -704.904296875, + "logps/ref_chosen": -58.64447021484375, + "logps/ref_rejected": -101.34040832519531, + "logps/rejected": -1051.568603515625, + "loss": 1.1524, + "margin_dpo/margin_mean": 303.96832275390625, + "margin_dpo/margin_std": 460.0691223144531, + "step": 556 + }, + { + "KL/chosen_KL_mean": -717.3458251953125, + "KL/mean": -942.2198486328125, + "KL/rejected_KL_mean": -1167.0938720703125, + "KL/std": 636.468505859375, + "epoch": 0.8179148311306902, + "fcm_dpo/beta": 0.0010126400738954544, + "fcm_dpo/delta": -0.05826106667518616, + "fcm_dpo/margin": 449.748046875, + "fcm_dpo/q_t": 0.4009940028190613, + "grad_norm": 52.255699157714844, + "learning_rate": 4.9724945830310144e-08, + "logits/chosen": -1.0088746547698975, + "logits/rejected": -1.0433576107025146, + "logps/chosen": -785.1865234375, + "logps/ref_chosen": -67.84066009521484, + "logps/ref_rejected": -109.93965911865234, + "logps/rejected": -1277.033447265625, + "loss": 1.1038, + "margin_dpo/margin_mean": 449.748046875, + "margin_dpo/margin_std": 723.2161865234375, + "step": 557 + }, + { + "KL/chosen_KL_mean": -643.977783203125, + "KL/mean": -949.288330078125, + "KL/rejected_KL_mean": -1254.5987548828125, + "KL/std": 584.4927978515625, + "epoch": 0.8193832599118943, + "fcm_dpo/beta": 0.0009801845299080014, + "fcm_dpo/delta": -0.21119916439056396, + "fcm_dpo/margin": 610.62109375, + "fcm_dpo/q_t": 0.36214083433151245, + "grad_norm": 30.629545211791992, + "learning_rate": 4.8959475914614554e-08, + "logits/chosen": -1.027779221534729, + "logits/rejected": -1.046311855316162, + "logps/chosen": -706.3460083007812, + "logps/ref_chosen": -62.36824035644531, + "logps/ref_rejected": -102.16102600097656, + "logps/rejected": -1356.759765625, + "loss": 0.9661, + "margin_dpo/margin_mean": 610.62109375, + "margin_dpo/margin_std": 642.599365234375, + "step": 558 + }, + { + "KL/chosen_KL_mean": -743.7402954101562, + "KL/mean": -1001.60595703125, + "KL/rejected_KL_mean": -1259.4716796875, + "KL/std": 617.6702270507812, + "epoch": 0.8208516886930984, + "fcm_dpo/beta": 0.0009573526913300157, + "fcm_dpo/delta": -0.09849410504102707, + "fcm_dpo/margin": 515.7313842773438, + "fcm_dpo/q_t": 0.3901920020580292, + "grad_norm": 32.09720993041992, + "learning_rate": 4.8199303603697614e-08, + "logits/chosen": -1.132476568222046, + "logits/rejected": -1.138415813446045, + "logps/chosen": -804.49267578125, + "logps/ref_chosen": -60.752323150634766, + "logps/ref_rejected": -93.44229125976562, + "logps/rejected": -1352.9139404296875, + "loss": 1.0467, + "margin_dpo/margin_mean": 515.7313232421875, + "margin_dpo/margin_std": 678.709228515625, + "step": 559 + }, + { + "KL/chosen_KL_mean": -679.7794189453125, + "KL/mean": -859.0238647460938, + "KL/rejected_KL_mean": -1038.268310546875, + "KL/std": 535.8975830078125, + "epoch": 0.8223201174743024, + "fcm_dpo/beta": 0.0009546001674607396, + "fcm_dpo/delta": 0.05963495746254921, + "fcm_dpo/margin": 358.48895263671875, + "fcm_dpo/q_t": 0.4228675365447998, + "grad_norm": 37.20246505737305, + "learning_rate": 4.7444448928806615e-08, + "logits/chosen": -0.8968836069107056, + "logits/rejected": -0.8791143894195557, + "logps/chosen": -737.8831787109375, + "logps/ref_chosen": -58.10382080078125, + "logps/ref_rejected": -79.99122619628906, + "logps/rejected": -1118.259521484375, + "loss": 1.1513, + "margin_dpo/margin_mean": 358.48895263671875, + "margin_dpo/margin_std": 598.878173828125, + "step": 560 + }, + { + "KL/chosen_KL_mean": -780.15966796875, + "KL/mean": -938.270263671875, + "KL/rejected_KL_mean": -1096.380615234375, + "KL/std": 530.6477661132812, + "epoch": 0.8237885462555066, + "fcm_dpo/beta": 0.0009781813714653254, + "fcm_dpo/delta": 0.09319829940795898, + "fcm_dpo/margin": 316.22100830078125, + "fcm_dpo/q_t": 0.4291490614414215, + "grad_norm": 47.342132568359375, + "learning_rate": 4.669493178106432e-08, + "logits/chosen": -1.0569636821746826, + "logits/rejected": -1.0779967308044434, + "logps/chosen": -831.0726318359375, + "logps/ref_chosen": -50.912879943847656, + "logps/ref_rejected": -99.06856536865234, + "logps/rejected": -1195.44921875, + "loss": 1.1995, + "margin_dpo/margin_mean": 316.22100830078125, + "margin_dpo/margin_std": 669.9414672851562, + "step": 561 + }, + { + "KL/chosen_KL_mean": -745.3583984375, + "KL/mean": -957.2404174804688, + "KL/rejected_KL_mean": -1169.122314453125, + "KL/std": 593.1848754882812, + "epoch": 0.8252569750367107, + "fcm_dpo/beta": 0.0009731657337397337, + "fcm_dpo/delta": -0.013568423688411713, + "fcm_dpo/margin": 423.7640380859375, + "fcm_dpo/q_t": 0.4083036184310913, + "grad_norm": 35.87330627441406, + "learning_rate": 4.5950771910944596e-08, + "logits/chosen": -0.9769254326820374, + "logits/rejected": -0.9813790321350098, + "logps/chosen": -804.82275390625, + "logps/ref_chosen": -59.46440124511719, + "logps/ref_rejected": -96.54266357421875, + "logps/rejected": -1265.6650390625, + "loss": 1.1048, + "margin_dpo/margin_mean": 423.7640380859375, + "margin_dpo/margin_std": 645.1516723632812, + "step": 562 + }, + { + "KL/chosen_KL_mean": -829.703125, + "KL/mean": -990.771240234375, + "KL/rejected_KL_mean": -1151.83935546875, + "KL/std": 633.045166015625, + "epoch": 0.8267254038179148, + "fcm_dpo/beta": 0.000972322653979063, + "fcm_dpo/delta": -0.05465248227119446, + "fcm_dpo/margin": 322.13623046875, + "fcm_dpo/q_t": 0.42368167638778687, + "grad_norm": 42.06772232055664, + "learning_rate": 4.521198892775202e-08, + "logits/chosen": -1.0264474153518677, + "logits/rejected": -1.0321646928787231, + "logps/chosen": -890.311279296875, + "logps/ref_chosen": -60.60819625854492, + "logps/ref_rejected": -94.56770324707031, + "logps/rejected": -1246.406982421875, + "loss": 1.2296, + "margin_dpo/margin_mean": 322.13623046875, + "margin_dpo/margin_std": 744.3589477539062, + "step": 563 + }, + { + "KL/chosen_KL_mean": -747.0021362304688, + "KL/mean": -952.5867919921875, + "KL/rejected_KL_mean": -1158.1715087890625, + "KL/std": 572.9649658203125, + "epoch": 0.8281938325991189, + "fcm_dpo/beta": 0.0009697899222373962, + "fcm_dpo/delta": 0.0011525209993124008, + "fcm_dpo/margin": 411.1693115234375, + "fcm_dpo/q_t": 0.4103137254714966, + "grad_norm": 35.352901458740234, + "learning_rate": 4.447860229910544e-08, + "logits/chosen": -1.1010963916778564, + "logits/rejected": -1.0915511846542358, + "logps/chosen": -821.2705078125, + "logps/ref_chosen": -74.26837921142578, + "logps/ref_rejected": -93.23818969726562, + "logps/rejected": -1251.40966796875, + "loss": 1.099, + "margin_dpo/margin_mean": 411.1693115234375, + "margin_dpo/margin_std": 568.3698120117188, + "step": 564 + }, + { + "KL/chosen_KL_mean": -776.9501953125, + "KL/mean": -992.6700439453125, + "KL/rejected_KL_mean": -1208.389892578125, + "KL/std": 637.2417602539062, + "epoch": 0.8296622613803231, + "fcm_dpo/beta": 0.0009645746322348714, + "fcm_dpo/delta": -0.01686248928308487, + "fcm_dpo/margin": 431.43963623046875, + "fcm_dpo/q_t": 0.4098883271217346, + "grad_norm": 44.35929870605469, + "learning_rate": 4.375063135042445e-08, + "logits/chosen": -1.0143120288848877, + "logits/rejected": -1.0142502784729004, + "logps/chosen": -845.9700927734375, + "logps/ref_chosen": -69.0199203491211, + "logps/ref_rejected": -85.7789306640625, + "logps/rejected": -1294.1688232421875, + "loss": 1.1322, + "margin_dpo/margin_mean": 431.43963623046875, + "margin_dpo/margin_std": 756.8804931640625, + "step": 565 + }, + { + "KL/chosen_KL_mean": -730.8787841796875, + "KL/mean": -973.5142822265625, + "KL/rejected_KL_mean": -1216.14990234375, + "KL/std": 658.5827026367188, + "epoch": 0.8311306901615272, + "fcm_dpo/beta": 0.0009599901968613267, + "fcm_dpo/delta": -0.06941938400268555, + "fcm_dpo/margin": 485.2709655761719, + "fcm_dpo/q_t": 0.39742326736450195, + "grad_norm": 35.317893981933594, + "learning_rate": 4.3028095264420525e-08, + "logits/chosen": -1.0451146364212036, + "logits/rejected": -1.0700435638427734, + "logps/chosen": -797.424072265625, + "logps/ref_chosen": -66.5453109741211, + "logps/ref_rejected": -103.86932373046875, + "logps/rejected": -1320.0191650390625, + "loss": 1.1008, + "margin_dpo/margin_mean": 485.27099609375, + "margin_dpo/margin_std": 765.5435180664062, + "step": 566 + }, + { + "KL/chosen_KL_mean": -689.0420532226562, + "KL/mean": -877.3385009765625, + "KL/rejected_KL_mean": -1065.6348876953125, + "KL/std": 457.2042236328125, + "epoch": 0.8325991189427313, + "fcm_dpo/beta": 0.0009558956371620297, + "fcm_dpo/delta": 0.0415302999317646, + "fcm_dpo/margin": 376.59283447265625, + "fcm_dpo/q_t": 0.41558361053466797, + "grad_norm": 29.64704132080078, + "learning_rate": 4.231101308059165e-08, + "logits/chosen": -1.1439913511276245, + "logits/rejected": -1.1560258865356445, + "logps/chosen": -741.9003295898438, + "logps/ref_chosen": -52.85829544067383, + "logps/ref_rejected": -85.37095642089844, + "logps/rejected": -1151.005859375, + "loss": 1.1121, + "margin_dpo/margin_mean": 376.59283447265625, + "margin_dpo/margin_std": 499.68634033203125, + "step": 567 + }, + { + "KL/chosen_KL_mean": -682.358642578125, + "KL/mean": -934.0941162109375, + "KL/rejected_KL_mean": -1185.82958984375, + "KL/std": 537.24072265625, + "epoch": 0.8340675477239354, + "fcm_dpo/beta": 0.0009455858962610364, + "fcm_dpo/delta": -0.0800839364528656, + "fcm_dpo/margin": 503.4710693359375, + "fcm_dpo/q_t": 0.3895892798900604, + "grad_norm": 32.13995361328125, + "learning_rate": 4.1599403694720145e-08, + "logits/chosen": -0.9833190441131592, + "logits/rejected": -1.0224902629852295, + "logps/chosen": -727.551025390625, + "logps/ref_chosen": -45.1923828125, + "logps/ref_rejected": -89.09236907958984, + "logps/rejected": -1274.9219970703125, + "loss": 1.0304, + "margin_dpo/margin_mean": 503.47100830078125, + "margin_dpo/margin_std": 561.0274658203125, + "step": 568 + }, + { + "KL/chosen_KL_mean": -783.7449951171875, + "KL/mean": -988.6644287109375, + "KL/rejected_KL_mean": -1193.583740234375, + "KL/std": 691.3892211914062, + "epoch": 0.8355359765051396, + "fcm_dpo/beta": 0.0009511442622169852, + "fcm_dpo/delta": 0.009604483842849731, + "fcm_dpo/margin": 409.83868408203125, + "fcm_dpo/q_t": 0.4123598337173462, + "grad_norm": 56.671836853027344, + "learning_rate": 4.089328585837512e-08, + "logits/chosen": -1.0582460165023804, + "logits/rejected": -1.064152479171753, + "logps/chosen": -847.465576171875, + "logps/ref_chosen": -63.72056198120117, + "logps/ref_rejected": -79.10325622558594, + "logps/rejected": -1272.68701171875, + "loss": 1.1468, + "margin_dpo/margin_mean": 409.83868408203125, + "margin_dpo/margin_std": 721.668701171875, + "step": 569 + }, + { + "KL/chosen_KL_mean": -723.0333862304688, + "KL/mean": -910.5472412109375, + "KL/rejected_KL_mean": -1098.06103515625, + "KL/std": 545.8590087890625, + "epoch": 0.8370044052863436, + "fcm_dpo/beta": 0.0009502613684162498, + "fcm_dpo/delta": 0.0452612042427063, + "fcm_dpo/margin": 375.0276184082031, + "fcm_dpo/q_t": 0.41972124576568604, + "grad_norm": 27.773193359375, + "learning_rate": 4.019267817841834e-08, + "logits/chosen": -1.1307826042175293, + "logits/rejected": -1.123297095298767, + "logps/chosen": -784.64794921875, + "logps/ref_chosen": -61.61454391479492, + "logps/ref_rejected": -82.14186096191406, + "logps/rejected": -1180.202880859375, + "loss": 1.1368, + "margin_dpo/margin_mean": 375.02764892578125, + "margin_dpo/margin_std": 592.921875, + "step": 570 + }, + { + "KL/chosen_KL_mean": -717.0699462890625, + "KL/mean": -943.7703857421875, + "KL/rejected_KL_mean": -1170.4708251953125, + "KL/std": 552.2366333007812, + "epoch": 0.8384728340675477, + "fcm_dpo/beta": 0.0009471910889260471, + "fcm_dpo/delta": -0.030962642282247543, + "fcm_dpo/margin": 453.40087890625, + "fcm_dpo/q_t": 0.4038824439048767, + "grad_norm": 41.67679977416992, + "learning_rate": 3.9497599116513705e-08, + "logits/chosen": -1.0074293613433838, + "logits/rejected": -1.0243608951568604, + "logps/chosen": -770.1240234375, + "logps/ref_chosen": -53.05406188964844, + "logps/ref_rejected": -91.33682250976562, + "logps/rejected": -1261.8076171875, + "loss": 1.0993, + "margin_dpo/margin_mean": 453.40087890625, + "margin_dpo/margin_std": 687.6110229492188, + "step": 571 + }, + { + "KL/chosen_KL_mean": -762.98486328125, + "KL/mean": -999.034912109375, + "KL/rejected_KL_mean": -1235.0849609375, + "KL/std": 659.6268310546875, + "epoch": 0.8399412628487518, + "fcm_dpo/beta": 0.000938563549425453, + "fcm_dpo/delta": -0.04543805494904518, + "fcm_dpo/margin": 472.1002197265625, + "fcm_dpo/q_t": 0.4047321081161499, + "grad_norm": 35.46324157714844, + "learning_rate": 3.880806698864086e-08, + "logits/chosen": -1.0459448099136353, + "logits/rejected": -1.078913688659668, + "logps/chosen": -811.444091796875, + "logps/ref_chosen": -48.45928955078125, + "logps/ref_rejected": -83.55703735351562, + "logps/rejected": -1318.64208984375, + "loss": 1.1112, + "margin_dpo/margin_mean": 472.1002197265625, + "margin_dpo/margin_std": 790.4967651367188, + "step": 572 + }, + { + "KL/chosen_KL_mean": -757.2117919921875, + "KL/mean": -961.093505859375, + "KL/rejected_KL_mean": -1164.975341796875, + "KL/std": 571.9259643554688, + "epoch": 0.8414096916299559, + "fcm_dpo/beta": 0.0009429033380001783, + "fcm_dpo/delta": 0.016076089814305305, + "fcm_dpo/margin": 407.7635498046875, + "fcm_dpo/q_t": 0.4132142663002014, + "grad_norm": 28.161340713500977, + "learning_rate": 3.812409996461275e-08, + "logits/chosen": -1.086681604385376, + "logits/rejected": -1.0990102291107178, + "logps/chosen": -808.8343505859375, + "logps/ref_chosen": -51.62262725830078, + "logps/ref_rejected": -85.32499694824219, + "logps/rejected": -1250.30029296875, + "loss": 1.1033, + "margin_dpo/margin_mean": 407.7635498046875, + "margin_dpo/margin_std": 570.0986328125, + "step": 573 + }, + { + "KL/chosen_KL_mean": -668.5501708984375, + "KL/mean": -886.3728637695312, + "KL/rejected_KL_mean": -1104.195556640625, + "KL/std": 513.22802734375, + "epoch": 0.8428781204111601, + "fcm_dpo/beta": 0.0009430091013200581, + "fcm_dpo/delta": -0.011370273306965828, + "fcm_dpo/margin": 435.6454772949219, + "fcm_dpo/q_t": 0.40598154067993164, + "grad_norm": 33.936519622802734, + "learning_rate": 3.74457160675965e-08, + "logits/chosen": -1.0922186374664307, + "logits/rejected": -1.1221637725830078, + "logps/chosen": -719.5946044921875, + "logps/ref_chosen": -51.04446029663086, + "logps/ref_rejected": -92.80640411376953, + "logps/rejected": -1197.001953125, + "loss": 1.0903, + "margin_dpo/margin_mean": 435.6454772949219, + "margin_dpo/margin_std": 592.3309936523438, + "step": 574 + }, + { + "KL/chosen_KL_mean": -727.7802734375, + "KL/mean": -933.4301147460938, + "KL/rejected_KL_mean": -1139.079833984375, + "KL/std": 528.300537109375, + "epoch": 0.8443465491923642, + "fcm_dpo/beta": 0.0009349288884550333, + "fcm_dpo/delta": 0.014928296208381653, + "fcm_dpo/margin": 411.2996520996094, + "fcm_dpo/q_t": 0.4125257134437561, + "grad_norm": 35.51095199584961, + "learning_rate": 3.677293317363864e-08, + "logits/chosen": -0.9489999413490295, + "logits/rejected": -0.9602969288825989, + "logps/chosen": -799.5704345703125, + "logps/ref_chosen": -71.7901382446289, + "logps/ref_rejected": -95.38619995117188, + "logps/rejected": -1234.466064453125, + "loss": 1.1378, + "margin_dpo/margin_mean": 411.2996520996094, + "margin_dpo/margin_std": 677.067138671875, + "step": 575 + }, + { + "KL/chosen_KL_mean": -690.3848876953125, + "KL/mean": -842.4647216796875, + "KL/rejected_KL_mean": -994.5445556640625, + "KL/std": 470.174560546875, + "epoch": 0.8458149779735683, + "fcm_dpo/beta": 0.0009562689810991287, + "fcm_dpo/delta": 0.11256685107946396, + "fcm_dpo/margin": 304.1596374511719, + "fcm_dpo/q_t": 0.43350422382354736, + "grad_norm": 36.57832717895508, + "learning_rate": 3.6105769011194224e-08, + "logits/chosen": -1.0214297771453857, + "logits/rejected": -1.048740029335022, + "logps/chosen": -744.6478881835938, + "logps/ref_chosen": -54.262962341308594, + "logps/ref_rejected": -100.75428009033203, + "logps/rejected": -1095.298828125, + "loss": 1.1852, + "margin_dpo/margin_mean": 304.15960693359375, + "margin_dpo/margin_std": 556.28271484375, + "step": 576 + }, + { + "KL/chosen_KL_mean": -627.6497802734375, + "KL/mean": -830.4925537109375, + "KL/rejected_KL_mean": -1033.33544921875, + "KL/std": 542.0623779296875, + "epoch": 0.8472834067547724, + "fcm_dpo/beta": 0.000964190810918808, + "fcm_dpo/delta": 0.00915931724011898, + "fcm_dpo/margin": 405.6855773925781, + "fcm_dpo/q_t": 0.4120888113975525, + "grad_norm": 29.529443740844727, + "learning_rate": 3.5444241160659304e-08, + "logits/chosen": -1.0218915939331055, + "logits/rejected": -1.0101161003112793, + "logps/chosen": -689.5594482421875, + "logps/ref_chosen": -61.909706115722656, + "logps/ref_rejected": -84.07069396972656, + "logps/rejected": -1117.406005859375, + "loss": 1.1187, + "margin_dpo/margin_mean": 405.68560791015625, + "margin_dpo/margin_std": 598.992919921875, + "step": 577 + }, + { + "KL/chosen_KL_mean": -595.876220703125, + "KL/mean": -812.5816650390625, + "KL/rejected_KL_mean": -1029.2872314453125, + "KL/std": 519.2777099609375, + "epoch": 0.8487518355359766, + "fcm_dpo/beta": 0.0009588984539732337, + "fcm_dpo/delta": -0.01690073311328888, + "fcm_dpo/margin": 433.4109802246094, + "fcm_dpo/q_t": 0.40431180596351624, + "grad_norm": 50.43282699584961, + "learning_rate": 3.478836705390808e-08, + "logits/chosen": -0.9214882850646973, + "logits/rejected": -0.9523541331291199, + "logps/chosen": -645.139892578125, + "logps/ref_chosen": -49.26368713378906, + "logps/ref_rejected": -83.4362564086914, + "logps/rejected": -1112.723388671875, + "loss": 1.0771, + "margin_dpo/margin_mean": 433.41094970703125, + "margin_dpo/margin_std": 535.86767578125, + "step": 578 + }, + { + "KL/chosen_KL_mean": -723.193603515625, + "KL/mean": -859.6403198242188, + "KL/rejected_KL_mean": -996.0870361328125, + "KL/std": 547.017578125, + "epoch": 0.8502202643171806, + "fcm_dpo/beta": 0.0009806466987356544, + "fcm_dpo/delta": 0.13623100519180298, + "fcm_dpo/margin": 272.8934326171875, + "fcm_dpo/q_t": 0.43853724002838135, + "grad_norm": 68.8424301147461, + "learning_rate": 3.41381639738331e-08, + "logits/chosen": -0.9866000413894653, + "logits/rejected": -0.9893920421600342, + "logps/chosen": -782.0794677734375, + "logps/ref_chosen": -58.88581848144531, + "logps/ref_rejected": -94.78762817382812, + "logps/rejected": -1090.8746337890625, + "loss": 1.2174, + "margin_dpo/margin_mean": 272.8934326171875, + "margin_dpo/margin_std": 593.593017578125, + "step": 579 + }, + { + "KL/chosen_KL_mean": -507.4736633300781, + "KL/mean": -756.1246948242188, + "KL/rejected_KL_mean": -1004.7757568359375, + "KL/std": 589.247802734375, + "epoch": 0.8516886930983847, + "fcm_dpo/beta": 0.0009732701582834125, + "fcm_dpo/delta": -0.0888274759054184, + "fcm_dpo/margin": 497.30218505859375, + "fcm_dpo/q_t": 0.3939579725265503, + "grad_norm": 39.8839111328125, + "learning_rate": 3.349364905389032e-08, + "logits/chosen": -0.8748548030853271, + "logits/rejected": -0.9090137481689453, + "logps/chosen": -556.1804809570312, + "logps/ref_chosen": -48.70683670043945, + "logps/ref_rejected": -81.7583999633789, + "logps/rejected": -1086.5341796875, + "loss": 1.0507, + "margin_dpo/margin_mean": 497.3021545410156, + "margin_dpo/margin_std": 672.8572998046875, + "step": 580 + }, + { + "KL/chosen_KL_mean": -710.769287109375, + "KL/mean": -885.140380859375, + "KL/rejected_KL_mean": -1059.5115966796875, + "KL/std": 566.3184814453125, + "epoch": 0.8531571218795888, + "fcm_dpo/beta": 0.0009817921090871096, + "fcm_dpo/delta": 0.05957435816526413, + "fcm_dpo/margin": 348.74224853515625, + "fcm_dpo/q_t": 0.42394953966140747, + "grad_norm": 37.68699264526367, + "learning_rate": 3.285483927764726e-08, + "logits/chosen": -1.0647389888763428, + "logits/rejected": -1.0730311870574951, + "logps/chosen": -772.9916381835938, + "logps/ref_chosen": -62.22235107421875, + "logps/ref_rejected": -91.73568725585938, + "logps/rejected": -1151.247314453125, + "loss": 1.1566, + "margin_dpo/margin_mean": 348.74224853515625, + "margin_dpo/margin_std": 622.7120361328125, + "step": 581 + }, + { + "KL/chosen_KL_mean": -618.490478515625, + "KL/mean": -812.1707153320312, + "KL/rejected_KL_mean": -1005.8509521484375, + "KL/std": 488.8114929199219, + "epoch": 0.8546255506607929, + "fcm_dpo/beta": 0.0009761706460267305, + "fcm_dpo/delta": -0.07844623178243637, + "fcm_dpo/margin": 387.3603515625, + "fcm_dpo/q_t": 0.41167423129081726, + "grad_norm": 29.930849075317383, + "learning_rate": 3.222175147833556e-08, + "logits/chosen": -1.0147862434387207, + "logits/rejected": -1.0369963645935059, + "logps/chosen": -676.7191772460938, + "logps/ref_chosen": -58.228660583496094, + "logps/ref_rejected": -110.06959533691406, + "logps/rejected": -1115.9205322265625, + "loss": 1.1147, + "margin_dpo/margin_mean": 387.3603820800781, + "margin_dpo/margin_std": 530.1618041992188, + "step": 582 + }, + { + "KL/chosen_KL_mean": -691.4786376953125, + "KL/mean": -823.6566162109375, + "KL/rejected_KL_mean": -955.8345947265625, + "KL/std": 531.1171264648438, + "epoch": 0.856093979441997, + "fcm_dpo/beta": 0.0009659786010161042, + "fcm_dpo/delta": -0.013101693242788315, + "fcm_dpo/margin": 264.35589599609375, + "fcm_dpo/q_t": 0.4426102340221405, + "grad_norm": 65.24808502197266, + "learning_rate": 3.159440233840763e-08, + "logits/chosen": -0.9646916389465332, + "logits/rejected": -0.9632136821746826, + "logps/chosen": -748.341552734375, + "logps/ref_chosen": -56.86286163330078, + "logps/ref_rejected": -88.4039306640625, + "logps/rejected": -1044.238525390625, + "loss": 1.2428, + "margin_dpo/margin_mean": 264.3559265136719, + "margin_dpo/margin_std": 639.0501098632812, + "step": 583 + }, + { + "KL/chosen_KL_mean": -608.3406982421875, + "KL/mean": -859.684326171875, + "KL/rejected_KL_mean": -1111.0279541015625, + "KL/std": 554.8764038085938, + "epoch": 0.8575624082232012, + "fcm_dpo/beta": 0.0009539815364405513, + "fcm_dpo/delta": -0.08356797695159912, + "fcm_dpo/margin": 502.6873779296875, + "fcm_dpo/q_t": 0.39147210121154785, + "grad_norm": 36.447509765625, + "learning_rate": 3.0972808389096635e-08, + "logits/chosen": -0.9898185133934021, + "logits/rejected": -1.0079997777938843, + "logps/chosen": -665.2413330078125, + "logps/ref_chosen": -56.90068054199219, + "logps/ref_rejected": -97.63606262207031, + "logps/rejected": -1208.6640625, + "loss": 1.0358, + "margin_dpo/margin_mean": 502.6874084472656, + "margin_dpo/margin_std": 587.2181396484375, + "step": 584 + }, + { + "KL/chosen_KL_mean": -696.600341796875, + "KL/mean": -919.7645263671875, + "KL/rejected_KL_mean": -1142.9287109375, + "KL/std": 609.264892578125, + "epoch": 0.8590308370044053, + "fcm_dpo/beta": 0.0009440815774723887, + "fcm_dpo/delta": -0.022542130202054977, + "fcm_dpo/margin": 446.328369140625, + "fcm_dpo/q_t": 0.4053837060928345, + "grad_norm": 32.03108215332031, + "learning_rate": 3.035698600998121e-08, + "logits/chosen": -0.9939338564872742, + "logits/rejected": -1.0176794528961182, + "logps/chosen": -757.5743408203125, + "logps/ref_chosen": -60.973968505859375, + "logps/ref_rejected": -84.16952514648438, + "logps/rejected": -1227.0982666015625, + "loss": 1.1155, + "margin_dpo/margin_mean": 446.328369140625, + "margin_dpo/margin_std": 727.369384765625, + "step": 585 + }, + { + "KL/chosen_KL_mean": -743.328125, + "KL/mean": -890.031005859375, + "KL/rejected_KL_mean": -1036.73388671875, + "KL/std": 537.0108642578125, + "epoch": 0.8604992657856094, + "fcm_dpo/beta": 0.0009613102884031832, + "fcm_dpo/delta": 0.12152184545993805, + "fcm_dpo/margin": 293.4059143066406, + "fcm_dpo/q_t": 0.43567806482315063, + "grad_norm": 36.572792053222656, + "learning_rate": 2.974695142855388e-08, + "logits/chosen": -1.0138568878173828, + "logits/rejected": -1.0334415435791016, + "logps/chosen": -800.1837158203125, + "logps/ref_chosen": -56.85559844970703, + "logps/ref_rejected": -91.80261993408203, + "logps/rejected": -1128.53662109375, + "loss": 1.2015, + "margin_dpo/margin_mean": 293.4058837890625, + "margin_dpo/margin_std": 593.2252197265625, + "step": 586 + }, + { + "KL/chosen_KL_mean": -507.5320129394531, + "KL/mean": -715.7158813476562, + "KL/rejected_KL_mean": -923.899658203125, + "KL/std": 561.7125244140625, + "epoch": 0.8619676945668135, + "fcm_dpo/beta": 0.0009705749107524753, + "fcm_dpo/delta": -0.0043886564671993256, + "fcm_dpo/margin": 416.36767578125, + "fcm_dpo/q_t": 0.40726912021636963, + "grad_norm": 42.70491409301758, + "learning_rate": 2.9142720719793122e-08, + "logits/chosen": -1.0300676822662354, + "logits/rejected": -1.0568914413452148, + "logps/chosen": -552.2236328125, + "logps/ref_chosen": -44.69159698486328, + "logps/ref_rejected": -82.62385559082031, + "logps/rejected": -1006.5235595703125, + "loss": 1.0931, + "margin_dpo/margin_mean": 416.36767578125, + "margin_dpo/margin_std": 568.321044921875, + "step": 587 + }, + { + "KL/chosen_KL_mean": -685.742919921875, + "KL/mean": -856.51806640625, + "KL/rejected_KL_mean": -1027.293212890625, + "KL/std": 494.27239990234375, + "epoch": 0.8634361233480177, + "fcm_dpo/beta": 0.0009743442060425878, + "fcm_dpo/delta": 0.06948099285364151, + "fcm_dpo/margin": 341.55035400390625, + "fcm_dpo/q_t": 0.4227873980998993, + "grad_norm": 31.037988662719727, + "learning_rate": 2.8544309805740018e-08, + "logits/chosen": -0.9946512579917908, + "logits/rejected": -1.018219232559204, + "logps/chosen": -736.037841796875, + "logps/ref_chosen": -50.29494857788086, + "logps/ref_rejected": -107.36988067626953, + "logps/rejected": -1134.6630859375, + "loss": 1.1426, + "margin_dpo/margin_mean": 341.55035400390625, + "margin_dpo/margin_std": 514.8743896484375, + "step": 588 + }, + { + "KL/chosen_KL_mean": -675.970458984375, + "KL/mean": -908.8043212890625, + "KL/rejected_KL_mean": -1141.63818359375, + "KL/std": 545.934326171875, + "epoch": 0.8649045521292217, + "fcm_dpo/beta": 0.0009745459537953138, + "fcm_dpo/delta": -0.0563356988132, + "fcm_dpo/margin": 465.66778564453125, + "fcm_dpo/q_t": 0.3962155878543854, + "grad_norm": 30.49479103088379, + "learning_rate": 2.7951734455078786e-08, + "logits/chosen": -0.9664604663848877, + "logits/rejected": -0.9765450954437256, + "logps/chosen": -735.9003295898438, + "logps/ref_chosen": -59.929908752441406, + "logps/ref_rejected": -111.65534973144531, + "logps/rejected": -1253.2935791015625, + "loss": 1.0564, + "margin_dpo/margin_mean": 465.66778564453125, + "margin_dpo/margin_std": 579.8321533203125, + "step": 589 + }, + { + "KL/chosen_KL_mean": -586.0206298828125, + "KL/mean": -815.29736328125, + "KL/rejected_KL_mean": -1044.573974609375, + "KL/std": 534.37109375, + "epoch": 0.8663729809104258, + "fcm_dpo/beta": 0.0009633679874241352, + "fcm_dpo/delta": -0.04375208914279938, + "fcm_dpo/margin": 458.553466796875, + "fcm_dpo/q_t": 0.3995480537414551, + "grad_norm": 30.36831283569336, + "learning_rate": 2.736501028272095e-08, + "logits/chosen": -0.9607778191566467, + "logits/rejected": -0.9915695190429688, + "logps/chosen": -641.8304443359375, + "logps/ref_chosen": -55.80979537963867, + "logps/ref_rejected": -106.06282043457031, + "logps/rejected": -1150.636962890625, + "loss": 1.0625, + "margin_dpo/margin_mean": 458.553466796875, + "margin_dpo/margin_std": 575.5927734375, + "step": 590 + }, + { + "KL/chosen_KL_mean": -667.932373046875, + "KL/mean": -878.5347900390625, + "KL/rejected_KL_mean": -1089.13720703125, + "KL/std": 525.6906127929688, + "epoch": 0.8678414096916299, + "fcm_dpo/beta": 0.0009612845606170595, + "fcm_dpo/delta": -0.005106211174279451, + "fcm_dpo/margin": 421.20489501953125, + "fcm_dpo/q_t": 0.40647366642951965, + "grad_norm": 31.83711051940918, + "learning_rate": 2.678415274939408e-08, + "logits/chosen": -1.0476133823394775, + "logits/rejected": -1.0394680500030518, + "logps/chosen": -724.1729736328125, + "logps/ref_chosen": -56.24061965942383, + "logps/ref_rejected": -83.78629302978516, + "logps/rejected": -1172.9234619140625, + "loss": 1.0985, + "margin_dpo/margin_mean": 421.20489501953125, + "margin_dpo/margin_std": 595.3729248046875, + "step": 591 + }, + { + "KL/chosen_KL_mean": -706.2188720703125, + "KL/mean": -897.91455078125, + "KL/rejected_KL_mean": -1089.610107421875, + "KL/std": 542.8892211914062, + "epoch": 0.869309838472834, + "fcm_dpo/beta": 0.0009650047868490219, + "fcm_dpo/delta": 0.031190991401672363, + "fcm_dpo/margin": 383.3913269042969, + "fcm_dpo/q_t": 0.41676104068756104, + "grad_norm": 38.8540153503418, + "learning_rate": 2.6209177161234442e-08, + "logits/chosen": -1.016085147857666, + "logits/rejected": -1.019473910331726, + "logps/chosen": -754.1591186523438, + "logps/ref_chosen": -47.94025421142578, + "logps/ref_rejected": -75.73287963867188, + "logps/rejected": -1165.343017578125, + "loss": 1.1713, + "margin_dpo/margin_mean": 383.391357421875, + "margin_dpo/margin_std": 732.060546875, + "step": 592 + }, + { + "KL/chosen_KL_mean": -659.2498779296875, + "KL/mean": -815.0553588867188, + "KL/rejected_KL_mean": -970.86083984375, + "KL/std": 574.805908203125, + "epoch": 0.8707782672540382, + "fcm_dpo/beta": 0.0009783029090613127, + "fcm_dpo/delta": 0.09824425727128983, + "fcm_dpo/margin": 311.6109313964844, + "fcm_dpo/q_t": 0.4322025775909424, + "grad_norm": 47.76630783081055, + "learning_rate": 2.564009866938349e-08, + "logits/chosen": -0.8949644565582275, + "logits/rejected": -0.8865162134170532, + "logps/chosen": -707.9406127929688, + "logps/ref_chosen": -48.690757751464844, + "logps/ref_rejected": -60.90800094604492, + "logps/rejected": -1031.768798828125, + "loss": 1.1964, + "margin_dpo/margin_mean": 311.61090087890625, + "margin_dpo/margin_std": 633.5354614257812, + "step": 593 + }, + { + "KL/chosen_KL_mean": -633.9850463867188, + "KL/mean": -820.4825439453125, + "KL/rejected_KL_mean": -1006.97998046875, + "KL/std": 562.014404296875, + "epoch": 0.8722466960352423, + "fcm_dpo/beta": 0.0009973826818168163, + "fcm_dpo/delta": 0.02790883556008339, + "fcm_dpo/margin": 372.99493408203125, + "fcm_dpo/q_t": 0.4159358739852905, + "grad_norm": 40.98539733886719, + "learning_rate": 2.5076932269588708e-08, + "logits/chosen": -0.9719296097755432, + "logits/rejected": -0.9616006016731262, + "logps/chosen": -688.919921875, + "logps/ref_chosen": -54.93488693237305, + "logps/ref_rejected": -86.09967803955078, + "logps/rejected": -1093.07958984375, + "loss": 1.1352, + "margin_dpo/margin_mean": 372.99493408203125, + "margin_dpo/margin_std": 592.751708984375, + "step": 594 + }, + { + "KL/chosen_KL_mean": -608.255615234375, + "KL/mean": -821.6201171875, + "KL/rejected_KL_mean": -1034.984619140625, + "KL/std": 540.2607421875, + "epoch": 0.8737151248164464, + "fcm_dpo/beta": 0.0009868217166513205, + "fcm_dpo/delta": -0.022147677838802338, + "fcm_dpo/margin": 426.7290344238281, + "fcm_dpo/q_t": 0.4070265293121338, + "grad_norm": 43.79144287109375, + "learning_rate": 2.451969280180849e-08, + "logits/chosen": -0.944753885269165, + "logits/rejected": -0.9602541923522949, + "logps/chosen": -657.676025390625, + "logps/ref_chosen": -49.4204216003418, + "logps/ref_rejected": -80.62731170654297, + "logps/rejected": -1115.6119384765625, + "loss": 1.0866, + "margin_dpo/margin_mean": 426.7290344238281, + "margin_dpo/margin_std": 598.78759765625, + "step": 595 + }, + { + "KL/chosen_KL_mean": -692.2659301757812, + "KL/mean": -836.673095703125, + "KL/rejected_KL_mean": -981.0802001953125, + "KL/std": 531.4737548828125, + "epoch": 0.8751835535976505, + "fcm_dpo/beta": 0.0010060444474220276, + "fcm_dpo/delta": 0.11245694756507874, + "fcm_dpo/margin": 288.8142395019531, + "fcm_dpo/q_t": 0.4370243549346924, + "grad_norm": 68.51116180419922, + "learning_rate": 2.396839494982103e-08, + "logits/chosen": -0.9666332006454468, + "logits/rejected": -0.9320765733718872, + "logps/chosen": -752.0576171875, + "logps/ref_chosen": -59.791683197021484, + "logps/ref_rejected": -80.09111785888672, + "logps/rejected": -1061.17138671875, + "loss": 1.2115, + "margin_dpo/margin_mean": 288.8142395019531, + "margin_dpo/margin_std": 640.1383666992188, + "step": 596 + }, + { + "KL/chosen_KL_mean": -652.91552734375, + "KL/mean": -912.2109375, + "KL/rejected_KL_mean": -1171.50634765625, + "KL/std": 616.0120849609375, + "epoch": 0.8766519823788547, + "fcm_dpo/beta": 0.0009824027074500918, + "fcm_dpo/delta": -0.11863398551940918, + "fcm_dpo/margin": 518.5908203125, + "fcm_dpo/q_t": 0.3876197040081024, + "grad_norm": 28.101728439331055, + "learning_rate": 2.3423053240837514e-08, + "logits/chosen": -0.9247469305992126, + "logits/rejected": -0.9725657105445862, + "logps/chosen": -710.17626953125, + "logps/ref_chosen": -57.26078796386719, + "logps/ref_rejected": -100.6937255859375, + "logps/rejected": -1272.2000732421875, + "loss": 1.0463, + "margin_dpo/margin_mean": 518.5908203125, + "margin_dpo/margin_std": 676.21826171875, + "step": 597 + }, + { + "KL/chosen_KL_mean": -655.470947265625, + "KL/mean": -857.9932250976562, + "KL/rejected_KL_mean": -1060.515625, + "KL/std": 524.6902465820312, + "epoch": 0.8781204111600588, + "fcm_dpo/beta": 0.0009790980257093906, + "fcm_dpo/delta": 0.002352789044380188, + "fcm_dpo/margin": 405.0446472167969, + "fcm_dpo/q_t": 0.4087187945842743, + "grad_norm": 34.318355560302734, + "learning_rate": 2.2883682045119062e-08, + "logits/chosen": -1.0320333242416382, + "logits/rejected": -1.0434290170669556, + "logps/chosen": -707.9893798828125, + "logps/ref_chosen": -52.51850509643555, + "logps/ref_rejected": -89.44385528564453, + "logps/rejected": -1149.95947265625, + "loss": 1.1089, + "margin_dpo/margin_mean": 405.0446472167969, + "margin_dpo/margin_std": 579.4035034179688, + "step": 598 + }, + { + "KL/chosen_KL_mean": -673.4180908203125, + "KL/mean": -848.3385009765625, + "KL/rejected_KL_mean": -1023.2587890625, + "KL/std": 517.2516479492188, + "epoch": 0.8795888399412628, + "fcm_dpo/beta": 0.0009746775031089783, + "fcm_dpo/delta": -0.061856959015131, + "fcm_dpo/margin": 349.8406982421875, + "fcm_dpo/q_t": 0.41758590936660767, + "grad_norm": 32.6776123046875, + "learning_rate": 2.2350295575598367e-08, + "logits/chosen": -0.9535913467407227, + "logits/rejected": -0.9626870155334473, + "logps/chosen": -723.2208251953125, + "logps/ref_chosen": -49.802677154541016, + "logps/ref_rejected": -82.978515625, + "logps/rejected": -1106.2373046875, + "loss": 1.1342, + "margin_dpo/margin_mean": 349.8407287597656, + "margin_dpo/margin_std": 487.59490966796875, + "step": 599 + }, + { + "KL/chosen_KL_mean": -726.7335205078125, + "KL/mean": -885.2261962890625, + "KL/rejected_KL_mean": -1043.7188720703125, + "KL/std": 520.7708740234375, + "epoch": 0.8810572687224669, + "fcm_dpo/beta": 0.0009859842248260975, + "fcm_dpo/delta": 0.0903782919049263, + "fcm_dpo/margin": 316.98529052734375, + "fcm_dpo/q_t": 0.430108904838562, + "grad_norm": 33.96622848510742, + "learning_rate": 2.1822907887504932e-08, + "logits/chosen": -1.0657624006271362, + "logits/rejected": -1.0632259845733643, + "logps/chosen": -793.16845703125, + "logps/ref_chosen": -66.43487548828125, + "logps/ref_rejected": -85.45649719238281, + "logps/rejected": -1129.17529296875, + "loss": 1.1868, + "margin_dpo/margin_mean": 316.98529052734375, + "margin_dpo/margin_std": 626.9907836914062, + "step": 600 + }, + { + "KL/chosen_KL_mean": -737.2628173828125, + "KL/mean": -945.91796875, + "KL/rejected_KL_mean": -1154.5731201171875, + "KL/std": 556.4426879882812, + "epoch": 0.882525697503671, + "fcm_dpo/beta": 0.0009882240556180477, + "fcm_dpo/delta": -0.01307043619453907, + "fcm_dpo/margin": 417.3103942871094, + "fcm_dpo/q_t": 0.4040505588054657, + "grad_norm": 31.343103408813477, + "learning_rate": 2.1301532877994742e-08, + "logits/chosen": -0.97291100025177, + "logits/rejected": -0.9917502403259277, + "logps/chosen": -796.3963623046875, + "logps/ref_chosen": -59.13361358642578, + "logps/ref_rejected": -94.69093322753906, + "logps/rejected": -1249.26416015625, + "loss": 1.0847, + "margin_dpo/margin_mean": 417.3103942871094, + "margin_dpo/margin_std": 547.4290771484375, + "step": 601 + }, + { + "KL/chosen_KL_mean": -505.7738037109375, + "KL/mean": -752.028076171875, + "KL/rejected_KL_mean": -998.2823486328125, + "KL/std": 502.33154296875, + "epoch": 0.8839941262848752, + "fcm_dpo/beta": 0.0009830892086029053, + "fcm_dpo/delta": -0.08871287107467651, + "fcm_dpo/margin": 492.5085754394531, + "fcm_dpo/q_t": 0.3889528512954712, + "grad_norm": 67.23153686523438, + "learning_rate": 2.0786184285784298e-08, + "logits/chosen": -1.0278465747833252, + "logits/rejected": -1.060103416442871, + "logps/chosen": -554.3673095703125, + "logps/ref_chosen": -48.59352111816406, + "logps/ref_rejected": -87.6685562133789, + "logps/rejected": -1085.950927734375, + "loss": 1.025, + "margin_dpo/margin_mean": 492.5085754394531, + "margin_dpo/margin_std": 532.3154296875, + "step": 602 + }, + { + "KL/chosen_KL_mean": -637.751953125, + "KL/mean": -871.9014892578125, + "KL/rejected_KL_mean": -1106.051025390625, + "KL/std": 581.3104248046875, + "epoch": 0.8854625550660793, + "fcm_dpo/beta": 0.0009643337689340115, + "fcm_dpo/delta": -0.05405785143375397, + "fcm_dpo/margin": 468.2989501953125, + "fcm_dpo/q_t": 0.40109044313430786, + "grad_norm": 38.15021896362305, + "learning_rate": 2.0276875690788204e-08, + "logits/chosen": -0.9976146817207336, + "logits/rejected": -0.9900000095367432, + "logps/chosen": -708.1666259765625, + "logps/ref_chosen": -70.41461944580078, + "logps/ref_rejected": -100.32559967041016, + "logps/rejected": -1206.3765869140625, + "loss": 1.0822, + "margin_dpo/margin_mean": 468.2989501953125, + "margin_dpo/margin_std": 690.9556274414062, + "step": 603 + }, + { + "KL/chosen_KL_mean": -629.3424072265625, + "KL/mean": -877.6458740234375, + "KL/rejected_KL_mean": -1125.9493408203125, + "KL/std": 553.5061645507812, + "epoch": 0.8869309838472834, + "fcm_dpo/beta": 0.0009511223761364818, + "fcm_dpo/delta": -0.07592622190713882, + "fcm_dpo/margin": 496.60693359375, + "fcm_dpo/q_t": 0.3955162465572357, + "grad_norm": 38.61325454711914, + "learning_rate": 1.977362051376158e-08, + "logits/chosen": -0.9782446622848511, + "logits/rejected": -1.012909173965454, + "logps/chosen": -675.800537109375, + "logps/ref_chosen": -46.45808029174805, + "logps/ref_rejected": -91.8544921875, + "logps/rejected": -1217.8038330078125, + "loss": 1.0656, + "margin_dpo/margin_mean": 496.60693359375, + "margin_dpo/margin_std": 690.3172607421875, + "step": 604 + }, + { + "KL/chosen_KL_mean": -670.883544921875, + "KL/mean": -858.872314453125, + "KL/rejected_KL_mean": -1046.861083984375, + "KL/std": 531.7296752929688, + "epoch": 0.8883994126284875, + "fcm_dpo/beta": 0.0009511103853583336, + "fcm_dpo/delta": 0.04394224286079407, + "fcm_dpo/margin": 375.9776611328125, + "fcm_dpo/q_t": 0.4207463264465332, + "grad_norm": 34.01826095581055, + "learning_rate": 1.9276432015946446e-08, + "logits/chosen": -0.9739004969596863, + "logits/rejected": -0.9862950444221497, + "logps/chosen": -737.1328125, + "logps/ref_chosen": -66.24933624267578, + "logps/ref_rejected": -102.30496978759766, + "logps/rejected": -1149.166015625, + "loss": 1.1403, + "margin_dpo/margin_mean": 375.9776306152344, + "margin_dpo/margin_std": 629.7609252929688, + "step": 605 + }, + { + "KL/chosen_KL_mean": -666.6454467773438, + "KL/mean": -881.18115234375, + "KL/rejected_KL_mean": -1095.7169189453125, + "KL/std": 542.5753173828125, + "epoch": 0.8898678414096917, + "fcm_dpo/beta": 0.0009582208003848791, + "fcm_dpo/delta": -0.011937655508518219, + "fcm_dpo/margin": 429.0714416503906, + "fcm_dpo/q_t": 0.40722784399986267, + "grad_norm": 25.309036254882812, + "learning_rate": 1.8785323298722093e-08, + "logits/chosen": -0.9902355670928955, + "logits/rejected": -1.004211664199829, + "logps/chosen": -721.4645385742188, + "logps/ref_chosen": -54.819122314453125, + "logps/ref_rejected": -98.37146759033203, + "logps/rejected": -1194.08837890625, + "loss": 1.0911, + "margin_dpo/margin_mean": 429.0714111328125, + "margin_dpo/margin_std": 591.7274169921875, + "step": 606 + }, + { + "KL/chosen_KL_mean": -685.667236328125, + "KL/mean": -858.60791015625, + "KL/rejected_KL_mean": -1031.548583984375, + "KL/std": 543.4450073242188, + "epoch": 0.8913362701908958, + "fcm_dpo/beta": 0.0009662234224379063, + "fcm_dpo/delta": 0.06767666339874268, + "fcm_dpo/margin": 345.88134765625, + "fcm_dpo/q_t": 0.42443907260894775, + "grad_norm": 29.7037353515625, + "learning_rate": 1.8300307303259904e-08, + "logits/chosen": -0.9784862399101257, + "logits/rejected": -0.9676879048347473, + "logps/chosen": -743.7513427734375, + "logps/ref_chosen": -58.08403778076172, + "logps/ref_rejected": -79.777099609375, + "logps/rejected": -1111.32568359375, + "loss": 1.1544, + "margin_dpo/margin_mean": 345.88134765625, + "margin_dpo/margin_std": 584.5785522460938, + "step": 607 + }, + { + "KL/chosen_KL_mean": -619.2991943359375, + "KL/mean": -820.6446533203125, + "KL/rejected_KL_mean": -1021.9901733398438, + "KL/std": 493.3348388671875, + "epoch": 0.8928046989720999, + "fcm_dpo/beta": 0.0009703817777335644, + "fcm_dpo/delta": 0.009458957239985466, + "fcm_dpo/margin": 402.6909484863281, + "fcm_dpo/q_t": 0.409574419260025, + "grad_norm": 36.573951721191406, + "learning_rate": 1.7821396810182437e-08, + "logits/chosen": -1.0082218647003174, + "logits/rejected": -1.019978642463684, + "logps/chosen": -676.75, + "logps/ref_chosen": -57.450836181640625, + "logps/ref_rejected": -94.77339172363281, + "logps/rejected": -1116.7635498046875, + "loss": 1.091, + "margin_dpo/margin_mean": 402.69097900390625, + "margin_dpo/margin_std": 511.0771179199219, + "step": 608 + }, + { + "KL/chosen_KL_mean": -638.9405517578125, + "KL/mean": -896.8486328125, + "KL/rejected_KL_mean": -1154.756591796875, + "KL/std": 665.662841796875, + "epoch": 0.8942731277533039, + "fcm_dpo/beta": 0.0009552284609526396, + "fcm_dpo/delta": -0.09746446460485458, + "fcm_dpo/margin": 515.8161010742188, + "fcm_dpo/q_t": 0.395630419254303, + "grad_norm": 28.152240753173828, + "learning_rate": 1.7348604439226617e-08, + "logits/chosen": -1.0558668375015259, + "logits/rejected": -1.0789850950241089, + "logps/chosen": -697.7459716796875, + "logps/ref_chosen": -58.805355072021484, + "logps/ref_rejected": -88.81600952148438, + "logps/rejected": -1243.57275390625, + "loss": 1.0668, + "margin_dpo/margin_mean": 515.8161010742188, + "margin_dpo/margin_std": 792.5299682617188, + "step": 609 + }, + { + "KL/chosen_KL_mean": -631.236083984375, + "KL/mean": -791.146484375, + "KL/rejected_KL_mean": -951.0569458007812, + "KL/std": 500.4407653808594, + "epoch": 0.895741556534508, + "fcm_dpo/beta": 0.0009649534476920962, + "fcm_dpo/delta": 0.09404957294464111, + "fcm_dpo/margin": 319.82086181640625, + "fcm_dpo/q_t": 0.42811504006385803, + "grad_norm": 38.88047409057617, + "learning_rate": 1.6881942648911074e-08, + "logits/chosen": -0.9684814214706421, + "logits/rejected": -0.9458719491958618, + "logps/chosen": -696.9310913085938, + "logps/ref_chosen": -65.69503784179688, + "logps/ref_rejected": -83.40538787841797, + "logps/rejected": -1034.46240234375, + "loss": 1.175, + "margin_dpo/margin_mean": 319.8208923339844, + "margin_dpo/margin_std": 581.705078125, + "step": 610 + }, + { + "KL/chosen_KL_mean": -665.9498291015625, + "KL/mean": -936.8076171875, + "KL/rejected_KL_mean": -1207.665283203125, + "KL/std": 653.8685302734375, + "epoch": 0.8972099853157122, + "fcm_dpo/beta": 0.0009503072360530496, + "fcm_dpo/delta": -0.12105247378349304, + "fcm_dpo/margin": 541.71533203125, + "fcm_dpo/q_t": 0.38792964816093445, + "grad_norm": 27.193374633789062, + "learning_rate": 1.6421423736208e-08, + "logits/chosen": -1.0235629081726074, + "logits/rejected": -1.0670585632324219, + "logps/chosen": -718.54931640625, + "logps/ref_chosen": -52.59946823120117, + "logps/ref_rejected": -86.33099365234375, + "logps/rejected": -1293.9962158203125, + "loss": 1.0461, + "margin_dpo/margin_mean": 541.71533203125, + "margin_dpo/margin_std": 745.772705078125, + "step": 611 + }, + { + "KL/chosen_KL_mean": -726.8673095703125, + "KL/mean": -931.6278076171875, + "KL/rejected_KL_mean": -1136.388427734375, + "KL/std": 533.6715087890625, + "epoch": 0.8986784140969163, + "fcm_dpo/beta": 0.000949513225350529, + "fcm_dpo/delta": 0.01131674274802208, + "fcm_dpo/margin": 409.5210266113281, + "fcm_dpo/q_t": 0.4112043082714081, + "grad_norm": 31.31951141357422, + "learning_rate": 1.5967059836219042e-08, + "logits/chosen": -1.0205453634262085, + "logits/rejected": -1.0193266868591309, + "logps/chosen": -786.1910400390625, + "logps/ref_chosen": -59.32372283935547, + "logps/ref_rejected": -88.31239318847656, + "logps/rejected": -1224.70068359375, + "loss": 1.1061, + "margin_dpo/margin_mean": 409.5210266113281, + "margin_dpo/margin_std": 582.0534057617188, + "step": 612 + }, + { + "KL/chosen_KL_mean": -618.98388671875, + "KL/mean": -881.4771728515625, + "KL/rejected_KL_mean": -1143.9703369140625, + "KL/std": 597.9173583984375, + "epoch": 0.9001468428781204, + "fcm_dpo/beta": 0.0009315350907854736, + "fcm_dpo/delta": -0.09393209218978882, + "fcm_dpo/margin": 524.9864501953125, + "fcm_dpo/q_t": 0.3888673782348633, + "grad_norm": 34.82392883300781, + "learning_rate": 1.551886292185553e-08, + "logits/chosen": -1.0204041004180908, + "logits/rejected": -1.0734975337982178, + "logps/chosen": -678.7138671875, + "logps/ref_chosen": -59.72996520996094, + "logps/ref_rejected": -105.10752868652344, + "logps/rejected": -1249.077880859375, + "loss": 1.0324, + "margin_dpo/margin_mean": 524.9863891601562, + "margin_dpo/margin_std": 627.1439208984375, + "step": 613 + }, + { + "KL/chosen_KL_mean": -688.460693359375, + "KL/mean": -937.7681884765625, + "KL/rejected_KL_mean": -1187.07568359375, + "KL/std": 594.8397827148438, + "epoch": 0.9016152716593245, + "fcm_dpo/beta": 0.000922086532227695, + "fcm_dpo/delta": -0.06259925663471222, + "fcm_dpo/margin": 498.6150207519531, + "fcm_dpo/q_t": 0.39733168482780457, + "grad_norm": 43.358585357666016, + "learning_rate": 1.507684480352292e-08, + "logits/chosen": -1.0039961338043213, + "logits/rejected": -1.0779341459274292, + "logps/chosen": -741.399658203125, + "logps/ref_chosen": -52.93898010253906, + "logps/ref_rejected": -104.67938232421875, + "logps/rejected": -1291.755126953125, + "loss": 1.0741, + "margin_dpo/margin_mean": 498.614990234375, + "margin_dpo/margin_std": 705.9459228515625, + "step": 614 + }, + { + "KL/chosen_KL_mean": -664.404541015625, + "KL/mean": -871.7774047851562, + "KL/rejected_KL_mean": -1079.150146484375, + "KL/std": 616.7755126953125, + "epoch": 0.9030837004405287, + "fcm_dpo/beta": 0.0009228853159584105, + "fcm_dpo/delta": 0.017665421590209007, + "fcm_dpo/margin": 414.74560546875, + "fcm_dpo/q_t": 0.4132547974586487, + "grad_norm": 27.085163116455078, + "learning_rate": 1.4641017128809801e-08, + "logits/chosen": -0.9974070191383362, + "logits/rejected": -1.0210152864456177, + "logps/chosen": -730.2218627929688, + "logps/ref_chosen": -65.81727600097656, + "logps/ref_rejected": -95.17749786376953, + "logps/rejected": -1174.32763671875, + "loss": 1.1269, + "margin_dpo/margin_mean": 414.7456359863281, + "margin_dpo/margin_std": 670.7782592773438, + "step": 615 + }, + { + "KL/chosen_KL_mean": -786.2246704101562, + "KL/mean": -949.08154296875, + "KL/rejected_KL_mean": -1111.9384765625, + "KL/std": 518.137451171875, + "epoch": 0.9045521292217328, + "fcm_dpo/beta": 0.00093449791893363, + "fcm_dpo/delta": 0.09862032532691956, + "fcm_dpo/margin": 325.71380615234375, + "fcm_dpo/q_t": 0.430539608001709, + "grad_norm": 33.06167221069336, + "learning_rate": 1.4211391382180637e-08, + "logits/chosen": -1.0744967460632324, + "logits/rejected": -1.0585415363311768, + "logps/chosen": -851.3575439453125, + "logps/ref_chosen": -65.13285827636719, + "logps/ref_rejected": -74.70050048828125, + "logps/rejected": -1186.638916015625, + "loss": 1.1748, + "margin_dpo/margin_mean": 325.71380615234375, + "margin_dpo/margin_std": 578.4287719726562, + "step": 616 + }, + { + "KL/chosen_KL_mean": -737.30859375, + "KL/mean": -861.140625, + "KL/rejected_KL_mean": -984.97265625, + "KL/std": 481.3660583496094, + "epoch": 0.9060205580029369, + "fcm_dpo/beta": 0.0009459134307689965, + "fcm_dpo/delta": 0.07147952169179916, + "fcm_dpo/margin": 247.66403198242188, + "fcm_dpo/q_t": 0.44637531042099, + "grad_norm": 58.81951141357422, + "learning_rate": 1.378797888467345e-08, + "logits/chosen": -0.9604687690734863, + "logits/rejected": -0.9302307367324829, + "logps/chosen": -800.3141479492188, + "logps/ref_chosen": -63.005550384521484, + "logps/ref_rejected": -64.234130859375, + "logps/rejected": -1049.206787109375, + "loss": 1.2335, + "margin_dpo/margin_mean": 247.66403198242188, + "margin_dpo/margin_std": 561.760009765625, + "step": 617 + }, + { + "KL/chosen_KL_mean": -777.463623046875, + "KL/mean": -1048.628662109375, + "KL/rejected_KL_mean": -1319.7938232421875, + "KL/std": 667.0958251953125, + "epoch": 0.9074889867841409, + "fcm_dpo/beta": 0.0009419023990631104, + "fcm_dpo/delta": -0.11681665480136871, + "fcm_dpo/margin": 542.3301391601562, + "fcm_dpo/q_t": 0.39000076055526733, + "grad_norm": 38.52542495727539, + "learning_rate": 1.3370790793601371e-08, + "logits/chosen": -0.9963364601135254, + "logits/rejected": -1.0328341722488403, + "logps/chosen": -844.5650024414062, + "logps/ref_chosen": -67.10134887695312, + "logps/ref_rejected": -92.15340423583984, + "logps/rejected": -1411.947265625, + "loss": 1.0842, + "margin_dpo/margin_mean": 542.3301391601562, + "margin_dpo/margin_std": 860.1188354492188, + "step": 618 + }, + { + "KL/chosen_KL_mean": -769.1409912109375, + "KL/mean": -969.6728515625, + "KL/rejected_KL_mean": -1170.204833984375, + "KL/std": 620.4271240234375, + "epoch": 0.908957415565345, + "fcm_dpo/beta": 0.0009351515327580273, + "fcm_dpo/delta": 0.025874076411128044, + "fcm_dpo/margin": 401.0638427734375, + "fcm_dpo/q_t": 0.42254310846328735, + "grad_norm": 50.947975158691406, + "learning_rate": 1.2959838102258535e-08, + "logits/chosen": -0.9994246959686279, + "logits/rejected": -1.0098530054092407, + "logps/chosen": -825.1192016601562, + "logps/ref_chosen": -55.978233337402344, + "logps/ref_rejected": -93.1854019165039, + "logps/rejected": -1263.39013671875, + "loss": 1.1798, + "margin_dpo/margin_mean": 401.0638427734375, + "margin_dpo/margin_std": 831.8917236328125, + "step": 619 + }, + { + "KL/chosen_KL_mean": -677.9056396484375, + "KL/mean": -864.861328125, + "KL/rejected_KL_mean": -1051.817138671875, + "KL/std": 525.7140502929688, + "epoch": 0.9104258443465492, + "fcm_dpo/beta": 0.0009386817691847682, + "fcm_dpo/delta": 0.05065443366765976, + "fcm_dpo/margin": 373.9115295410156, + "fcm_dpo/q_t": 0.4203672409057617, + "grad_norm": 34.1131706237793, + "learning_rate": 1.2555131639630567e-08, + "logits/chosen": -1.036217451095581, + "logits/rejected": -1.0394688844680786, + "logps/chosen": -737.703125, + "logps/ref_chosen": -59.79750061035156, + "logps/ref_rejected": -78.41075134277344, + "logps/rejected": -1130.2279052734375, + "loss": 1.1384, + "margin_dpo/margin_mean": 373.9115295410156, + "margin_dpo/margin_std": 589.354248046875, + "step": 620 + }, + { + "KL/chosen_KL_mean": -688.1146240234375, + "KL/mean": -972.752197265625, + "KL/rejected_KL_mean": -1257.3896484375, + "KL/std": 649.8193359375, + "epoch": 0.9118942731277533, + "fcm_dpo/beta": 0.0009327299194410443, + "fcm_dpo/delta": -0.1384207010269165, + "fcm_dpo/margin": 569.275146484375, + "fcm_dpo/q_t": 0.3806772232055664, + "grad_norm": 41.840362548828125, + "learning_rate": 1.2156682070109086e-08, + "logits/chosen": -1.082035779953003, + "logits/rejected": -1.1331275701522827, + "logps/chosen": -742.04833984375, + "logps/ref_chosen": -53.93375778198242, + "logps/ref_rejected": -88.36951446533203, + "logps/rejected": -1345.75927734375, + "loss": 1.0289, + "margin_dpo/margin_mean": 569.275146484375, + "margin_dpo/margin_std": 718.2183227539062, + "step": 621 + }, + { + "KL/chosen_KL_mean": -670.6583251953125, + "KL/mean": -874.29296875, + "KL/rejected_KL_mean": -1077.927490234375, + "KL/std": 506.2366943359375, + "epoch": 0.9133627019089574, + "fcm_dpo/beta": 0.0009187752148136497, + "fcm_dpo/delta": 0.026527073234319687, + "fcm_dpo/margin": 407.2692565917969, + "fcm_dpo/q_t": 0.41589581966400146, + "grad_norm": 29.52936553955078, + "learning_rate": 1.1764499893210878e-08, + "logits/chosen": -0.9557490348815918, + "logits/rejected": -0.944530189037323, + "logps/chosen": -730.9442138671875, + "logps/ref_chosen": -60.28582000732422, + "logps/ref_rejected": -85.51873779296875, + "logps/rejected": -1163.4462890625, + "loss": 1.1216, + "margin_dpo/margin_mean": 407.26922607421875, + "margin_dpo/margin_std": 625.4295654296875, + "step": 622 + }, + { + "KL/chosen_KL_mean": -723.661376953125, + "KL/mean": -879.453369140625, + "KL/rejected_KL_mean": -1035.2454833984375, + "KL/std": 520.5437622070312, + "epoch": 0.9148311306901615, + "fcm_dpo/beta": 0.0009395014494657516, + "fcm_dpo/delta": 0.11050058901309967, + "fcm_dpo/margin": 311.5841064453125, + "fcm_dpo/q_t": 0.4357511103153229, + "grad_norm": 37.05131912231445, + "learning_rate": 1.1378595443300998e-08, + "logits/chosen": -1.0592715740203857, + "logits/rejected": -1.0595531463623047, + "logps/chosen": -787.8182983398438, + "logps/ref_chosen": -64.1569595336914, + "logps/ref_rejected": -85.08304595947266, + "logps/rejected": -1120.3284912109375, + "loss": 1.1952, + "margin_dpo/margin_mean": 311.5841369628906, + "margin_dpo/margin_std": 634.1216430664062, + "step": 623 + }, + { + "KL/chosen_KL_mean": -728.6787719726562, + "KL/mean": -973.8768920898438, + "KL/rejected_KL_mean": -1219.074951171875, + "KL/std": 544.4788818359375, + "epoch": 0.9162995594713657, + "fcm_dpo/beta": 0.0009369177860207856, + "fcm_dpo/delta": -0.062284573912620544, + "fcm_dpo/margin": 490.396240234375, + "fcm_dpo/q_t": 0.3928346335887909, + "grad_norm": 32.84885787963867, + "learning_rate": 1.0998978889320582e-08, + "logits/chosen": -1.0861725807189941, + "logits/rejected": -1.088505506515503, + "logps/chosen": -800.597412109375, + "logps/ref_chosen": -71.91862487792969, + "logps/ref_rejected": -97.13203430175781, + "logps/rejected": -1316.20703125, + "loss": 1.0496, + "margin_dpo/margin_mean": 490.39617919921875, + "margin_dpo/margin_std": 585.4035034179688, + "step": 624 + }, + { + "KL/chosen_KL_mean": -670.5234375, + "KL/mean": -930.5636596679688, + "KL/rejected_KL_mean": -1190.603759765625, + "KL/std": 588.0369873046875, + "epoch": 0.9177679882525698, + "fcm_dpo/beta": 0.0009238402126356959, + "fcm_dpo/delta": -0.08445164561271667, + "fcm_dpo/margin": 520.080322265625, + "fcm_dpo/q_t": 0.38963061571121216, + "grad_norm": 70.59078979492188, + "learning_rate": 1.0625660234518913e-08, + "logits/chosen": -0.9677177667617798, + "logits/rejected": -0.9921514391899109, + "logps/chosen": -728.8655395507812, + "logps/ref_chosen": -58.342071533203125, + "logps/ref_rejected": -86.09038543701172, + "logps/rejected": -1276.6942138671875, + "loss": 1.0202, + "margin_dpo/margin_mean": 520.080322265625, + "margin_dpo/margin_std": 548.3095703125, + "step": 625 + }, + { + "KL/chosen_KL_mean": -836.838623046875, + "KL/mean": -983.3013305664062, + "KL/rejected_KL_mean": -1129.763916015625, + "KL/std": 631.4542236328125, + "epoch": 0.9192364170337739, + "fcm_dpo/beta": 0.0009358528186567128, + "fcm_dpo/delta": 0.12933696806430817, + "fcm_dpo/margin": 292.92529296875, + "fcm_dpo/q_t": 0.43587183952331543, + "grad_norm": 34.235252380371094, + "learning_rate": 1.0258649316189721e-08, + "logits/chosen": -0.9614785313606262, + "logits/rejected": -0.9531521797180176, + "logps/chosen": -911.9512939453125, + "logps/ref_chosen": -75.11260986328125, + "logps/ref_rejected": -99.188720703125, + "logps/rejected": -1228.95263671875, + "loss": 1.2146, + "margin_dpo/margin_mean": 292.92529296875, + "margin_dpo/margin_std": 614.7378540039062, + "step": 626 + }, + { + "KL/chosen_KL_mean": -566.9566650390625, + "KL/mean": -861.046875, + "KL/rejected_KL_mean": -1155.136962890625, + "KL/std": 695.9779052734375, + "epoch": 0.920704845814978, + "fcm_dpo/beta": 0.0009262310341000557, + "fcm_dpo/delta": -0.15298572182655334, + "fcm_dpo/margin": 588.1803588867188, + "fcm_dpo/q_t": 0.38598155975341797, + "grad_norm": 32.391971588134766, + "learning_rate": 9.897955805412e-09, + "logits/chosen": -0.9232186079025269, + "logits/rejected": -0.9977039098739624, + "logps/chosen": -614.6998291015625, + "logps/ref_chosen": -47.74314880371094, + "logps/ref_rejected": -106.75448608398438, + "logps/rejected": -1261.8914794921875, + "loss": 1.0311, + "margin_dpo/margin_mean": 588.1802978515625, + "margin_dpo/margin_std": 796.7737426757812, + "step": 627 + }, + { + "KL/chosen_KL_mean": -744.3592529296875, + "KL/mean": -966.3853759765625, + "KL/rejected_KL_mean": -1188.41162109375, + "KL/std": 574.35107421875, + "epoch": 0.922173274596182, + "fcm_dpo/beta": 0.000911533716134727, + "fcm_dpo/delta": -0.004994707182049751, + "fcm_dpo/margin": 444.0523681640625, + "fcm_dpo/q_t": 0.4087638854980469, + "grad_norm": 32.3719367980957, + "learning_rate": 9.543589206795238e-09, + "logits/chosen": -1.0520081520080566, + "logits/rejected": -1.065995454788208, + "logps/chosen": -804.5421752929688, + "logps/ref_chosen": -60.182945251464844, + "logps/ref_rejected": -101.55467224121094, + "logps/rejected": -1289.96630859375, + "loss": 1.1065, + "margin_dpo/margin_mean": 444.0523681640625, + "margin_dpo/margin_std": 661.4337158203125, + "step": 628 + }, + { + "KL/chosen_KL_mean": -735.479248046875, + "KL/mean": -940.03759765625, + "KL/rejected_KL_mean": -1144.595947265625, + "KL/std": 554.8238525390625, + "epoch": 0.9236417033773862, + "fcm_dpo/beta": 0.0009158846805803478, + "fcm_dpo/delta": 0.026277855038642883, + "fcm_dpo/margin": 409.11669921875, + "fcm_dpo/q_t": 0.4125128388404846, + "grad_norm": 35.62141036987305, + "learning_rate": 9.19555885822887e-09, + "logits/chosen": -1.0496397018432617, + "logits/rejected": -1.0647929906845093, + "logps/chosen": -799.6928100585938, + "logps/ref_chosen": -64.21354675292969, + "logps/ref_rejected": -91.65367126464844, + "logps/rejected": -1236.2496337890625, + "loss": 1.1059, + "margin_dpo/margin_mean": 409.11669921875, + "margin_dpo/margin_std": 548.577880859375, + "step": 629 + }, + { + "KL/chosen_KL_mean": -664.2507934570312, + "KL/mean": -786.7623291015625, + "KL/rejected_KL_mean": -909.27392578125, + "KL/std": 577.2899169921875, + "epoch": 0.9251101321585903, + "fcm_dpo/beta": 0.0009212232544086874, + "fcm_dpo/delta": 0.045750658959150314, + "fcm_dpo/margin": 245.02310180664062, + "fcm_dpo/q_t": 0.4528850317001343, + "grad_norm": 56.84680938720703, + "learning_rate": 8.85387393063622e-09, + "logits/chosen": -1.043975830078125, + "logits/rejected": -1.0191277265548706, + "logps/chosen": -723.5418090820312, + "logps/ref_chosen": -59.29100036621094, + "logps/ref_rejected": -83.59829711914062, + "logps/rejected": -992.8721923828125, + "loss": 1.2698, + "margin_dpo/margin_mean": 245.02308654785156, + "margin_dpo/margin_std": 707.3883056640625, + "step": 630 + }, + { + "KL/chosen_KL_mean": -800.809814453125, + "KL/mean": -987.3739013671875, + "KL/rejected_KL_mean": -1173.93798828125, + "KL/std": 571.5894775390625, + "epoch": 0.9265785609397944, + "fcm_dpo/beta": 0.0009341588011011481, + "fcm_dpo/delta": 0.053176864981651306, + "fcm_dpo/margin": 373.1282043457031, + "fcm_dpo/q_t": 0.4202990233898163, + "grad_norm": 33.039405822753906, + "learning_rate": 8.518543427732949e-09, + "logits/chosen": -1.1113148927688599, + "logits/rejected": -1.120398759841919, + "logps/chosen": -860.263427734375, + "logps/ref_chosen": -59.45360565185547, + "logps/ref_rejected": -80.95156860351562, + "logps/rejected": -1254.8896484375, + "loss": 1.1597, + "margin_dpo/margin_mean": 373.1282043457031, + "margin_dpo/margin_std": 670.2313232421875, + "step": 631 + }, + { + "KL/chosen_KL_mean": -700.687744140625, + "KL/mean": -902.2703857421875, + "KL/rejected_KL_mean": -1103.85302734375, + "KL/std": 529.5032348632812, + "epoch": 0.9280469897209985, + "fcm_dpo/beta": 0.0009340323740616441, + "fcm_dpo/delta": 0.023960798978805542, + "fcm_dpo/margin": 403.1651916503906, + "fcm_dpo/q_t": 0.4138449430465698, + "grad_norm": 43.67294692993164, + "learning_rate": 8.189576185789637e-09, + "logits/chosen": -1.0655746459960938, + "logits/rejected": -1.0689226388931274, + "logps/chosen": -762.039306640625, + "logps/ref_chosen": -61.35155487060547, + "logps/ref_rejected": -86.16017150878906, + "logps/rejected": -1190.01318359375, + "loss": 1.1318, + "margin_dpo/margin_mean": 403.16522216796875, + "margin_dpo/margin_std": 643.6358642578125, + "step": 632 + }, + { + "KL/chosen_KL_mean": -774.7955322265625, + "KL/mean": -919.6793212890625, + "KL/rejected_KL_mean": -1064.563232421875, + "KL/std": 531.289306640625, + "epoch": 0.9295154185022027, + "fcm_dpo/beta": 0.0009426448959857225, + "fcm_dpo/delta": 0.02990627847611904, + "fcm_dpo/margin": 289.7676696777344, + "fcm_dpo/q_t": 0.437002032995224, + "grad_norm": 47.90824508666992, + "learning_rate": 7.866980873399015e-09, + "logits/chosen": -1.116697072982788, + "logits/rejected": -1.1289957761764526, + "logps/chosen": -832.0736694335938, + "logps/ref_chosen": -57.27816390991211, + "logps/ref_rejected": -91.58395385742188, + "logps/rejected": -1156.147216796875, + "loss": 1.2107, + "margin_dpo/margin_mean": 289.76763916015625, + "margin_dpo/margin_std": 609.2022094726562, + "step": 633 + }, + { + "KL/chosen_KL_mean": -864.3818969726562, + "KL/mean": -987.146728515625, + "KL/rejected_KL_mean": -1109.9114990234375, + "KL/std": 618.4544677734375, + "epoch": 0.9309838472834068, + "fcm_dpo/beta": 0.0009592788992449641, + "fcm_dpo/delta": 0.0725301131606102, + "fcm_dpo/margin": 245.5295867919922, + "fcm_dpo/q_t": 0.44761770963668823, + "grad_norm": 36.095970153808594, + "learning_rate": 7.550765991247654e-09, + "logits/chosen": -0.9695584774017334, + "logits/rejected": -0.9623770117759705, + "logps/chosen": -931.0008544921875, + "logps/ref_chosen": -66.61896514892578, + "logps/ref_rejected": -107.12564849853516, + "logps/rejected": -1217.037109375, + "loss": 1.2439, + "margin_dpo/margin_mean": 245.52960205078125, + "margin_dpo/margin_std": 606.821533203125, + "step": 634 + }, + { + "KL/chosen_KL_mean": -748.3845825195312, + "KL/mean": -925.947509765625, + "KL/rejected_KL_mean": -1103.51025390625, + "KL/std": 645.3294067382812, + "epoch": 0.9324522760646109, + "fcm_dpo/beta": 0.0009680173825472593, + "fcm_dpo/delta": 0.05820862203836441, + "fcm_dpo/margin": 355.125732421875, + "fcm_dpo/q_t": 0.42416542768478394, + "grad_norm": 35.836708068847656, + "learning_rate": 7.240939871891699e-09, + "logits/chosen": -1.0464322566986084, + "logits/rejected": -1.0263543128967285, + "logps/chosen": -822.340087890625, + "logps/ref_chosen": -73.95551300048828, + "logps/ref_rejected": -82.50045776367188, + "logps/rejected": -1186.0107421875, + "loss": 1.1569, + "margin_dpo/margin_mean": 355.125732421875, + "margin_dpo/margin_std": 636.2976684570312, + "step": 635 + }, + { + "KL/chosen_KL_mean": -698.6964111328125, + "KL/mean": -911.1131591796875, + "KL/rejected_KL_mean": -1123.5299072265625, + "KL/std": 614.4910888671875, + "epoch": 0.933920704845815, + "fcm_dpo/beta": 0.0009746984578669071, + "fcm_dpo/delta": -0.015476349741220474, + "fcm_dpo/margin": 424.83349609375, + "fcm_dpo/q_t": 0.4083341956138611, + "grad_norm": 29.87440299987793, + "learning_rate": 6.937510679537628e-09, + "logits/chosen": -1.0113909244537354, + "logits/rejected": -1.0161449909210205, + "logps/chosen": -758.3253173828125, + "logps/ref_chosen": -59.628910064697266, + "logps/ref_rejected": -81.97883605957031, + "logps/rejected": -1205.5087890625, + "loss": 1.0972, + "margin_dpo/margin_mean": 424.83349609375, + "margin_dpo/margin_std": 628.7258911132812, + "step": 636 + }, + { + "KL/chosen_KL_mean": -713.9525146484375, + "KL/mean": -948.3504638671875, + "KL/rejected_KL_mean": -1182.74853515625, + "KL/std": 626.5479736328125, + "epoch": 0.9353891336270191, + "fcm_dpo/beta": 0.0009556564618833363, + "fcm_dpo/delta": -0.050962455570697784, + "fcm_dpo/margin": 468.7959899902344, + "fcm_dpo/q_t": 0.4004845917224884, + "grad_norm": 30.655946731567383, + "learning_rate": 6.640486409826785e-09, + "logits/chosen": -1.1256394386291504, + "logits/rejected": -1.1713881492614746, + "logps/chosen": -763.6051635742188, + "logps/ref_chosen": -49.652687072753906, + "logps/ref_rejected": -98.40513610839844, + "logps/rejected": -1281.153564453125, + "loss": 1.0745, + "margin_dpo/margin_mean": 468.79595947265625, + "margin_dpo/margin_std": 649.401123046875, + "step": 637 + }, + { + "KL/chosen_KL_mean": -686.6837768554688, + "KL/mean": -873.9114990234375, + "KL/rejected_KL_mean": -1061.1392822265625, + "KL/std": 583.2347412109375, + "epoch": 0.9368575624082232, + "fcm_dpo/beta": 0.0009507788345217705, + "fcm_dpo/delta": -0.07280878722667694, + "fcm_dpo/margin": 374.45550537109375, + "fcm_dpo/q_t": 0.41397538781166077, + "grad_norm": 36.20570755004883, + "learning_rate": 6.349874889624962e-09, + "logits/chosen": -0.9657926559448242, + "logits/rejected": -0.9476113319396973, + "logps/chosen": -744.8404541015625, + "logps/ref_chosen": -58.156639099121094, + "logps/ref_rejected": -79.3014907836914, + "logps/rejected": -1140.4407958984375, + "loss": 1.1633, + "margin_dpo/margin_mean": 374.45550537109375, + "margin_dpo/margin_std": 676.257568359375, + "step": 638 + }, + { + "KL/chosen_KL_mean": -945.5469360351562, + "KL/mean": -1030.381591796875, + "KL/rejected_KL_mean": -1115.21630859375, + "KL/std": 560.040283203125, + "epoch": 0.9383259911894273, + "fcm_dpo/beta": 0.0009438564302399755, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 169.6693115234375, + "fcm_dpo/q_t": 0.46391725540161133, + "grad_norm": 108.60933685302734, + "learning_rate": 6.065683776815933e-09, + "logits/chosen": -0.9735069274902344, + "logits/rejected": -0.9152404069900513, + "logps/chosen": -1017.8701171875, + "logps/ref_chosen": -72.32319641113281, + "logps/ref_rejected": -74.2749252319336, + "logps/rejected": -1189.4912109375, + "loss": 1.3368, + "margin_dpo/margin_mean": 169.6693115234375, + "margin_dpo/margin_std": 701.151611328125, + "step": 639 + }, + { + "KL/chosen_KL_mean": -711.00732421875, + "KL/mean": -993.779541015625, + "KL/rejected_KL_mean": -1276.5517578125, + "KL/std": 650.807373046875, + "epoch": 0.9397944199706314, + "fcm_dpo/beta": 0.0009295439813286066, + "fcm_dpo/delta": -0.13248543441295624, + "fcm_dpo/margin": 565.5443725585938, + "fcm_dpo/q_t": 0.3859960734844208, + "grad_norm": 46.08125305175781, + "learning_rate": 5.7879205600998296e-09, + "logits/chosen": -0.9270308613777161, + "logits/rejected": -0.9523489475250244, + "logps/chosen": -767.1417236328125, + "logps/ref_chosen": -56.13436508178711, + "logps/ref_rejected": -108.60014343261719, + "logps/rejected": -1385.15185546875, + "loss": 1.0366, + "margin_dpo/margin_mean": 565.5443725585938, + "margin_dpo/margin_std": 759.7664794921875, + "step": 640 + }, + { + "KL/chosen_KL_mean": -829.0867919921875, + "KL/mean": -1006.7977294921875, + "KL/rejected_KL_mean": -1184.5086669921875, + "KL/std": 552.3861083984375, + "epoch": 0.9412628487518355, + "fcm_dpo/beta": 0.000928039662539959, + "fcm_dpo/delta": 0.0726061537861824, + "fcm_dpo/margin": 355.4219055175781, + "fcm_dpo/q_t": 0.4274270534515381, + "grad_norm": 28.046123504638672, + "learning_rate": 5.516592558795746e-09, + "logits/chosen": -1.041335940361023, + "logits/rejected": -1.0468769073486328, + "logps/chosen": -894.083740234375, + "logps/ref_chosen": -64.99689483642578, + "logps/ref_rejected": -86.99232482910156, + "logps/rejected": -1271.5009765625, + "loss": 1.1886, + "margin_dpo/margin_mean": 355.42193603515625, + "margin_dpo/margin_std": 733.6763916015625, + "step": 641 + }, + { + "KL/chosen_KL_mean": -794.72314453125, + "KL/mean": -1015.3919677734375, + "KL/rejected_KL_mean": -1236.060791015625, + "KL/std": 731.3734130859375, + "epoch": 0.9427312775330396, + "fcm_dpo/beta": 0.0009348751045763493, + "fcm_dpo/delta": -0.013475339859724045, + "fcm_dpo/margin": 441.3376770019531, + "fcm_dpo/q_t": 0.4158053398132324, + "grad_norm": 40.215126037597656, + "learning_rate": 5.251706922648868e-09, + "logits/chosen": -0.9885178804397583, + "logits/rejected": -1.0216963291168213, + "logps/chosen": -860.412353515625, + "logps/ref_chosen": -65.68924713134766, + "logps/ref_rejected": -110.24205017089844, + "logps/rejected": -1346.3028564453125, + "loss": 1.1535, + "margin_dpo/margin_mean": 441.33770751953125, + "margin_dpo/margin_std": 880.50634765625, + "step": 642 + }, + { + "KL/chosen_KL_mean": -712.148193359375, + "KL/mean": -884.007080078125, + "KL/rejected_KL_mean": -1055.865966796875, + "KL/std": 530.5042724609375, + "epoch": 0.9441997063142438, + "fcm_dpo/beta": 0.0009251800365746021, + "fcm_dpo/delta": -0.02657410502433777, + "fcm_dpo/margin": 343.71783447265625, + "fcm_dpo/q_t": 0.42580801248550415, + "grad_norm": 39.23821258544922, + "learning_rate": 4.993270631642038e-09, + "logits/chosen": -1.1170873641967773, + "logits/rejected": -1.1149516105651855, + "logps/chosen": -764.09814453125, + "logps/ref_chosen": -51.94999694824219, + "logps/ref_rejected": -87.46833801269531, + "logps/rejected": -1143.3343505859375, + "loss": 1.1537, + "margin_dpo/margin_mean": 343.71783447265625, + "margin_dpo/margin_std": 526.9955444335938, + "step": 643 + }, + { + "KL/chosen_KL_mean": -688.5614624023438, + "KL/mean": -862.1695556640625, + "KL/rejected_KL_mean": -1035.777587890625, + "KL/std": 615.8262939453125, + "epoch": 0.9456681350954479, + "fcm_dpo/beta": 0.0009356926893815398, + "fcm_dpo/delta": 0.07765576243400574, + "fcm_dpo/margin": 347.2160949707031, + "fcm_dpo/q_t": 0.4279705882072449, + "grad_norm": 38.85586166381836, + "learning_rate": 4.741290495811873e-09, + "logits/chosen": -0.9950805306434631, + "logits/rejected": -1.0022577047348022, + "logps/chosen": -747.5791015625, + "logps/ref_chosen": -59.017662048339844, + "logps/ref_rejected": -87.13668823242188, + "logps/rejected": -1122.914306640625, + "loss": 1.1888, + "margin_dpo/margin_mean": 347.2160949707031, + "margin_dpo/margin_std": 702.9293212890625, + "step": 644 + }, + { + "KL/chosen_KL_mean": -722.38525390625, + "KL/mean": -811.447509765625, + "KL/rejected_KL_mean": -900.5097045898438, + "KL/std": 496.90313720703125, + "epoch": 0.947136563876652, + "fcm_dpo/beta": 0.0009528464288450778, + "fcm_dpo/delta": 0.06967134773731232, + "fcm_dpo/margin": 178.12442016601562, + "fcm_dpo/q_t": 0.4628395438194275, + "grad_norm": 89.61138916015625, + "learning_rate": 4.495773155069299e-09, + "logits/chosen": -0.9825261831283569, + "logits/rejected": -0.968986988067627, + "logps/chosen": -778.2612915039062, + "logps/ref_chosen": -55.87602233886719, + "logps/ref_rejected": -97.78080749511719, + "logps/rejected": -998.29052734375, + "loss": 1.3289, + "margin_dpo/margin_mean": 178.12442016601562, + "margin_dpo/margin_std": 673.5927734375, + "step": 645 + }, + { + "KL/chosen_KL_mean": -689.6497802734375, + "KL/mean": -844.453857421875, + "KL/rejected_KL_mean": -999.2579956054688, + "KL/std": 476.8377990722656, + "epoch": 0.9486049926578561, + "fcm_dpo/beta": 0.0009646883700042963, + "fcm_dpo/delta": 0.10456812381744385, + "fcm_dpo/margin": 309.60821533203125, + "fcm_dpo/q_t": 0.4325307607650757, + "grad_norm": 46.54256057739258, + "learning_rate": 4.256725079024553e-09, + "logits/chosen": -1.0059431791305542, + "logits/rejected": -0.9890854954719543, + "logps/chosen": -750.925537109375, + "logps/ref_chosen": -61.275787353515625, + "logps/ref_rejected": -77.50580596923828, + "logps/rejected": -1076.7637939453125, + "loss": 1.1834, + "margin_dpo/margin_mean": 309.6082458496094, + "margin_dpo/margin_std": 569.5079345703125, + "step": 646 + }, + { + "KL/chosen_KL_mean": -608.05615234375, + "KL/mean": -797.486083984375, + "KL/rejected_KL_mean": -986.9160766601562, + "KL/std": 535.0508422851562, + "epoch": 0.9500734214390602, + "fcm_dpo/beta": 0.0009775401558727026, + "fcm_dpo/delta": 0.030786845833063126, + "fcm_dpo/margin": 378.85992431640625, + "fcm_dpo/q_t": 0.41445714235305786, + "grad_norm": 35.470096588134766, + "learning_rate": 4.024152566816791e-09, + "logits/chosen": -0.937364935874939, + "logits/rejected": -0.965479850769043, + "logps/chosen": -662.9085693359375, + "logps/ref_chosen": -54.8524169921875, + "logps/ref_rejected": -93.5194091796875, + "logps/rejected": -1080.435546875, + "loss": 1.1137, + "margin_dpo/margin_mean": 378.85992431640625, + "margin_dpo/margin_std": 526.0119018554688, + "step": 647 + }, + { + "KL/chosen_KL_mean": -638.0758056640625, + "KL/mean": -920.2573852539062, + "KL/rejected_KL_mean": -1202.43896484375, + "KL/std": 642.937255859375, + "epoch": 0.9515418502202643, + "fcm_dpo/beta": 0.000957622891291976, + "fcm_dpo/delta": -0.14847612380981445, + "fcm_dpo/margin": 564.3631591796875, + "fcm_dpo/q_t": 0.3837600648403168, + "grad_norm": 27.33829116821289, + "learning_rate": 3.798061746947995e-09, + "logits/chosen": -1.0410782098770142, + "logits/rejected": -1.0981464385986328, + "logps/chosen": -692.247314453125, + "logps/ref_chosen": -54.17146682739258, + "logps/ref_rejected": -98.7127914428711, + "logps/rejected": -1301.15185546875, + "loss": 1.0285, + "margin_dpo/margin_mean": 564.3631591796875, + "margin_dpo/margin_std": 768.2374267578125, + "step": 648 + }, + { + "KL/chosen_KL_mean": -680.2294921875, + "KL/mean": -809.4317626953125, + "KL/rejected_KL_mean": -938.6341552734375, + "KL/std": 502.25543212890625, + "epoch": 0.9530102790014684, + "fcm_dpo/beta": 0.0009556890581734478, + "fcm_dpo/delta": 0.055874936282634735, + "fcm_dpo/margin": 258.4045715332031, + "fcm_dpo/q_t": 0.4464304447174072, + "grad_norm": 29.119403839111328, + "learning_rate": 3.5784585771215235e-09, + "logits/chosen": -1.0787172317504883, + "logits/rejected": -1.0679619312286377, + "logps/chosen": -742.7098388671875, + "logps/ref_chosen": -62.480350494384766, + "logps/ref_rejected": -80.07717895507812, + "logps/rejected": -1018.7113037109375, + "loss": 1.2415, + "margin_dpo/margin_mean": 258.4045715332031, + "margin_dpo/margin_std": 620.7310791015625, + "step": 649 + }, + { + "KL/chosen_KL_mean": -735.5718994140625, + "KL/mean": -945.16455078125, + "KL/rejected_KL_mean": -1154.757080078125, + "KL/std": 632.589599609375, + "epoch": 0.9544787077826725, + "fcm_dpo/beta": 0.0009581187041476369, + "fcm_dpo/delta": -0.001857999712228775, + "fcm_dpo/margin": 419.18524169921875, + "fcm_dpo/q_t": 0.4114704728126526, + "grad_norm": 34.31390380859375, + "learning_rate": 3.3653488440851253e-09, + "logits/chosen": -1.0013569593429565, + "logits/rejected": -1.0217807292938232, + "logps/chosen": -791.6647338867188, + "logps/ref_chosen": -56.09281921386719, + "logps/ref_rejected": -98.26483917236328, + "logps/rejected": -1253.02197265625, + "loss": 1.1353, + "margin_dpo/margin_mean": 419.18524169921875, + "margin_dpo/margin_std": 727.79541015625, + "step": 650 + }, + { + "KL/chosen_KL_mean": -482.6407165527344, + "KL/mean": -760.6094970703125, + "KL/rejected_KL_mean": -1038.578369140625, + "KL/std": 583.9876708984375, + "epoch": 0.9559471365638766, + "fcm_dpo/beta": 0.000941460719332099, + "fcm_dpo/delta": -0.13033278286457062, + "fcm_dpo/margin": 555.9376220703125, + "fcm_dpo/q_t": 0.38108521699905396, + "grad_norm": 45.66421127319336, + "learning_rate": 3.158738163478475e-09, + "logits/chosen": -1.0007972717285156, + "logits/rejected": -1.059419870376587, + "logps/chosen": -526.066162109375, + "logps/ref_chosen": -43.42544937133789, + "logps/ref_rejected": -99.95791625976562, + "logps/rejected": -1138.5361328125, + "loss": 1.0005, + "margin_dpo/margin_mean": 555.9376220703125, + "margin_dpo/margin_std": 601.0250244140625, + "step": 651 + }, + { + "KL/chosen_KL_mean": -632.904052734375, + "KL/mean": -837.604736328125, + "KL/rejected_KL_mean": -1042.305419921875, + "KL/std": 583.210693359375, + "epoch": 0.9574155653450808, + "fcm_dpo/beta": 0.0009400760754942894, + "fcm_dpo/delta": 0.015617836266756058, + "fcm_dpo/margin": 409.4013671875, + "fcm_dpo/q_t": 0.4142574071884155, + "grad_norm": 32.21805953979492, + "learning_rate": 2.9586319796851555e-09, + "logits/chosen": -1.0335817337036133, + "logits/rejected": -1.0595180988311768, + "logps/chosen": -695.4808959960938, + "logps/ref_chosen": -62.57680892944336, + "logps/ref_rejected": -111.76779174804688, + "logps/rejected": -1154.0732421875, + "loss": 1.1271, + "margin_dpo/margin_mean": 409.4013671875, + "margin_dpo/margin_std": 668.522216796875, + "step": 652 + }, + { + "KL/chosen_KL_mean": -756.5819091796875, + "KL/mean": -952.8492431640625, + "KL/rejected_KL_mean": -1149.116455078125, + "KL/std": 617.5569458007812, + "epoch": 0.9588839941262849, + "fcm_dpo/beta": 0.0009441639995202422, + "fcm_dpo/delta": 0.03045791946351528, + "fcm_dpo/margin": 392.53460693359375, + "fcm_dpo/q_t": 0.41772544384002686, + "grad_norm": 32.95127487182617, + "learning_rate": 2.7650355656892166e-09, + "logits/chosen": -1.0887930393218994, + "logits/rejected": -1.1141128540039062, + "logps/chosen": -817.6948852539062, + "logps/ref_chosen": -61.11295700073242, + "logps/ref_rejected": -103.24960327148438, + "logps/rejected": -1252.3660888671875, + "loss": 1.1353, + "margin_dpo/margin_mean": 392.53460693359375, + "margin_dpo/margin_std": 648.3425903320312, + "step": 653 + }, + { + "KL/chosen_KL_mean": -682.6639404296875, + "KL/mean": -862.1605834960938, + "KL/rejected_KL_mean": -1041.6572265625, + "KL/std": 506.5113220214844, + "epoch": 0.960352422907489, + "fcm_dpo/beta": 0.0009525552159175277, + "fcm_dpo/delta": 0.06012295186519623, + "fcm_dpo/margin": 358.99334716796875, + "fcm_dpo/q_t": 0.42324844002723694, + "grad_norm": 34.27675247192383, + "learning_rate": 2.577954022936174e-09, + "logits/chosen": -1.0422253608703613, + "logits/rejected": -1.0543601512908936, + "logps/chosen": -744.39208984375, + "logps/ref_chosen": -61.7281379699707, + "logps/ref_rejected": -98.7738037109375, + "logps/rejected": -1140.43115234375, + "loss": 1.1455, + "margin_dpo/margin_mean": 358.9933776855469, + "margin_dpo/margin_std": 584.0943603515625, + "step": 654 + }, + { + "KL/chosen_KL_mean": -647.9940185546875, + "KL/mean": -838.0845947265625, + "KL/rejected_KL_mean": -1028.1751708984375, + "KL/std": 522.7816162109375, + "epoch": 0.9618208516886931, + "fcm_dpo/beta": 0.0009614527225494385, + "fcm_dpo/delta": 0.03576880693435669, + "fcm_dpo/margin": 380.1811828613281, + "fcm_dpo/q_t": 0.41819822788238525, + "grad_norm": 34.595184326171875, + "learning_rate": 2.397392281198729e-09, + "logits/chosen": -1.0417159795761108, + "logits/rejected": -1.0822257995605469, + "logps/chosen": -697.57080078125, + "logps/ref_chosen": -49.576812744140625, + "logps/ref_rejected": -98.29183197021484, + "logps/rejected": -1126.467041015625, + "loss": 1.136, + "margin_dpo/margin_mean": 380.1811828613281, + "margin_dpo/margin_std": 621.2908935546875, + "step": 655 + }, + { + "KL/chosen_KL_mean": -692.2393798828125, + "KL/mean": -1018.1945190429688, + "KL/rejected_KL_mean": -1344.149658203125, + "KL/std": 693.30029296875, + "epoch": 0.9632892804698973, + "fcm_dpo/beta": 0.0009351709159091115, + "fcm_dpo/delta": -0.2227155566215515, + "fcm_dpo/margin": 651.910400390625, + "fcm_dpo/q_t": 0.36535900831222534, + "grad_norm": 73.65252685546875, + "learning_rate": 2.223355098446622e-09, + "logits/chosen": -0.9182928800582886, + "logits/rejected": -0.9882034063339233, + "logps/chosen": -744.788818359375, + "logps/ref_chosen": -52.54943084716797, + "logps/ref_rejected": -113.67464447021484, + "logps/rejected": -1457.824462890625, + "loss": 0.9582, + "margin_dpo/margin_mean": 651.910400390625, + "margin_dpo/margin_std": 694.8057250976562, + "step": 656 + }, + { + "KL/chosen_KL_mean": -656.0784301757812, + "KL/mean": -918.288818359375, + "KL/rejected_KL_mean": -1180.499267578125, + "KL/std": 663.51416015625, + "epoch": 0.9647577092511013, + "fcm_dpo/beta": 0.0009086633799597621, + "fcm_dpo/delta": -0.08051308244466782, + "fcm_dpo/margin": 524.4208374023438, + "fcm_dpo/q_t": 0.39257729053497314, + "grad_norm": 42.068206787109375, + "learning_rate": 2.055847060721566e-09, + "logits/chosen": -1.087989330291748, + "logits/rejected": -1.1325247287750244, + "logps/chosen": -702.7789306640625, + "logps/ref_chosen": -46.700538635253906, + "logps/ref_rejected": -97.91487121582031, + "logps/rejected": -1278.4140625, + "loss": 1.0508, + "margin_dpo/margin_mean": 524.4208374023438, + "margin_dpo/margin_std": 682.353271484375, + "step": 657 + }, + { + "KL/chosen_KL_mean": -682.652099609375, + "KL/mean": -883.319580078125, + "KL/rejected_KL_mean": -1083.987060546875, + "KL/std": 494.90802001953125, + "epoch": 0.9662261380323054, + "fcm_dpo/beta": 0.000907151261344552, + "fcm_dpo/delta": 0.037033095955848694, + "fcm_dpo/margin": 401.3349609375, + "fcm_dpo/q_t": 0.4157608151435852, + "grad_norm": 31.92685890197754, + "learning_rate": 1.8948725820160662e-09, + "logits/chosen": -1.0313150882720947, + "logits/rejected": -1.0605497360229492, + "logps/chosen": -743.6102905273438, + "logps/ref_chosen": -60.95820999145508, + "logps/ref_rejected": -95.93949127197266, + "logps/rejected": -1179.926513671875, + "loss": 1.1177, + "margin_dpo/margin_mean": 401.3349609375, + "margin_dpo/margin_std": 558.94873046875, + "step": 658 + }, + { + "KL/chosen_KL_mean": -622.085205078125, + "KL/mean": -824.7811279296875, + "KL/rejected_KL_mean": -1027.47705078125, + "KL/std": 517.73681640625, + "epoch": 0.9676945668135095, + "fcm_dpo/beta": 0.0009139457251876593, + "fcm_dpo/delta": 0.030485082417726517, + "fcm_dpo/margin": 405.39166259765625, + "fcm_dpo/q_t": 0.41561028361320496, + "grad_norm": 32.02241516113281, + "learning_rate": 1.7404359041573723e-09, + "logits/chosen": -0.9981797933578491, + "logits/rejected": -0.9754196405410767, + "logps/chosen": -698.8282470703125, + "logps/ref_chosen": -76.74298095703125, + "logps/ref_rejected": -87.4709701538086, + "logps/rejected": -1114.947998046875, + "loss": 1.114, + "margin_dpo/margin_mean": 405.39166259765625, + "margin_dpo/margin_std": 572.6546020507812, + "step": 659 + }, + { + "KL/chosen_KL_mean": -681.49755859375, + "KL/mean": -938.8291625976562, + "KL/rejected_KL_mean": -1196.1607666015625, + "KL/std": 619.5654907226562, + "epoch": 0.9691629955947136, + "fcm_dpo/beta": 0.0009104161872528493, + "fcm_dpo/delta": -0.0718650072813034, + "fcm_dpo/margin": 514.663330078125, + "fcm_dpo/q_t": 0.3939950466156006, + "grad_norm": 37.91978454589844, + "learning_rate": 1.592541096695571e-09, + "logits/chosen": -1.0615503787994385, + "logits/rejected": -1.0776853561401367, + "logps/chosen": -740.54541015625, + "logps/ref_chosen": -59.04788589477539, + "logps/ref_rejected": -75.96005249023438, + "logps/rejected": -1272.120849609375, + "loss": 1.0543, + "margin_dpo/margin_mean": 514.663330078125, + "margin_dpo/margin_std": 662.3004150390625, + "step": 660 + }, + { + "KL/chosen_KL_mean": -598.23583984375, + "KL/mean": -829.0057983398438, + "KL/rejected_KL_mean": -1059.775634765625, + "KL/std": 670.0771484375, + "epoch": 0.9706314243759178, + "fcm_dpo/beta": 0.0009062248282134533, + "fcm_dpo/delta": -0.019264454022049904, + "fcm_dpo/margin": 461.5398254394531, + "fcm_dpo/q_t": 0.40717989206314087, + "grad_norm": 50.24213409423828, + "learning_rate": 1.4511920567963908e-09, + "logits/chosen": -1.0732464790344238, + "logits/rejected": -1.0874643325805664, + "logps/chosen": -648.9098510742188, + "logps/ref_chosen": -50.673973083496094, + "logps/ref_rejected": -86.00569152832031, + "logps/rejected": -1145.7813720703125, + "loss": 1.0881, + "margin_dpo/margin_mean": 461.53985595703125, + "margin_dpo/margin_std": 661.6412963867188, + "step": 661 + }, + { + "KL/chosen_KL_mean": -688.9097900390625, + "KL/mean": -871.3480224609375, + "KL/rejected_KL_mean": -1053.786376953125, + "KL/std": 555.1720581054688, + "epoch": 0.9720998531571219, + "fcm_dpo/beta": 0.0009117955341935158, + "fcm_dpo/delta": 0.06958886981010437, + "fcm_dpo/margin": 364.87664794921875, + "fcm_dpo/q_t": 0.4251037836074829, + "grad_norm": 28.40976905822754, + "learning_rate": 1.3163925091384532e-09, + "logits/chosen": -0.9545935392379761, + "logits/rejected": -0.9512023329734802, + "logps/chosen": -758.1707763671875, + "logps/ref_chosen": -69.26106262207031, + "logps/ref_rejected": -89.05593872070312, + "logps/rejected": -1142.84228515625, + "loss": 1.1688, + "margin_dpo/margin_mean": 364.87664794921875, + "margin_dpo/margin_std": 676.1181030273438, + "step": 662 + }, + { + "KL/chosen_KL_mean": -660.3707885742188, + "KL/mean": -882.6888427734375, + "KL/rejected_KL_mean": -1105.0068359375, + "KL/std": 639.6807250976562, + "epoch": 0.973568281938326, + "fcm_dpo/beta": 0.0009125665528699756, + "fcm_dpo/delta": -0.006048870272934437, + "fcm_dpo/margin": 444.63604736328125, + "fcm_dpo/q_t": 0.4114909768104553, + "grad_norm": 29.266870498657227, + "learning_rate": 1.1881460058152382e-09, + "logits/chosen": -1.043156385421753, + "logits/rejected": -1.0693552494049072, + "logps/chosen": -725.249755859375, + "logps/ref_chosen": -64.87890625, + "logps/ref_rejected": -113.92536926269531, + "logps/rejected": -1218.9322509765625, + "loss": 1.1201, + "margin_dpo/margin_mean": 444.63604736328125, + "margin_dpo/margin_std": 745.1632690429688, + "step": 663 + }, + { + "KL/chosen_KL_mean": -671.679931640625, + "KL/mean": -922.8211669921875, + "KL/rejected_KL_mean": -1173.96240234375, + "KL/std": 610.1594848632812, + "epoch": 0.9750367107195301, + "fcm_dpo/beta": 0.0009023561142385006, + "fcm_dpo/delta": -0.05629858374595642, + "fcm_dpo/margin": 502.282470703125, + "fcm_dpo/q_t": 0.3978724479675293, + "grad_norm": 29.01834487915039, + "learning_rate": 1.066455926241383e-09, + "logits/chosen": -1.0154392719268799, + "logits/rejected": -1.0499646663665771, + "logps/chosen": -732.568359375, + "logps/ref_chosen": -60.88847351074219, + "logps/ref_rejected": -105.521728515625, + "logps/rejected": -1279.484130859375, + "loss": 1.063, + "margin_dpo/margin_mean": 502.2824401855469, + "margin_dpo/margin_std": 654.3357543945312, + "step": 664 + }, + { + "KL/chosen_KL_mean": -607.6499633789062, + "KL/mean": -818.168701171875, + "KL/rejected_KL_mean": -1028.6873779296875, + "KL/std": 506.8304138183594, + "epoch": 0.9765051395007343, + "fcm_dpo/beta": 0.0009059334406629205, + "fcm_dpo/delta": 0.01930341310799122, + "fcm_dpo/margin": 421.0374755859375, + "fcm_dpo/q_t": 0.4117254316806793, + "grad_norm": 45.89773941040039, + "learning_rate": 9.513254770636137e-10, + "logits/chosen": -1.088966965675354, + "logits/rejected": -1.1092216968536377, + "logps/chosen": -668.214111328125, + "logps/ref_chosen": -60.56413269042969, + "logps/ref_rejected": -84.80882263183594, + "logps/rejected": -1113.4962158203125, + "loss": 1.0939, + "margin_dpo/margin_mean": 421.0375061035156, + "margin_dpo/margin_std": 522.3038940429688, + "step": 665 + }, + { + "KL/chosen_KL_mean": -657.01708984375, + "KL/mean": -866.1459350585938, + "KL/rejected_KL_mean": -1075.274658203125, + "KL/std": 536.670166015625, + "epoch": 0.9779735682819384, + "fcm_dpo/beta": 0.0009081506868824363, + "fcm_dpo/delta": 0.020935581997036934, + "fcm_dpo/margin": 418.2576904296875, + "fcm_dpo/q_t": 0.4136677384376526, + "grad_norm": 33.37041091918945, + "learning_rate": 8.427576920763956e-10, + "logits/chosen": -0.9409841299057007, + "logits/rejected": -0.9495470523834229, + "logps/chosen": -721.43701171875, + "logps/ref_chosen": -64.41996002197266, + "logps/ref_rejected": -95.8916244506836, + "logps/rejected": -1171.1663818359375, + "loss": 1.1133, + "margin_dpo/margin_mean": 418.2577209472656, + "margin_dpo/margin_std": 598.1719970703125, + "step": 666 + }, + { + "KL/chosen_KL_mean": -755.029052734375, + "KL/mean": -1004.461669921875, + "KL/rejected_KL_mean": -1253.8944091796875, + "KL/std": 589.2806396484375, + "epoch": 0.9794419970631424, + "fcm_dpo/beta": 0.0009012054651975632, + "fcm_dpo/delta": -0.052186060696840286, + "fcm_dpo/margin": 498.8653259277344, + "fcm_dpo/q_t": 0.3979080319404602, + "grad_norm": 38.522857666015625, + "learning_rate": 7.407554321417764e-10, + "logits/chosen": -0.9830505847930908, + "logits/rejected": -0.9843896627426147, + "logps/chosen": -824.3060302734375, + "logps/ref_chosen": -69.27702331542969, + "logps/ref_rejected": -87.83549499511719, + "logps/rejected": -1341.7298583984375, + "loss": 1.0648, + "margin_dpo/margin_mean": 498.86529541015625, + "margin_dpo/margin_std": 650.4932861328125, + "step": 667 + }, + { + "KL/chosen_KL_mean": -784.714111328125, + "KL/mean": -958.1055908203125, + "KL/rejected_KL_mean": -1131.4970703125, + "KL/std": 627.3168334960938, + "epoch": 0.9809104258443465, + "fcm_dpo/beta": 0.0009167675743810833, + "fcm_dpo/delta": 0.08393767476081848, + "fcm_dpo/margin": 346.7828674316406, + "fcm_dpo/q_t": 0.43127357959747314, + "grad_norm": 44.24299621582031, + "learning_rate": 6.453213851142225e-10, + "logits/chosen": -1.0314850807189941, + "logits/rejected": -1.0380046367645264, + "logps/chosen": -857.318115234375, + "logps/ref_chosen": -72.60400390625, + "logps/ref_rejected": -103.73905944824219, + "logps/rejected": -1235.236083984375, + "loss": 1.2016, + "margin_dpo/margin_mean": 346.7828674316406, + "margin_dpo/margin_std": 747.9251098632812, + "step": 668 + }, + { + "KL/chosen_KL_mean": -601.5374755859375, + "KL/mean": -861.4692993164062, + "KL/rejected_KL_mean": -1121.401123046875, + "KL/std": 594.085693359375, + "epoch": 0.9823788546255506, + "fcm_dpo/beta": 0.0009072460234165192, + "fcm_dpo/delta": -0.07513141632080078, + "fcm_dpo/margin": 519.8635864257812, + "fcm_dpo/q_t": 0.3927006125450134, + "grad_norm": 29.531373977661133, + "learning_rate": 5.564580657695939e-10, + "logits/chosen": -1.0162256956100464, + "logits/rejected": -1.0264288187026978, + "logps/chosen": -647.6539306640625, + "logps/ref_chosen": -46.116416931152344, + "logps/ref_rejected": -77.92434692382812, + "logps/rejected": -1199.325439453125, + "loss": 1.0536, + "margin_dpo/margin_mean": 519.8635864257812, + "margin_dpo/margin_std": 669.6885375976562, + "step": 669 + }, + { + "KL/chosen_KL_mean": -569.926025390625, + "KL/mean": -822.7012939453125, + "KL/rejected_KL_mean": -1075.4765625, + "KL/std": 546.4443359375, + "epoch": 0.9838472834067548, + "fcm_dpo/beta": 0.0008986732573248446, + "fcm_dpo/delta": -0.057006560266017914, + "fcm_dpo/margin": 505.550537109375, + "fcm_dpo/q_t": 0.396476686000824, + "grad_norm": 30.942873001098633, + "learning_rate": 4.741678157389739e-10, + "logits/chosen": -0.9540762901306152, + "logits/rejected": -0.972830593585968, + "logps/chosen": -632.271728515625, + "logps/ref_chosen": -62.34575271606445, + "logps/ref_rejected": -96.9405517578125, + "logps/rejected": -1172.4169921875, + "loss": 1.0656, + "margin_dpo/margin_mean": 505.550537109375, + "margin_dpo/margin_std": 658.2862548828125, + "step": 670 + }, + { + "KL/chosen_KL_mean": -731.1207275390625, + "KL/mean": -929.6273803710938, + "KL/rejected_KL_mean": -1128.134033203125, + "KL/std": 551.165283203125, + "epoch": 0.9853157121879589, + "fcm_dpo/beta": 0.0009013921953737736, + "fcm_dpo/delta": 0.0433029942214489, + "fcm_dpo/margin": 397.0133361816406, + "fcm_dpo/q_t": 0.4173119068145752, + "grad_norm": 34.9372673034668, + "learning_rate": 3.9845280344705245e-10, + "logits/chosen": -1.0201672315597534, + "logits/rejected": -1.0463124513626099, + "logps/chosen": -779.120849609375, + "logps/ref_chosen": -48.00010681152344, + "logps/ref_rejected": -83.81932067871094, + "logps/rejected": -1211.953369140625, + "loss": 1.1447, + "margin_dpo/margin_mean": 397.0133361816406, + "margin_dpo/margin_std": 663.8474731445312, + "step": 671 + }, + { + "KL/chosen_KL_mean": -809.3194580078125, + "KL/mean": -1013.943603515625, + "KL/rejected_KL_mean": -1218.567626953125, + "KL/std": 674.4744873046875, + "epoch": 0.986784140969163, + "fcm_dpo/beta": 0.0009026298066601157, + "fcm_dpo/delta": 0.0317508839070797, + "fcm_dpo/margin": 409.248291015625, + "fcm_dpo/q_t": 0.418613076210022, + "grad_norm": 49.699440002441406, + "learning_rate": 3.293150240547549e-10, + "logits/chosen": -1.111328363418579, + "logits/rejected": -1.1195930242538452, + "logps/chosen": -867.9027099609375, + "logps/ref_chosen": -58.58328628540039, + "logps/ref_rejected": -93.14015197753906, + "logps/rejected": -1311.7078857421875, + "loss": 1.157, + "margin_dpo/margin_mean": 409.248291015625, + "margin_dpo/margin_std": 746.950927734375, + "step": 672 + }, + { + "KL/chosen_KL_mean": -722.28857421875, + "KL/mean": -917.1810302734375, + "KL/rejected_KL_mean": -1112.073486328125, + "KL/std": 562.255615234375, + "epoch": 0.9882525697503671, + "fcm_dpo/beta": 0.0009116331348195672, + "fcm_dpo/delta": 0.04632698372006416, + "fcm_dpo/margin": 389.7848205566406, + "fcm_dpo/q_t": 0.4198199510574341, + "grad_norm": 29.702667236328125, + "learning_rate": 2.6675629940689504e-10, + "logits/chosen": -1.029843807220459, + "logits/rejected": -1.0315158367156982, + "logps/chosen": -769.0118408203125, + "logps/ref_chosen": -46.72320556640625, + "logps/ref_rejected": -85.29623413085938, + "logps/rejected": -1197.36962890625, + "loss": 1.1329, + "margin_dpo/margin_mean": 389.7848205566406, + "margin_dpo/margin_std": 619.7523803710938, + "step": 673 + }, + { + "KL/chosen_KL_mean": -580.1348266601562, + "KL/mean": -826.81103515625, + "KL/rejected_KL_mean": -1073.4873046875, + "KL/std": 549.4868774414062, + "epoch": 0.9897209985315712, + "fcm_dpo/beta": 0.0009095786954276264, + "fcm_dpo/delta": -0.05102291703224182, + "fcm_dpo/margin": 493.3524475097656, + "fcm_dpo/q_t": 0.4003087282180786, + "grad_norm": 38.08716583251953, + "learning_rate": 2.1077827798404725e-10, + "logits/chosen": -0.9516767263412476, + "logits/rejected": -0.9709774255752563, + "logps/chosen": -625.580322265625, + "logps/ref_chosen": -45.445526123046875, + "logps/ref_rejected": -70.04593658447266, + "logps/rejected": -1143.533203125, + "loss": 1.0664, + "margin_dpo/margin_mean": 493.3524169921875, + "margin_dpo/margin_std": 656.5906982421875, + "step": 674 + }, + { + "KL/chosen_KL_mean": -678.9006958007812, + "KL/mean": -938.8206787109375, + "KL/rejected_KL_mean": -1198.7406005859375, + "KL/std": 619.1822509765625, + "epoch": 0.9911894273127754, + "fcm_dpo/beta": 0.000889546936377883, + "fcm_dpo/delta": -0.06706520915031433, + "fcm_dpo/margin": 519.83984375, + "fcm_dpo/q_t": 0.3973570168018341, + "grad_norm": 24.625337600708008, + "learning_rate": 1.6138243485910863e-10, + "logits/chosen": -1.0201187133789062, + "logits/rejected": -1.0345721244812012, + "logps/chosen": -723.0770263671875, + "logps/ref_chosen": -44.17628479003906, + "logps/ref_rejected": -74.09197998046875, + "logps/rejected": -1272.83251953125, + "loss": 1.0605, + "margin_dpo/margin_mean": 519.83984375, + "margin_dpo/margin_std": 662.8849487304688, + "step": 675 + }, + { + "KL/chosen_KL_mean": -727.82421875, + "KL/mean": -965.162109375, + "KL/rejected_KL_mean": -1202.5, + "KL/std": 583.515869140625, + "epoch": 0.9926578560939795, + "fcm_dpo/beta": 0.000891472096554935, + "fcm_dpo/delta": -0.024206459522247314, + "fcm_dpo/margin": 474.67572021484375, + "fcm_dpo/q_t": 0.4033673405647278, + "grad_norm": 24.022327423095703, + "learning_rate": 1.1857007165852472e-10, + "logits/chosen": -0.9483212232589722, + "logits/rejected": -0.9590877294540405, + "logps/chosen": -799.2227783203125, + "logps/ref_chosen": -71.39852905273438, + "logps/ref_rejected": -88.3587646484375, + "logps/rejected": -1290.858642578125, + "loss": 1.0732, + "margin_dpo/margin_mean": 474.6757507324219, + "margin_dpo/margin_std": 593.3427124023438, + "step": 676 + }, + { + "KL/chosen_KL_mean": -726.002685546875, + "KL/mean": -963.833251953125, + "KL/rejected_KL_mean": -1201.663818359375, + "KL/std": 609.81103515625, + "epoch": 0.9941262848751835, + "fcm_dpo/beta": 0.0008846810087561607, + "fcm_dpo/delta": -0.021789535880088806, + "fcm_dpo/margin": 475.6611022949219, + "fcm_dpo/q_t": 0.40949833393096924, + "grad_norm": 27.512174606323242, + "learning_rate": 8.23423165278725e-11, + "logits/chosen": -1.0342793464660645, + "logits/rejected": -1.0246810913085938, + "logps/chosen": -782.5301513671875, + "logps/ref_chosen": -56.527435302734375, + "logps/ref_rejected": -78.22654724121094, + "logps/rejected": -1279.890380859375, + "loss": 1.0982, + "margin_dpo/margin_mean": 475.66107177734375, + "margin_dpo/margin_std": 737.0035400390625, + "step": 677 + }, + { + "KL/chosen_KL_mean": -603.8602294921875, + "KL/mean": -871.7525634765625, + "KL/rejected_KL_mean": -1139.6448974609375, + "KL/std": 642.23291015625, + "epoch": 0.9955947136563876, + "fcm_dpo/beta": 0.0008793273009359837, + "fcm_dpo/delta": -0.07475695013999939, + "fcm_dpo/margin": 535.78466796875, + "fcm_dpo/q_t": 0.3942733407020569, + "grad_norm": 32.770172119140625, + "learning_rate": 5.270012410216185e-11, + "logits/chosen": -0.9874995946884155, + "logits/rejected": -1.0228235721588135, + "logps/chosen": -649.9947509765625, + "logps/ref_chosen": -46.13447570800781, + "logps/ref_rejected": -80.60462951660156, + "logps/rejected": -1220.24951171875, + "loss": 1.0583, + "margin_dpo/margin_mean": 535.78466796875, + "margin_dpo/margin_std": 712.2026977539062, + "step": 678 + }, + { + "KL/chosen_KL_mean": -696.7166748046875, + "KL/mean": -885.9703369140625, + "KL/rejected_KL_mean": -1075.22412109375, + "KL/std": 518.1109619140625, + "epoch": 0.9970631424375918, + "fcm_dpo/beta": 0.0008801834774203598, + "fcm_dpo/delta": 0.06918685883283615, + "fcm_dpo/margin": 378.50738525390625, + "fcm_dpo/q_t": 0.42413240671157837, + "grad_norm": 30.4984130859375, + "learning_rate": 2.9644275480772416e-11, + "logits/chosen": -1.0104858875274658, + "logits/rejected": -1.000281810760498, + "logps/chosen": -747.0115966796875, + "logps/ref_chosen": -50.294921875, + "logps/ref_rejected": -76.59813690185547, + "logps/rejected": -1151.8221435546875, + "loss": 1.1443, + "margin_dpo/margin_mean": 378.50738525390625, + "margin_dpo/margin_std": 593.6575927734375, + "step": 679 + }, + { + "KL/chosen_KL_mean": -685.6356201171875, + "KL/mean": -937.115966796875, + "KL/rejected_KL_mean": -1188.59619140625, + "KL/std": 657.9539184570312, + "epoch": 0.9985315712187959, + "fcm_dpo/beta": 0.0008748341351747513, + "fcm_dpo/delta": -0.04234904423356056, + "fcm_dpo/margin": 502.96063232421875, + "fcm_dpo/q_t": 0.3994414210319519, + "grad_norm": 45.33549118041992, + "learning_rate": 1.31753782067201e-11, + "logits/chosen": -0.9940932989120483, + "logits/rejected": -1.0226861238479614, + "logps/chosen": -762.55126953125, + "logps/ref_chosen": -76.91569519042969, + "logps/ref_rejected": -112.384765625, + "logps/rejected": -1300.98095703125, + "loss": 1.0878, + "margin_dpo/margin_mean": 502.96063232421875, + "margin_dpo/margin_std": 730.917724609375, + "step": 680 + }, + { + "KL/chosen_KL_mean": -695.575439453125, + "KL/mean": -892.23193359375, + "KL/rejected_KL_mean": -1088.888427734375, + "KL/std": 566.686767578125, + "epoch": 1.0, + "fcm_dpo/beta": 0.0008728657849133015, + "fcm_dpo/delta": -0.041127026081085205, + "fcm_dpo/margin": 393.31298828125, + "fcm_dpo/q_t": 0.4215954542160034, + "grad_norm": 27.740650177001953, + "learning_rate": 3.2938662507808745e-12, + "logits/chosen": -1.0516822338104248, + "logits/rejected": -1.0712807178497314, + "logps/chosen": -756.53271484375, + "logps/ref_chosen": -60.957279205322266, + "logps/ref_rejected": -88.55797576904297, + "logps/rejected": -1177.446533203125, + "loss": 1.1472, + "margin_dpo/margin_mean": 393.31298828125, + "margin_dpo/margin_std": 621.3822631835938, + "step": 681 + }, + { + "epoch": 1.0, + "step": 681, + "total_flos": 0.0, + "train_loss": 1.1094634984558374, + "train_runtime": 1738.7131, + "train_samples_per_second": 25.075, + "train_steps_per_second": 0.392 + } + ], + "logging_steps": 1, + "max_steps": 681, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 50, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +}